# matrix multiply and its adjoint # subroutine matmul( adj, bb, nx,x, ny,y) integer ix, iy, adj, nx, ny real bb(ny,nx), x(nx), y(ny) call adjnull( adj, 0, x,nx, y,ny) do ix= 1, nx { do iy= 1, ny { if( adj == 0 ) y(iy) = y(iy) + bb(iy,ix) * x(ix) else x(ix) = x(ix) + bb(iy,ix) * y(iy) }} return; end
I have a routine to set a clip level and clip the residual.
subroutine hclip( nr, kwant, rr, cr) integer i, iter, nr, kwant real rr(nr), cr(nr), huber, cutoff common huber, cutoff, iter #if( iter < 10) call quantabs( kwant,nr,rr, cutoff) do i= 1, nr if( rr(i) > cutoff ) { cr(i) = cutoff } else if( rr(i) < -cutoff ) { cr(i) = -cutoff } else { cr(i) = rr(i) } return; end
From the equations defining the plane search,
we recognize the general steps required to convert the
usual
conjugate-direction solver to
a robust one using Huber functions.
First, the gradient
suggests a direction
to move the model.
Moving in this direction would cause a residual change of
.
is the residual change on the previous iteration.
So far, everything is like the usual
conjugate-direction method except that the residual
is clipped.
# Conjugate direction descent, minimize SUM Huber(rr(i)) # nx # rr(i) = sum aaa(i,j) * x(j) - yy(i) # j=1 subroutine cgmeth( nx,x, nr,yy,rr, aaa, niter, kwant) integer i, iter, nx, nr, niter, kwant real x(nx), yy(nr), rr(nr), aaa(nr,nx), cutoff, huber common huber, cutoff, iter temporary real dx(nx), sx(nx), dr(nr), sr(nr),cr(nr) do i= 1, nx x(i) = 0. do i= 1, nr rr(i) = - yy(i) do iter= 0, niter { call hclip( nr, kwant, rr, cr) call matmul( 1, aaa, nx,dx, nr,cr) # dx = A' clip(r) call matmul( 0, aaa, nx,dx, nr,dr) # dr = A dx call cdhuber( iter, nx, x,dx,sx, nr,rr,dr,sr,cr) # x=x+s; rr=rr+ss } return; end
The routine work that needs to be done inside
each step of any Huberized conjugate direction method
is to compute and
and apply them.
This is nothing but our traditional
cgplus()
subroutine
with the new equation (8).
Starting from
the old subroutine cgplus()
we make a new subroutine cdhuber()
modifying the old in two places:
(1) use the clipped residual to determine the distance,
and
(2)
omit clipped terms from entering the matrix
multiplying the vector of
.
# A step of conjugate-gradient descent. # subroutine cdhuber( iter, n, x, g, s, m, rr, gg, ss, cc) integer i, iter, n, m real x(n), rr(m), g(n), gg(m), s(n), ss(m), cc(m), huber, cutoff double precision wdot, ddot, sds, gdg, gds, determ, gdr, sdr, alfa, beta common huber, cutoff # for printouts temporary real ww(m) huber = 0. do i=1,m if( rr(i)==cc(i)) { ww(i) = 1.; huber= huber + rr(i)*rr(i)/2. } else { ww(i) = 0.; huber= huber + rr(i)*cc(i) - cc(i)*cc(i)/2 } if( iter < 1 ) { # steepest descent on first iteration. do i= 1, n s(i) = 0. do i= 1, m ss(i) = 0. if( wdot(m,ww,gg,gg)==0 ) call erexit('cdhuber: first grad vanishes ') alfa = - ddot(m,gg,cc) / wdot(m,ww,gg,gg) beta = 0. } else { # search plane by solving 2-by-2 gdg = wdot(m,ww,gg,gg) # G . (R + G*alfa + S*beta) = 0 sds = wdot(m,ww,ss,ss) # S . (R + G*alfa + S*beta) = 0 gds = wdot(m,ww,gg,ss) if ( gdg == 0.) {call seperr('cdhuber: grad vanishes'); return} else if( sds == 0.) { alfa= - ddot(m,gg,cc) / wdot(m,ww,gg,gg); beta=0.} else { determ = gdg * sds * dmax1( 1.d0 - (gds/gdg)*(gds/sds), 1.0d-12 ) gdr = - ddot(m,gg,cc) sdr = - ddot(m,ss,cc) alfa = ( sds * gdr - gds * sdr ) / determ beta = (-gds * gdr + gdg * sdr ) / determ } } do i= 1, n # s = model step s(i) = alfa * g(i) + beta * s(i) do i= 1, m # ss = conjugate ss(i) = alfa * gg(i) + beta * ss(i) do i= 1, n # update solution x(i) = x(i) + s(i) do i= 1, m # update residual rr(i) = rr(i) + ss(i) return; enddouble precision function wdot( n, w, x, y ) integer i, n; real w(n), x(n), y(n); double precision val val = 0.; do i=1,n { val = val + w(i) * x(i) * y(i) } wdot = val; return; end