SVD computation subroutine

Next: About this document ... Up: References Previous: Interface to SVD computation
SVD computation subroutine

      subroutine svdcomp(A,S,V,svord,m,n,isw,error)
      implicit none
c
c     mxweep : maximum number of sweeps allowed
c     mxnupd : maximum number of implicit norm updates
c     epsilon : machine precision
c     tol : tolerance used for termination of algorithm
c
      integer mxweep,mxnupd
      real epsilon,tol
      parameter(mxweep=30)
      parameter(mxnupd=10)
      parameter(epsilon=0.5e-6,tol=5.0e-2)
c
c     Error code returned
c
      integer error
c
c     Dimensions of A
c
      integer m,n
c
c     SVD arrays
c     as input A contains the original matrix
c     as output A contains matrix U of SVD
c
      real A(*),S(*),V(*)
cmf$  layout A(:serial),S(:serial),V(:serial)
c
c     Ordering of singular values
c
      integer svord(*)
cmf$  layout svord(:serial)
c
c     CM arrays
c
c     U1,U2 : each one contains half the columns of A and an extra column
c             as a buffer for shifting
c     V1,V2 : each one contains half the columns of V and an extra column
c             as a buffer for shifting
c     alpha,beta : square of the norms of the columns of U1,U2
c		   will also hold sine theta, cosine theta for Jacobi
c		   transformations
c		   at the end of the run they will hold the singular values
c     gamma : inner product of columns in the same processor
c     inprod : normalized inner products used to determine if transformations
c              are applied based on thresholds. Also used to determine
c              termination of algorithm
c     buffer : holds temporary results
c     U1norm : squares of column 2-norms of U1
c     U2norm : squares of column 2-norms of U2
c     sing : stores singular values for transmission to front end and
c            for sorting in descending order
c     order : ordering of singular values
c
      real U1(m,(n+1)/2)
cmf$  layout U1(:serial,:news)
      real U2(m,(n+1)/2)
cmf$  layout U2(:serial,:news)
      real V1(m,(n+1)/2)
cmf$  layout V1(:serial,:news)
      real V2(m,(n+1)/2)
cmf$  layout V2(:serial,:news)
      real alpha((n+1)/2),beta((n+1)/2)
cmf$  layout alpha(:news),beta(:news)
      real gamma((n+1)/2),inprod((n+1)/2)
cmf$  layout gamma(:news),inprod(:news)
      real buffer(m,(n+1)/2)
cmf$  layout buffer(:serial,:news)
      real U1norm((n+1)/2),U2norm((n+1)/2)
cmf$  layout U1norm(:news),U2norm(:news)
      real sing(n)
cmf$  layout sing(:news)
      integer order(n)
cmf$  layout order(:news)
c
c     lc : last column in U1
c
      integer lc
c
c     sflag : true if stopping criterion has been met
c
      logical sflag
c
c     Temporary variables used in conjunction with the computation
c     of smax and smin
c
      real max1,max2,min1,min2
c
c     Loop counters
c     isw : sweep counter
c     irot : rotation counter
c     nupd : number of times implicit norm update has been used
c
      integer i,j,k,isw,irot,nupd
c
c     Timing variables
c
      integer itimer1,itimer2
      real flops
      real nsum,nsub,nmul,ndiv,nsqrt,nabs,nsign
      real wsum,wsub,wmul,wdiv,wsqrt,wabs,wsign
      data wsum/1.0/
      data wsub/1.0/
      data wmul/1.0/
      data wdiv/1.0/
      data wsqrt/1.0/
      data wabs/1.0/
      data wsign/1.0/
c
      data itimer1/1/
      data itimer2/2/
c
c     Initialize U1,U2 <- A
c
      lc = (n+1)/2
      do i=1,m-1,2
         call cmf_to_cm(U1(i,:),A(n*(i-1)+1),lc)
	 call cmf_to_cm(U2(i,:),A(n*(i-1)+lc+1),n-lc)
         call cmf_to_cm(U1(i+1,:),A(n*i+1),lc)
	 call cmf_to_cm(U2(i+1,:),A(n*i+lc+1),n-lc)
      enddo
      if (2*(m/2) .ne. m) then
	 call cmf_to_cm(U1(m,:),A(n*(m-1)+1),lc)
	 call cmf_to_cm(U2(m,:),A(n*(m-1)+lc+1),n-lc)
      endif
      if (lc .ne. n-lc) U2(:,lc) = 0.0
c
c     Initialize V1,V2 <- I
c
      V1 = 0.0
      V2 = 0.0
      forall (i=1:lc) V1(i,i) = 1.0
      forall (i=1:n-lc) V2(i+lc,i) = 1.0
c
c     Initialize looping variables
c
      isw = 0
      sflag = .false.
      nupd = mxnupd
c
c     Start timer 1
c
      call CM_timer_start(itimer1)
c
c     Loop over all sweeps
c
      do while (isw .lt. mxweep .and. .not. sflag)
	 isw = isw + 1
	 sflag = .true.
c
c     Loop over all rotations
c
	 do irot=1,2*lc-1
c
c	 compute inner products
c
	    nupd = nupd + 1
            if (nupd .ge. mxnupd) then
	       U1norm = 0.0
	       U2norm = 0.0
	       do i=1,m-1,2
		  U1norm = U1norm + U1(i,:) * U1(i,:)
		  U1norm = U1norm + U1(i+1,:) * U1(i+1,:)
		  U2norm = U2norm + U2(i,:) * U2(i,:)
		  U2norm = U2norm + U2(i+1,:) * U2(i+1,:)
	       enddo
	       if (2*(m/2) .ne. m) then
		  U1norm = U1norm + U1(m,:) * U1(m,:)
		  U2norm = U2norm + U2(m,:) * U2(m,:)
	       endif
	       nupd = 0
	    endif
	    gamma = 0.0
	    do i=1,m-1,2
	       gamma = gamma + U1(i,:) * U2(i,:)
	       gamma = gamma + U1(i+1,:) * U2(i+1,:)
	    enddo
	    if (2*(m/2) .ne. m) gamma = gamma + U1(m,:) * U2(m,:)
c
c	If any of the normalized inner products is greater than
c	the tolerance we must execute the next sweep after this one
c
	    where (U1norm .eq. 0.0) gamma = 0.0
	    where (U2norm .eq. 0.0) gamma = 0.0
	    where (gamma .eq. 0.0)
	       inprod = 0.0
	    elsewhere
	       inprod = abs(gamma / sqrt(U1norm * U2norm))
	    endwhere
	    if (any(inprod .gt. tol)) sflag = .false.
c
c	In this set of calculations alpha and beta are used for
c	temporary storage and at the end they will have the
c	contents of the Jacobi rotation matrices
c	gamma is also used for temporary storage
c
	    where (inprod .gt. epsilon)
c
c	eta = (U2norm - U1norm) / (2.0 * gamma) stored in alpha
c
	       alpha = (U2norm - U1norm) / (2.0 * gamma)
c
c	t = sign(eta)/(abs(eta) + sqrt(1.0+eta^2)) stored in beta
c
	       beta = sign(1.0,alpha) / (abs(alpha) +
     *			sqrt(1.0+alpha*alpha))
c
c       Update norms
c
	       U1norm = U1norm - beta * gamma
	       U2norm = U2norm + beta * gamma
c
c	cos(theta) = 1.0/sqrt(1.0*t^2) stored in alpha
c
	       alpha = 1.0/sqrt(1.0+beta*beta)
c
c	sin(theta) = t * cos(theta) stored in beta
c
	       beta = beta * alpha
	    elsewhere
	       alpha = 1.0
	       beta = 0.0
	    endwhere
c
c	Apply one sided Jacobi transformations, use gamma variable as buffer
c
	    do i=1,m-1,2
	       gamma = U1(i,:) * alpha - U2(i,:) * beta
	       U2(i,:) = U1(i,:) * beta + U2(i,:) * alpha
	       U1(i,:) = gamma
	       gamma = U1(i+1,:) * alpha - U2(i+1,:) * beta
	       U2(i+1,:) = U1(i+1,:) * beta + U2(i+1,:) * alpha
	       U1(i+1,:) = gamma
	       gamma = V1(i,:) * alpha - V2(i,:) * beta
	       V2(i,:) = V1(i,:) * beta + V2(i,:) * alpha
	       V1(i,:) = gamma
	       gamma = V1(i+1,:) * alpha - V2(i+1,:) * beta
	       V2(i+1,:) = V1(i+1,:) * beta + V2(i+1,:) * alpha
	       V1(i+1,:) = gamma
	    enddo
	    if (2*(m/2) .ne. m) then
	       gamma = U1(m,:) * alpha - U2(m,:) * beta
	       U2(m,:) = U1(m,:) * beta + U2(m,:) * alpha
	       U1(m,:) = gamma
	       gamma = V1(m,:) * alpha - V2(m,:) * beta
	       V2(m,:) = V1(m,:) * beta + V2(m,:) * alpha
	       V1(m,:) = gamma
	    endif
c
c	Shifting when lc > 2
c
c	    call CM_timer_start(itimer2)
	    if (lc .gt. 2) then
c
c	Shift the columns of U1,U2,V1,V2 to obtain next 
c       combination of columns in sweep
c
	       buffer = cshift(U2,dim=2,shift=-1)
	       buffer(:,1) = U1(:,1)
	       buffer(:,lc) = U1(:,lc)
	       U1 = cshift(U1,dim=2,shift=-1)
	       U2 = cshift(U2,dim=2,shift=1)
	       U1(:,1:2) = buffer(:,1:2)
	       U2(:,lc) = buffer(:,lc)
c
	       buffer = cshift(V2,dim=2,shift=-1)
	       buffer(:,1) = V1(:,1)
	       buffer(:,lc) = V1(:,lc)
	       V1 = cshift(V1,dim=2,shift=-1)
	       V2 = cshift(V2,dim=2,shift=1)
	       V1(:,1:2) = buffer(:,1:2)
	       V2(:,lc) = buffer(:,lc)

c        Shift squares of column norms U1norm, U2norm

gamma = cshift(U2norm,dim=1,shift=-1)
	       gamma(1) = U1norm(1)
	       gamma(lc) = U1norm(lc)
	       U1norm = cshift(U1norm,dim=1,shift=-1)
	       U2norm = cshift(U2norm,dim=1,shift=1)
	       U1norm(1:2) = gamma(1:2)
	       U2norm(lc) = gamma(lc)
	    elseif (lc .eq. 2) then
c
c	Shift when lc = 2
c
	       buffer = cshift(U2,dim=2,shift=-1)
	       U2(:,2) = U1(:,2)
	       U1(:,2) = buffer(:,2)
	       U2(:,1) = buffer(:,1)
c
	       buffer = cshift(V2,dim=2,shift=-1)
	       V2(:,2) = V1(:,2)
	       V1(:,2) = buffer(:,2)
	       V2(:,1) = buffer(:,1)
c
	       gamma = cshift(U2norm,dim=1,shift=-1)
	       U2norm(2) = U1norm(2)
	       U1norm(2) = gamma(2)
	       U2norm(1) = gamma(1)
	    endif
c	    flops = 0.0
c	    Call MY_stop_timer('shift '//char(0),itimer2,0,flops)
	 enddo
      enddo
c
c   Timing information
c
      nsum = wsum * float(m*2*lc + (m+3)*lc + 2*(m-1)*lc/mxnupd)
      nsub = wsub * float(m*2*lc + 4*lc)
      nmul = wmul * float(m*lc*(9+2/mxnupd) + 7*lc)
      ndiv = wdiv * float(4*lc)
      nsqrt = wsqrt * float(3*lc)
      nabs = wabs * float(2*lc)
      nsign = wsign * float(lc)
      flops = float(isw * (2*lc-1)) *
     *			(nsum+nsub+nmul+ndiv+nsqrt+nabs+nsign)
      call MY_stop_timer('main loop '//char(0),itimer1,0,flops)
      if (sflag) then
	 error = 0
      else
	 error = 1
      endif
c
c	Compute singular values and U
c

buffer = U1 * U1
      alpha = sqrt(sum(buffer,dim=1))
      buffer = spread(alpha,dim=1,ncopies=m)
      where (buffer .ne. 0.0) U1 = U1 / buffer
c
      buffer = U2 * U2
      beta = sqrt(sum(buffer,dim=1))
      buffer = spread(beta,dim=1,ncopies=m)
      where (buffer .ne. 0.0) U2 = U2 / buffer
c
c	Set output arrays A,S,V to U,SIGMA,V
c
      do i=1,m
         call cmf_from_cm(A(n*(i-1)+1),U1(i,:),lc)
	 call cmf_from_cm(A(n*(i-1)+lc+1),U2(i,:),n-lc)
      enddo
c
      sing(1:lc) = alpha
      sing(lc+1:n) = beta(1:n-lc)
      call cmf_from_cm(S,sing,n)
c
      do i=1,n
         call cmf_from_cm(V(n*(i-1)+1),V1(i,:),lc)
	 call cmf_from_cm(V(n*(i-1)+lc+1),V2(i,:),n-lc)
      enddo
c
c     Generate singular value ordering vector svord
c
      call cmf_order(order,sing,1,.true.)
      do i=1,n
	 svord(n+1-order(i)) = i
      enddo
      return
      end
Next: About this document ... Up: References Previous: Interface to SVD computation
Stanford Exploration Project
12/18/1997