* From arpa!sol-michael.stanford.edu!mike 5 May 89 23:53:00 PDT
SUBROUTINE LSQR ( M, N, APROD, DAMP,
$ LENIW, LENRW, IW, RW,
$ U, V, W, X, SE,
$ ATOL, BTOL, CONLIM, ITNLIM, NOUT,
$ ISTOP, ITN, ANORM, ACOND, RNORM, ARNORM, XNORM)
EXTERNAL APROD
INTEGER M, N, LENIW, LENRW, ITNLIM, NOUT, ISTOP, ITN
INTEGER IW(LENIW)
DOUBLE PRECISION RW(LENRW), U(M), V(N), W(N), X(N), SE(N),
$ ATOL, BTOL, CONLIM, DAMP,
$ ANORM, ACOND, RNORM, ARNORM, XNORM
*-----------------------------------------------------------------------
*
* LSQR finds a solution x to the following problems:
*
* 1. Unsymmetric equations -- solve A*x = b
*
* 2. Linear least squares -- solve A*x = b
* in the least-squares sense
*
* 3. Damped least squares -- solve ( A )*x = ( b )
* ( damp*I ) ( 0 )
* in the least-squares sense
*
* where A is a matrix with m rows and n columns, b is an
* m-vector, and damp is a scalar. (All quantities are real.)
* The matrix A is intended to be large and sparse. It is accessed
* by means of subroutine calls of the form
*
* CALL APROD ( mode, m, n, x, y, LENIW, LENRW, IW, RW )
*
* which must perform the following functions:
*
* If MODE = 1, compute y = y + A*x.
* If MODE = 2, compute x = x + A(transpose)*y.
*
* The vectors x and y are input parameters in both cases.
* If mode = 1, y should be altered without changing x.
* If mode = 2, x should be altered without changing y.
* The parameters LENIW, LENRW, IW, RW may be used for workspace
* as described below.
*
* The rhs vector b is input via U, and subsequently overwritten.
*
*
* Note: LSQR uses an iterative method to approximate the solution.
* The number of iterations required to reach a certain accuracy
* depends strongly on the scaling of the problem. Poor scaling of
* the rows or columns of A should therefore be avoided where
* possible.
*
* For example, in problem 1 the solution is unaltered by
* row-scaling. If a row of A is very small or large compared to
* the other rows of A, the corresponding row of ( A b ) should be
* scaled up or down.
*
* In problems 1 and 2, the solution x is easily recovered
* following column-scaling. Unless better information is known,
* the nonzero columns of A should be scaled so that they all have
* the same Euclidean norm (e.g., 1.0).
*
* In problem 3, there is no freedom to re-scale if damp is
* nonzero. However, the value of damp should be assigned only
* after attention has been paid to the scaling of A.
*
* The parameter damp is intended to help regularize
* ill-conditioned systems, by preventing the true solution from
* being very large. Another aid to regularization is provided by
* the parameter ACOND, which may be used to terminate iterations
* before the computed solution becomes very large.
*
*
* Notation
* --------
*
* The following quantities are used in discussing the subroutine
* parameters:
*
* Abar = ( A ), bbar = ( b )
* ( damp*I ) ( 0 )
*
* r = b - A*x, rbar = bbar - Abar*x
*
* rnorm = sqrt( norm(r)**2 + damp**2 * norm(x)**2 )
* = norm( rbar )
*
* RELPR = the relative precision of floating-point arithmetic
* on the machine being used. For example, on the IBM 370,
* RELPR is about 1.0E-6 and 1.0D-16 in single and double
* precision respectively.
*
* LSQR minimizes the function rnorm with respect to x.
*
*
* Parameters
* ----------
*
* M input m, the number of rows in A.
*
* N input n, the number of columns in A.
*
* APROD external See above.
*
* DAMP input The damping parameter for problem 3 above.
* (DAMP should be 0.0 for problems 1 and 2.)
* If the system A*x = b is incompatible, values
* of DAMP in the range 0 to sqrt(RELPR)*norm(A)
* will probably have a negligible effect.
* Larger values of DAMP will tend to decrease
* the norm of x and reduce the number of
* iterations required by LSQR.
*
* The work per iteration and the storage needed
* by LSQR are the same for all values of DAMP.
*
* LENIW input The length of the workspace array IW.
* LENRW input The length of the workspace array RW.
* IW workspace An integer array of length LENIW.
* RW workspace A real array of length LENRW.
*
* Note: LSQR does not explicitly use the previous four
* parameters, but passes them to subroutine APROD for
* possible use as workspace. If APROD does not need
* IW or RW, the values LENIW = 1 or LENRW = 1 should
* be used, and the actual parameters corresponding to
* IW or RW may be any convenient array of suitable type.
*
* U(M) input The rhs vector b. Beware that U is
* over-written by LSQR.
*
* V(N) workspace
* W(N) workspace
*
* X(N) output Returns the computed solution x.
*
* SE(N) output Returns standard error estimates for the
* components of X. For each i, SE(i) is set
* to the value rnorm * sqrt( sigma(i,i) / T ),
* where sigma(i,i) is an estimate of the i-th
* diagonal of the inverse of Abar(transpose)*Abar
* and T = 1 if m .le. n,
* T = m - n if m .gt. n and damp = 0,
* T = m if damp .ne. 0.
*
* ATOL input An estimate of the relative error in the data
* defining the matrix A. For example,
* if A is accurate to about 6 digits, set
* ATOL = 1.0E-6 .
*
* BTOL input An extimate of the relative error in the data
* defining the rhs vector b. For example,
* if b is accurate to about 6 digits, set
* BTOL = 1.0E-6 .
*
* CONLIM input An upper limit on cond(Abar), the apparent
* condition number of the matrix Abar.
* Iterations will be terminated if a computed
* estimate of cond(Abar) exceeds CONLIM.
* This is intended to prevent certain small or
* zero singular values of A or Abar from
* coming into effect and causing unwanted growth
* in the computed solution.
*
* CONLIM and DAMP may be used separately or
* together to regularize ill-conditioned systems.
*
* Normally, CONLIM should be in the range
* 1000 to 1/RELPR.
* Suggested value:
* CONLIM = 1/(100*RELPR) for compatible systems,
* CONLIM = 1/(10*sqrt(RELPR)) for least squares.
*
* Note: If the user is not concerned about the parameters
* ATOL, BTOL and CONLIM, any or all of them may be set
* to zero. The effect will be the same as the values
* RELPR, RELPR and 1/RELPR respectively.
*
* ITNLIM input An upper limit on the number of iterations.
* Suggested value:
* ITNLIM = n/2 for well-conditioned systems
* with clustered singular values,
* ITNLIM = 4*n otherwise.
*
* NOUT input File number for printed output. If positive,
* a summary will be printed on file NOUT.
*
* ISTOP output An integer giving the reason for termination:
*
* 0 x = 0 is the exact solution.
* No iterations were performed.
*
* 1 The equations A*x = b are probably
* compatible. Norm(A*x - b) is sufficiently
* small, given the values of ATOL and BTOL.
*
* 2 The system A*x = b is probably not
* compatible. A least-squares solution has
* been obtained that is sufficiently accurate,
* given the value of ATOL.
*
* 3 An estimate of cond(Abar) has exceeded
* CONLIM. The system A*x = b appears to be
* ill-conditioned. Otherwise, there could be an
* error in subroutine APROD.
*
* 4 The equations A*x = b are probably
* compatible. Norm(A*x - b) is as small as
* seems reasonable on this machine.
*
* 5 The system A*x = b is probably not
* compatible. A least-squares solution has
* been obtained that is as accurate as seems
* reasonable on this machine.
*
* 6 Cond(Abar) seems to be so large that there is
* no point in doing further iterations,
* given the precision of this machine.
* There could be an error in subroutine APROD.
*
* 7 The iteration limit ITNLIM was reached.
*
* ITN output The number of iterations performed.
*
* ANORM output An estimate of the Frobenius norm of Abar.
* This is the square-root of the sum of squares
* of the elements of Abar.
* If DAMP is small and if the columns of A
* have all been scaled to have length 1.0,
* ANORM should increase to roughly sqrt(n).
* A radically different value for ANORM may
* indicate an error in subroutine APROD (there
* may be an inconsistency between modes 1 and 2).
*
* ACOND output An estimate of cond(Abar), the condition
* number of Abar. A very high value of ACOND
* may again indicate an error in APROD.
*
* RNORM output An estimate of the final value of norm(rbar),
* the function being minimized (see notation
* above). This will be small if A*x = b has
* a solution.
*
* ARNORM output An estimate of the final value of
* norm( Abar(transpose)*rbar ), the norm of
* the residual for the usual normal equations.
* This should be small in all cases. (ARNORM
* will often be smaller than the true value
* computed from the output vector X.)
*
* XNORM output An estimate of the norm of the final
* solution vector X.
*
*
* Subroutines and functions used
* ------------------------------
*
* USER APROD
* BLAS DCOPY, DNRM2, DSCAL (see Lawson et al. below)
*
*
* Precision
* ---------
*
* The number of iterations required by LSQR will usually decrease
* if the computation is performed in higher precision. To convert
* LSQR between single and double precision, change the words
* DOUBLE PRECISION
* DCOPY, DNRM2, DSCAL
* to the appropriate FORTRAN and BLAS equivalents.
* Also change 'D+' or 'E+' in the PARAMETER statement.
*
*
* References
* ----------
*
* C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse
* linear equations and sparse least squares,
* ACM Transactions on Mathematical Software 8, 1 (March 1982),
* pp. 43-71.
*
* C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse
* linear equations and least-squares problems,
* ACM Transactions on Mathematical Software 8, 2 (June 1982),
* pp. 195-209.
*
* C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
* Basic linear algebra subprograms for Fortran usage,
* ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
* pp. 308-323 and 324-325.
*-----------------------------------------------------------------------
*
*
* LSQR development:
* 22 Feb 1982: LSQR sent to ACM TOMS to become Algorithm 583.
* 15 Sep 1985: Final F66 version. LSQR sent to "misc" in netlib.
* 13 Oct 1987: Bug (Robert Davies, DSIR). Have to delete
* IF ( (ONE + DABS(T)) .LE. ONE ) GO TO 200
* from loop 200. The test was an attempt to reduce
* underflows, but caused W(I) not to be updated.
* 17 Mar 1989: First F77 version.
* 04 May 1989: Bug (David Gay, AT&T). When the second BETA is zero,
* RNORM = 0 and
* TEST2 = ARNORM / (ANORM * RNORM) overflows.
* Fixed by testing for RNORM = 0.
* 05 May 1989: Sent to "misc" in netlib.
*
* Michael A. Saunders (na.saunders @ NA-net.stanford.edu)
* Department of Operations Research
* Stanford University
* Stanford, CA 94305-4022.
*-----------------------------------------------------------------------
* Intrinsics and local variables
INTRINSIC ABS, MOD, SQRT
INTEGER I, NCONV, NSTOP
DOUBLE PRECISION DNRM2
DOUBLE PRECISION ALFA, BBNORM, BETA, BNORM,
$ CS, CS1, CS2, CTOL, DAMPSQ, DDNORM, DELTA,
$ GAMMA, GAMBAR, PHI, PHIBAR, PSI,
$ RES1, RES2, RHO, RHOBAR, RHBAR1, RHBAR2,
$ RHS, RTOL, SN, SN1, SN2,
$ T, TAU, TEST1, TEST2, TEST3,
$ THETA, T1, T2, T3, XXNORM, Z, ZBAR
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
CHARACTER*16 ENTER, EXIT
CHARACTER*60 MSG(0:7)
DATA ENTER /' Enter LSQR. '/,
$ EXIT /' Exit LSQR. '/
DATA MSG
$ / 'The exact solution is X = 0',
$ 'Ax - b is small enough, given ATOL, BTOL',
$ 'The least-squares solution is good enough, given ATOL',
$ 'The estimate of cond(Abar) has exceeded CONLIM',
$ 'Ax - b is small enough for this machine',
$ 'The least-squares solution is good enough for this machine',
$ 'Cond(Abar) seems to be too large for this machine',
$ 'The iteration limit has been reached' /
*-----------------------------------------------------------------------
* Initialize.
IF (NOUT .GT. 0)
$ WRITE(NOUT, 1000) ENTER, M, N, DAMP, ATOL, CONLIM, BTOL, ITNLIM
ITN = 0
ISTOP = 0
NSTOP = 0
CTOL = ZERO
IF (CONLIM .GT. ZERO) CTOL = ONE / CONLIM
ANORM = ZERO
ACOND = ZERO
BBNORM = ZERO
DAMPSQ = DAMP**2
DDNORM = ZERO
RES2 = ZERO
XNORM = ZERO
XXNORM = ZERO
CS2 = - ONE
SN2 = ZERO
Z = ZERO
DO 10 I = 1, N
V(I) = ZERO
X(I) = ZERO
SE(I) = ZERO
10 CONTINUE
* Set up the first vectors U and V for the bidiagonalization.
* These satisfy BETA*U = b, ALFA*V = A(transpose)*U.
ALFA = ZERO
BETA = DNRM2 ( M, U, 1 )
IF (BETA .GT. ZERO) THEN
CALL DSCAL ( M, (ONE / BETA), U, 1 )
CALL APROD ( 2, M, N, V, U, LENIW, LENRW, IW, RW )
ALFA = DNRM2 ( N, V, 1 )
END IF
IF (ALFA .GT. ZERO) THEN
CALL DSCAL ( N, (ONE / ALFA), V, 1 )
CALL DCOPY ( N, V, 1, W, 1 )
END IF
ARNORM = ALFA * BETA
IF (ARNORM .EQ. ZERO) GO TO 800
RHOBAR = ALFA
PHIBAR = BETA
BNORM = BETA
RNORM = BETA
IF (NOUT .GT. 0 ) THEN
IF (DAMPSQ .EQ. ZERO) THEN
WRITE(NOUT, 1200)
ELSE
WRITE(NOUT, 1300)
END IF
TEST1 = ONE
TEST2 = ALFA / BETA
WRITE(NOUT, 1500) ITN, X(1), RNORM, TEST1, TEST2
WRITE(NOUT, 1600)
END IF
* ------------------------------------------------------------------
* Main iteration loop.
* ------------------------------------------------------------------
100 ITN = ITN + 1
* Perform the next step of the bidiagonalization to obtain the
* next BETA, U, ALFA, V. These satisfy the relations
* BETA*U = A*V - ALFA*U,
* ALFA*V = A(transpose)*U - BETA*V.
CALL DSCAL ( M, (- ALFA), U, 1 )
CALL APROD ( 1, M, N, V, U, LENIW, LENRW, IW, RW )
BETA = DNRM2 ( M, U, 1 )
BBNORM = BBNORM + ALFA**2 + BETA**2 + DAMPSQ
IF (BETA .GT. ZERO) THEN
CALL DSCAL ( M, (ONE / BETA), U, 1 )
CALL DSCAL ( N, (- BETA), V, 1 )
CALL APROD ( 2, M, N, V, U, LENIW, LENRW, IW, RW )
ALFA = DNRM2 ( N, V, 1 )
IF (ALFA .GT. ZERO) THEN
CALL DSCAL ( N, (ONE / ALFA), V, 1 )
END IF
END IF
* Use a plane rotation to eliminate the damping parameter.
* This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix.
RHBAR2 = RHOBAR**2 + DAMPSQ
RHBAR1 = SQRT( RHBAR2 )
CS1 = RHOBAR / RHBAR1
SN1 = DAMP / RHBAR1
PSI = SN1 * PHIBAR
PHIBAR = CS1 * PHIBAR
* Use a plane rotation to eliminate the subdiagonal element (BETA)
* of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
RHO = SQRT( RHBAR2 + BETA**2 )
CS = RHBAR1 / RHO
SN = BETA / RHO
THETA = SN * ALFA
RHOBAR = - CS * ALFA
PHI = CS * PHIBAR
PHIBAR = SN * PHIBAR
TAU = SN * PHI
* Update X, W and the standard error estimates.
T1 = PHI / RHO
T2 = - THETA / RHO
T3 = ONE / RHO
DO 200 I = 1, N
T = W(I)
X(I) = T1*T + X(I)
W(I) = T2*T + V(I)
T = (T3*T)**2
SE(I) = T + SE(I)
DDNORM = T + DDNORM
200 CONTINUE
* Use a plane rotation on the right to eliminate the
* super-diagonal element (THETA) of the upper-bidiagonal matrix.
* Then use the result to estimate norm(X).
DELTA = SN2 * RHO
GAMBAR = - CS2 * RHO
RHS = PHI - DELTA * Z
ZBAR = RHS / GAMBAR
XNORM = SQRT( XXNORM + ZBAR **2 )
GAMMA = SQRT( GAMBAR**2 + THETA**2 )
CS2 = GAMBAR / GAMMA
SN2 = THETA / GAMMA
Z = RHS / GAMMA
XXNORM = XXNORM + Z**2
* Test for convergence.
* First, estimate the norm and condition of the matrix Abar,
* and the norms of rbar and Abar(transpose)*rbar.
ANORM = SQRT( BBNORM )
ACOND = ANORM * SQRT( DDNORM )
RES1 = PHIBAR**2
RES2 = RES2 + PSI**2
RNORM = SQRT( RES1 + RES2 )
ARNORM = ALFA * ABS( TAU )
* Now use these norms to estimate certain other quantities,
* some of which will be small near a solution.
TEST1 = RNORM / BNORM
TEST2 = ZERO
IF (RNORM .GT. ZERO) TEST2 = ARNORM / (ANORM * RNORM)
TEST3 = ONE / ACOND
T1 = TEST1 / (ONE + ANORM * XNORM / BNORM)
RTOL = BTOL + ATOL * ANORM * XNORM / BNORM
* The following tests guard against extremely small values of
* ATOL, BTOL or CTOL. (The user may have set any or all of
* the parameters ATOL, BTOL, CONLIM to zero.)
* The effect is equivalent to the normal tests using
* ATOL = RELPR, BTOL = RELPR, CONLIM = 1/RELPR.
T3 = ONE + TEST3
T2 = ONE + TEST2
T1 = ONE + T1
IF (ITN .GE. ITNLIM) ISTOP = 7
IF (T3 .LE. ONE ) ISTOP = 6
IF (T2 .LE. ONE ) ISTOP = 5
IF (T1 .LE. ONE ) ISTOP = 4
* Allow for tolerances set by the user.
IF (TEST3 .LE. CTOL) ISTOP = 3
IF (TEST2 .LE. ATOL) ISTOP = 2
IF (TEST1 .LE. RTOL) ISTOP = 1
* ==================================================================
* See if it is time to print something.
IF (NOUT .LE. 0 ) GO TO 600
IF (N .LE. 40 ) GO TO 400
IF (ITN .LE. 10 ) GO TO 400
IF (ITN .GE. ITNLIM-10) GO TO 400
IF (MOD(ITN,10) .EQ. 0 ) GO TO 400
IF (TEST3 .LE. 2.0*CTOL) GO TO 400
IF (TEST2 .LE. 10.0*ATOL) GO TO 400
IF (TEST1 .LE. 10.0*RTOL) GO TO 400
IF (ISTOP .NE. 0 ) GO TO 400
GO TO 600
* Print a line for this iteration.
400 WRITE(NOUT, 1500) ITN, X(1), RNORM, TEST1, TEST2, ANORM, ACOND
IF (MOD(ITN,10) .EQ. 0) WRITE(NOUT, 1600)
* ==================================================================
* Stop if appropriate.
* The convergence criteria are required to be met on NCONV
* consecutive iterations, where NCONV is set below.
* Suggested value: NCONV = 1, 2 or 3.
600 IF (ISTOP .EQ. 0) NSTOP = 0
IF (ISTOP .EQ. 0) GO TO 100
NCONV = 1
NSTOP = NSTOP + 1
IF (NSTOP .LT. NCONV .AND. ITN .LT. ITNLIM) ISTOP = 0
IF (ISTOP .EQ. 0) GO TO 100
* ------------------------------------------------------------------
* End of iteration loop.
* ------------------------------------------------------------------
* Finish off the standard error estimates.
T = ONE
IF (M .GT. N ) T = M - N
IF (DAMPSQ .GT. ZERO) T = M
T = RNORM / SQRT( T )
DO 700 I = 1, N
SE(I) = T * SQRT( SE(I) )
700 CONTINUE
* Print the stopping condition.
800 IF (NOUT .GT. 0) THEN
WRITE(NOUT, 2000) EXIT, ISTOP, ITN,
$ EXIT, ANORM, ACOND,
$ EXIT, RNORM, ARNORM,
$ EXIT, BNORM, XNORM
WRITE(NOUT, 3000) EXIT, MSG(ISTOP)
END IF
900 RETURN
* ------------------------------------------------------------------
1000 FORMAT(// 1P, A, ' Least-squares solution of A*x = b'
$ / ' The matrix A has', I7, ' rows and', I7, ' columns'
$ / ' The damping parameter is DAMP =', E10.2
$ / ' ATOL =', E10.2, 15X, 'CONLIM =', E10.2
$ / ' BTOL =', E10.2, 15X, 'ITNLIM =', I10)
1200 FORMAT(// ' Itn x(1) Function',
$ ' Compatible LS Norm A Cond A' /)
1300 FORMAT(// ' Itn x(1) Function',
$ ' Compatible LS Norm Abar Cond Abar' /)
1500 FORMAT(1P, I6, 2E17.9, 4E10.2)
1600 FORMAT(1X)
2000 FORMAT(/ 1P, A, 6X, 'ISTOP =', I3, 16X, 'ITN =', I9
$ / A, 6X, 'ANORM =', E13.5, 6X, 'ACOND =', E13.5
$ / A, 6X, 'RNORM =', E13.5, 6X, 'ARNORM =', E13.5,
$ / A, 6X, 'BNORM =', E13.5, 6X, 'XNORM =', E13.5)
3000 FORMAT( A, 6X, A )
* ------------------------------------------------------------------
* End of LSQR
END
C ******************************************************
C
C WARNING. Delete the following imitation BLAS routines
C if a genuine BLAS library is available.
C
C ******************************************************
C
SUBROUTINE DCOPY ( N,X,INCX,Y,INCY )
INTEGER N,INCX,INCY
DOUBLE PRECISION X(N),Y(N)
C
C This may be replaced by the corresponding BLAS routine.
C The following is a simple version for use with LSQR.
C
DO 10 I = 1, N
Y(I) = X(I)
10 CONTINUE
RETURN
C
C END OF DCOPY
END
DOUBLE PRECISION FUNCTION DNRM2 ( N,X,INCX )
INTEGER N,INCX
DOUBLE PRECISION X(N)
C
C This may be replaced by the corresponding BLAS routine.
C The following is a simple version for use with LSQR.
C
INTEGER I
DOUBLE PRECISION D, DSQRT
C
D = 0.0
DO 10 I = 1, N
D = D + X(I)**2
10 CONTINUE
DNRM2 = DSQRT(D)
RETURN
C
C END OF DNRM2
END
SUBROUTINE DSCAL ( N,A,X,INCX )
INTEGER N,INCX
DOUBLE PRECISION A,X(N)
C
C This may be replaced by the corresponding BLAS routine.
C The following is a simple version for use with LSQR.
C
DO 10 I = 1, N
X(I) = A*X(I)
10 CONTINUE
RETURN
C
C END OF DSCAL
END
*********************************************************
*
* These routines are for testing LSQR.
*
*********************************************************
SUBROUTINE APROD ( MODE, M, N, X, Y, LENIW, LENRW, IW, RW )
INTEGER MODE, M, N, LENIW, LENRW
INTEGER IW(LENIW)
DOUBLE PRECISION X(N), Y(M), RW(LENRW)
* ------------------------------------------------------------------
* This is the matrix-vector product routine required by LSQR
* for a test matrix of the form A = HY*D*HZ. The quantities
* defining D, HY, HZ are in the work array RW, followed by a
* work array W. These are passed to APROD1 and APROD2 in order to
* make the code readable.
* ------------------------------------------------------------------
INTEGER LOCD, LOCHY, LOCHZ, LOCW
LOCD = 1
LOCHY = LOCD + N
LOCHZ = LOCHY + M
LOCW = LOCHZ + N
IF (MODE .EQ. 1) CALL APROD1( M, N, X, Y,
$ RW(LOCD), RW(LOCHY), RW(LOCHZ), RW(LOCW) )
IF (MODE .NE. 1) CALL APROD2( M, N, X, Y,
$ RW(LOCD), RW(LOCHY), RW(LOCHZ), RW(LOCW) )
* End of APROD
END
SUBROUTINE APROD1( M, N, X, Y, D, HY, HZ, W )
INTEGER M, N
DOUBLE PRECISION X(N), Y(M), D(N), HY(M), HZ(N), W(M)
* ------------------------------------------------------------------
* APROD1 computes Y = Y + A*X for subroutine APROD,
* where A is a test matrix of the form A = HY*D*HZ,
* and the latter matrices HY, D, HZ are represented by
* input vectors with the same name.
* ------------------------------------------------------------------
INTEGER I
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0 )
CALL HPROD ( N, HZ, X, W )
DO 100 I = 1, N
W(I) = D(I) * W(I)
100 CONTINUE
DO 200 I = N + 1, M
W(I) = ZERO
200 CONTINUE
CALL HPROD ( M, HY, W, W )
DO 600 I = 1, M
Y(I) = Y(I) + W(I)
600 CONTINUE
* End of APROD1
END
SUBROUTINE APROD2( M, N, X, Y, D, HY, HZ, W )
INTEGER M, N
DOUBLE PRECISION X(N), Y(M), D(N), HY(M), HZ(N), W(M)
* ------------------------------------------------------------------
* APROD2 computes X = X + A(T)*Y for subroutine APROD,
* where A is a test matrix of the form A = HY*D*HZ,
* and the latter matrices HY, D, HZ are represented by
* input vectors with the same name.
* ------------------------------------------------------------------
INTEGER I
CALL HPROD ( M, HY, Y, W )
DO 100 I = 1, N
W(I) = D(I)*W(I)
100 CONTINUE
CALL HPROD ( N, HZ, W, W )
DO 600 I = 1, N
X(I) = X(I) + W(I)
600 CONTINUE
* End of APROD2
END
SUBROUTINE HPROD ( N, HZ, X, Y )
INTEGER N
DOUBLE PRECISION HZ(N), X(N), Y(N)
* ------------------------------------------------------------------
* HPROD applies a Householder transformation stored in HZ
* to get Y = ( I - 2*HZ*HZ(transpose) ) * X.
* ------------------------------------------------------------------
INTEGER I
DOUBLE PRECISION S
S = 0.0
DO 100 I = 1, N
S = HZ(I) * X(I) + S
100 CONTINUE
S = S + S
DO 200 I = 1, N
Y(I) = X(I) - S * HZ(I)
200 CONTINUE
* End of HPROD
END
SUBROUTINE LSTP ( M, N, NDUPLC, NPOWER, DAMP, X,
$ B, D, HY, HZ, W, ACOND, RNORM )
INTEGER M, N, MAXMN, NDUPLC, NPOWER
DOUBLE PRECISION DAMP, ACOND, RNORM
DOUBLE PRECISION B(M), X(N), D(N), HY(M), HZ(N), W(M)
* ------------------------------------------------------------------
* LSTP generates a sparse least-squares test problem of the form
*
* ( A )*X = ( B )
* ( DAMP*I ) ( 0 )
*
* having a specified solution X. The matrix A is constructed
* in the form A = HY*D*HZ, where D is an M by N diagonal matrix,
* and HY and HZ are Householder transformations.
*
* The first 6 parameters are input to LSTP. The remainder are
* output. LSTP is intended for use with M .GE. N.
*
*
* Functions and subroutines
*
* TESTPROB APROD1, HPROD
* BLAS DNRM2
* ------------------------------------------------------------------
* Intrinsics and local variables
INTRINSIC COS, SIN, SQRT
INTEGER I, J
DOUBLE PRECISION DNRM2
DOUBLE PRECISION ALFA, BETA, DAMPSQ, FOURPI, T
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
* ------------------------------------------------------------------
* Make two vectors of norm 1.0 for the Householder transformations.
* FOURPI need not be exact.
* ------------------------------------------------------------------
DAMPSQ = DAMP**2
FOURPI = 4.0 * 3.141592
ALFA = FOURPI / M
BETA = FOURPI / N
DO 100 I = 1, M
HY(I) = SIN( I * ALFA )
100 CONTINUE
DO 200 I = 1, N
HZ(I) = COS( I * BETA )
200 CONTINUE
ALFA = DNRM2 ( M, HY, 1 )
BETA = DNRM2 ( N, HZ, 1 )
CALL DSCAL ( M, (- ONE / ALFA), HY, 1 )
CALL DSCAL ( N, (- ONE / BETA), HZ, 1 )
*
* ------------------------------------------------------------------
* Set the diagonal matrix D. These are the singular values of A.
* ------------------------------------------------------------------
DO 300 I = 1, N
J = (I - 1 + NDUPLC) / NDUPLC
T = J * NDUPLC
T = T / N
D(I) = T**NPOWER
300 CONTINUE
ACOND = SQRT( (D(N)**2 + DAMPSQ) / (D(1)**2 + DAMPSQ) )
* ------------------------------------------------------------------
* Compute the residual vector, storing it in B.
* It takes the form HY*( s )
* ( t )
* where s is obtained from D*s = DAMP**2 * HZ * X
* and t can be anything.
* ------------------------------------------------------------------
CALL HPROD ( N, HZ, X, B )
DO 500 I = 1, N
B(I) = DAMPSQ * B(I) / D(I)
500 CONTINUE
T = ONE
DO 600 I = N + 1, M
J = I - N
B(I) = (T * J) / M
T = - T
600 CONTINUE
CALL HPROD ( M, HY, B, B )
* ------------------------------------------------------------------
* Now compute the true B = RESIDUAL + A*X.
* ------------------------------------------------------------------
RNORM = SQRT( DNRM2 ( M, B, 1 )**2
$ + DAMPSQ * DNRM2 ( N, X, 1 )**2 )
CALL APROD1( M, N, X, B, D, HY, HZ, W )
* End of LSTP
END
SUBROUTINE TEST ( M, N, NDUPLC, NPOWER, DAMP )
INTEGER M, N, NDUPLC, NPOWER
DOUBLE PRECISION DAMP
* ------------------------------------------------------------------
* This is an example driver routine for running LSQR.
* It generates a test problem, solves it, and examines the results.
* Note that subroutine APROD must be declared EXTERNAL
* if it is used only in the call to LSQR.
*
*
* Functions and subroutines
*
* TESTPROB APROD
* BLAS DCOPY, DNRM2, DSCAL
* ------------------------------------------------------------------
* Intrinsics and local variables
INTRINSIC MAX, SQRT
EXTERNAL APROD
INTEGER ISTOP, ITNLIM, J, NOUT
DOUBLE PRECISION DNRM2
PARAMETER ( MAXM = 200, MAXN = 100 )
DOUBLE PRECISION B(MAXM), U(MAXM),
$ V(MAXN), W(MAXN), X(MAXN),
$ SE(MAXN), XTRUE(MAXN)
DOUBLE PRECISION ATOL, BTOL, CONLIM,
$ ANORM, ACOND, RNORM, ARNORM,
$ DAMPSQ, ENORM, ETOL, XNORM
PARAMETER ( LENIW = 1, LENRW = 600 )
INTEGER IW(LENIW)
DOUBLE PRECISION RW(LENRW)
INTEGER LOCD, LOCHY, LOCHZ, LOCW, LTOTAL
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0 )
CHARACTER*34 LINE
DATA LINE
$ /'----------------------------------'/
* Set the desired solution XTRUE.
DO 100 J = 1, N
XTRUE(J) = N - J
100 CONTINUE
* Generate the specified test problem.
* The workspace array IW is not needed in this application.
* The workspace array RW is used for the following vectors:
* D(N), HY(M), HZ(N), W(MAX(M,N)).
* The vectors D, HY, HZ will define the test matrix A.
* W is needed for workspace in APROD1 and APROD2.
LOCD = 1
LOCHY = LOCD + N
LOCHZ = LOCHY + M
LOCW = LOCHZ + N
LTOTAL = LOCW + MAX(M,N) - 1
IF (LTOTAL .GT. LENRW) GO TO 900
CALL LSTP ( M, N, NDUPLC, NPOWER, DAMP, XTRUE,
$ B, RW(LOCD), RW(LOCHY), RW(LOCHZ), RW(LOCW),
$ ACOND, RNORM )
* Solve the problem defined by APROD, DAMP and B.
* Copy the rhs vector B into U (LSQR will overwrite U)
* and set the other input parameters for LSQR.
CALL DCOPY ( M, B, 1, U, 1 )
ATOL = 1.0E-10
BTOL = ATOL
CONLIM = 10.0 * ACOND
ITNLIM = M + N + 50
NOUT = 6
WRITE(NOUT, 1000) LINE, LINE,
$ M, N, NDUPLC, NPOWER, DAMP, ACOND, RNORM,
$ LINE, LINE
CALL LSQR ( M, N, APROD, DAMP,
$ LENIW, LENRW, IW, RW,
$ U, V, W, X, SE,
$ ATOL, BTOL, CONLIM, ITNLIM, NOUT,
$ ISTOP, ITN, ANORM, ACOND, RNORM, ARNORM, XNORM )
* Examine the results.
* We print the residual norms RNORM and ARNORM given by LSQR,
* and then compute their true values in terms of the solution X
* obtained by LSQR. At least one of them should be small.
DAMPSQ = DAMP**2
WRITE(NOUT, 2000)
WRITE(NOUT, 2100) RNORM, ARNORM, XNORM
* Compute U = A*X - B.
* This is the negative of the usual residual vector.
* It will be close to zero only if B is a compatible rhs
* and X is close to a solution.
CALL DCOPY ( M, B, 1, U, 1 )
CALL DSCAL ( M, (-ONE), U, 1 )
CALL APROD ( 1, M, N, X, U, LENIW, LENRW, IW, RW )
* Compute V = A(transpose)*U + DAMP**2 * X.
* This will be close to zero in all cases
* if X is close to a solution.
CALL DCOPY ( N, X, 1, V, 1 )
CALL DSCAL ( N, DAMPSQ, V, 1 )
CALL APROD ( 2, M, N, V, U, LENIW, LENRW, IW, RW )
* Compute the norms associated with X, U, V.
XNORM = DNRM2 ( N, X, 1 )
RNORM = SQRT( DNRM2 ( M, U, 1 )**2 + DAMPSQ * XNORM**2 )
ARNORM = DNRM2 ( N, V, 1 )
WRITE(NOUT, 2200) RNORM, ARNORM, XNORM
* Print the solution and standard error estimates from LSQR.
WRITE(NOUT, 2500) (J, X(J), J = 1, N)
WRITE(NOUT, 2600) (J, SE(J), J = 1, N)
* Print a clue about whether the solution looks OK.
DO 500 J = 1, N
W(J) = X(J) - XTRUE(J)
500 CONTINUE
ENORM = DNRM2 ( N, W, 1 ) / (ONE + DNRM2 ( N, XTRUE, 1 ))
ETOL = 1.0D-5
IF (ENORM .LE. ETOL) WRITE(NOUT, 3000) ENORM
IF (ENORM .GT. ETOL) WRITE(NOUT, 3100) ENORM
RETURN
* Not enough workspace.
900 WRITE(NOUT, 9000) LTOTAL
RETURN
1000 FORMAT(1P
$ // 1X, 2A
$ / ' Least-Squares Test Problem P(', 4I5, E12.2, ' )'
$ // ' Condition no. =', E12.4, ' Residual function =', E17.9
$ / 1X, 2A)
2000 FORMAT(
$ // 22X, ' Residual norm Residual norm Solution norm'
$ / 22X, '(Abar X - bbar) (Normal eqns) (X)' /)
2100 FORMAT(1P, ' Estimated by LSQR', 3E17.5)
2200 FORMAT(1P, ' Computed from X ', 3E17.5)
2500 FORMAT(//' Solution X' / 4(I6, G14.6))
2600 FORMAT(/ ' Standard errors SE' / 4(I6, G14.6))
3000 FORMAT(1P / ' LSQR appears to be successful.',
$ ' Relative error in X =', E10.2)
3100 FORMAT(1P / ' LSQR appears to have failed. ',
$ ' Relative error in X =', E10.2)
9000 FORMAT(/ ' XXX Insufficient workspace.',
$ ' The length of RW should be at least', I6)
* End of TEST
END
* -------------
* Main program.
* -------------
DOUBLE PRECISION DAMP1, DAMP2, DAMP3, DAMP4, ZERO
*
ZERO = 0.0
DAMP1 = 0.1
DAMP2 = 0.01
DAMP3 = 0.001
DAMP4 = 0.0001
CALL TEST ( 1, 1, 1, 1, ZERO )
CALL TEST ( 2, 1, 1, 1, ZERO )
CALL TEST ( 40, 40, 4, 4, ZERO )
CALL TEST ( 40, 40, 4, 4, DAMP2 )
CALL TEST ( 80, 40, 4, 4, DAMP2 )
STOP
* End of main program for testing LSQR
END