0001:       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
0002:      $                   WORK, LWORK, INFO )
0003: *
0004: *  -- LAPACK driver routine (version 3.2) --
0005: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
0006: *     November 2006
0007: *
0008: *     .. Scalar Arguments ..
0009:       CHARACTER          JOBU, JOBVT
0010:       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
0011: *     ..
0012: *     .. Array Arguments ..
0013:       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
0014:      $                   VT( LDVT, * ), WORK( * )
0015: *     ..
0016: *
0017: *  Purpose
0018: *  =======
0019: *
0020: *  DGESVD computes the singular value decomposition (SVD) of a real
0021: *  M-by-N matrix A, optionally computing the left and/or right singular
0022: *  vectors. The SVD is written
0023: *
0024: *       A = U * SIGMA * transpose(V)
0025: *
0026: *  where SIGMA is an M-by-N matrix which is zero except for its
0027: *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
0028: *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
0029: *  are the singular values of A; they are real and non-negative, and
0030: *  are returned in descending order.  The first min(m,n) columns of
0031: *  U and V are the left and right singular vectors of A.
0032: *
0033: *  Note that the routine returns V**T, not V.
0034: *
0035: *  Arguments
0036: *  =========
0037: *
0038: *  JOBU    (input) CHARACTER*1
0039: *          Specifies options for computing all or part of the matrix U:
0040: *          = 'A':  all M columns of U are returned in array U:
0041: *          = 'S':  the first min(m,n) columns of U (the left singular
0042: *                  vectors) are returned in the array U;
0043: *          = 'O':  the first min(m,n) columns of U (the left singular
0044: *                  vectors) are overwritten on the array A;
0045: *          = 'N':  no columns of U (no left singular vectors) are
0046: *                  computed.
0047: *
0048: *  JOBVT   (input) CHARACTER*1
0049: *          Specifies options for computing all or part of the matrix
0050: *          V**T:
0051: *          = 'A':  all N rows of V**T are returned in the array VT;
0052: *          = 'S':  the first min(m,n) rows of V**T (the right singular
0053: *                  vectors) are returned in the array VT;
0054: *          = 'O':  the first min(m,n) rows of V**T (the right singular
0055: *                  vectors) are overwritten on the array A;
0056: *          = 'N':  no rows of V**T (no right singular vectors) are
0057: *                  computed.
0058: *
0059: *          JOBVT and JOBU cannot both be 'O'.
0060: *
0061: *  M       (input) INTEGER
0062: *          The number of rows of the input matrix A.  M >= 0.
0063: *
0064: *  N       (input) INTEGER
0065: *          The number of columns of the input matrix A.  N >= 0.
0066: *
0067: *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
0068: *          On entry, the M-by-N matrix A.
0069: *          On exit,
0070: *          if JOBU = 'O',  A is overwritten with the first min(m,n)
0071: *                          columns of U (the left singular vectors,
0072: *                          stored columnwise);
0073: *          if JOBVT = 'O', A is overwritten with the first min(m,n)
0074: *                          rows of V**T (the right singular vectors,
0075: *                          stored rowwise);
0076: *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
0077: *                          are destroyed.
0078: *
0079: *  LDA     (input) INTEGER
0080: *          The leading dimension of the array A.  LDA >= max(1,M).
0081: *
0082: *  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
0083: *          The singular values of A, sorted so that S(i) >= S(i+1).
0084: *
0085: *  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
0086: *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
0087: *          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
0088: *          if JOBU = 'S', U contains the first min(m,n) columns of U
0089: *          (the left singular vectors, stored columnwise);
0090: *          if JOBU = 'N' or 'O', U is not referenced.
0091: *
0092: *  LDU     (input) INTEGER
0093: *          The leading dimension of the array U.  LDU >= 1; if
0094: *          JOBU = 'S' or 'A', LDU >= M.
0095: *
0096: *  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
0097: *          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
0098: *          V**T;
0099: *          if JOBVT = 'S', VT contains the first min(m,n) rows of
0100: *          V**T (the right singular vectors, stored rowwise);
0101: *          if JOBVT = 'N' or 'O', VT is not referenced.
0102: *
0103: *  LDVT    (input) INTEGER
0104: *          The leading dimension of the array VT.  LDVT >= 1; if
0105: *          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
0106: *
0107: *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
0108: *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
0109: *          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
0110: *          superdiagonal elements of an upper bidiagonal matrix B
0111: *          whose diagonal is in S (not necessarily sorted). B
0112: *          satisfies A = U * B * VT, so it has the same singular values
0113: *          as A, and singular vectors related by U and VT.
0114: *
0115: *  LWORK   (input) INTEGER
0116: *          The dimension of the array WORK.
0117: *          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
0118: *          For good performance, LWORK should generally be larger.
0119: *
0120: *          If LWORK = -1, then a workspace query is assumed; the routine
0121: *          only calculates the optimal size of the WORK array, returns
0122: *          this value as the first entry of the WORK array, and no error
0123: *          message related to LWORK is issued by XERBLA.
0124: *
0125: *  INFO    (output) INTEGER
0126: *          = 0:  successful exit.
0127: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
0128: *          > 0:  if DBDSQR did not converge, INFO specifies how many
0129: *                superdiagonals of an intermediate bidiagonal form B
0130: *                did not converge to zero. See the description of WORK
0131: *                above for details.
0132: *
0133: *  =====================================================================
0134: *
0135: *     .. Parameters ..
0136:       DOUBLE PRECISION   ZERO, ONE
0137:       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
0138: *     ..
0139: *     .. Local Scalars ..
0140:       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
0141:      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
0142:       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
0143:      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
0144:      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
0145:      $                   NRVT, WRKBL
0146:       DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
0147: *     ..
0148: *     .. Local Arrays ..
0149:       DOUBLE PRECISION   DUM( 1 )
0150: *     ..
0151: *     .. External Subroutines ..
0152:       EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
0153:      $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
0154:      $                   XERBLA
0155: *     ..
0156: *     .. External Functions ..
0157:       LOGICAL            LSAME
0158:       INTEGER            ILAENV
0159:       DOUBLE PRECISION   DLAMCH, DLANGE
0160:       EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
0161: *     ..
0162: *     .. Intrinsic Functions ..
0163:       INTRINSIC          MAX, MIN, SQRT
0164: *     ..
0165: *     .. Executable Statements ..
0166: *
0167: *     Test the input arguments
0168: *
0169:       INFO = 0
0170:       MINMN = MIN( M, N )
0171:       WNTUA = LSAME( JOBU, 'A' )
0172:       WNTUS = LSAME( JOBU, 'S' )
0173:       WNTUAS = WNTUA .OR. WNTUS
0174:       WNTUO = LSAME( JOBU, 'O' )
0175:       WNTUN = LSAME( JOBU, 'N' )
0176:       WNTVA = LSAME( JOBVT, 'A' )
0177:       WNTVS = LSAME( JOBVT, 'S' )
0178:       WNTVAS = WNTVA .OR. WNTVS
0179:       WNTVO = LSAME( JOBVT, 'O' )
0180:       WNTVN = LSAME( JOBVT, 'N' )
0181:       LQUERY = ( LWORK.EQ.-1 )
0182: *
0183:       IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
0184:          INFO = -1
0185:       ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
0186:      $         ( WNTVO .AND. WNTUO ) ) THEN
0187:          INFO = -2
0188:       ELSE IF( M.LT.0 ) THEN
0189:          INFO = -3
0190:       ELSE IF( N.LT.0 ) THEN
0191:          INFO = -4
0192:       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
0193:          INFO = -6
0194:       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
0195:          INFO = -9
0196:       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
0197:      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
0198:          INFO = -11
0199:       END IF
0200: *
0201: *     Compute workspace
0202: *      (Note: Comments in the code beginning "Workspace:" describe the
0203: *       minimal amount of workspace needed at that point in the code,
0204: *       as well as the preferred amount for good performance.
0205: *       NB refers to the optimal block size for the immediately
0206: *       following subroutine, as returned by ILAENV.)
0207: *
0208:       IF( INFO.EQ.0 ) THEN
0209:          MINWRK = 1
0210:          MAXWRK = 1
0211:          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
0212: *
0213: *           Compute space needed for DBDSQR
0214: *
0215:             MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
0216:             BDSPAC = 5*N
0217:             IF( M.GE.MNTHR ) THEN
0218:                IF( WNTUN ) THEN
0219: *
0220: *                 Path 1 (M much larger than N, JOBU='N')
0221: *
0222:                   MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
0223:      $                     -1 )
0224:                   MAXWRK = MAX( MAXWRK, 3*N+2*N*
0225:      $                     ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0226:                   IF( WNTVO .OR. WNTVAS )
0227:      $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0228:      $                        ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0229:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0230:                   MINWRK = MAX( 4*N, BDSPAC )
0231:                ELSE IF( WNTUO .AND. WNTVN ) THEN
0232: *
0233: *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0234: *
0235:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0236:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
0237:      $                    N, N, -1 ) )
0238:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0239:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0240:                   WRKBL = MAX( WRKBL, 3*N+N*
0241:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0242:                   WRKBL = MAX( WRKBL, BDSPAC )
0243:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0244:                   MINWRK = MAX( 3*N+M, BDSPAC )
0245:                ELSE IF( WNTUO .AND. WNTVAS ) THEN
0246: *
0247: *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
0248: *                 'A')
0249: *
0250:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0251:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
0252:      $                    N, N, -1 ) )
0253:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0254:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0255:                   WRKBL = MAX( WRKBL, 3*N+N*
0256:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0257:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0258:      $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0259:                   WRKBL = MAX( WRKBL, BDSPAC )
0260:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0261:                   MINWRK = MAX( 3*N+M, BDSPAC )
0262:                ELSE IF( WNTUS .AND. WNTVN ) THEN
0263: *
0264: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0265: *
0266:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0267:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
0268:      $                    N, N, -1 ) )
0269:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0270:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0271:                   WRKBL = MAX( WRKBL, 3*N+N*
0272:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0273:                   WRKBL = MAX( WRKBL, BDSPAC )
0274:                   MAXWRK = N*N + WRKBL
0275:                   MINWRK = MAX( 3*N+M, BDSPAC )
0276:                ELSE IF( WNTUS .AND. WNTVO ) THEN
0277: *
0278: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
0279: *
0280:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0281:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
0282:      $                    N, N, -1 ) )
0283:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0284:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0285:                   WRKBL = MAX( WRKBL, 3*N+N*
0286:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0287:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0288:      $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0289:                   WRKBL = MAX( WRKBL, BDSPAC )
0290:                   MAXWRK = 2*N*N + WRKBL
0291:                   MINWRK = MAX( 3*N+M, BDSPAC )
0292:                ELSE IF( WNTUS .AND. WNTVAS ) THEN
0293: *
0294: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
0295: *                 'A')
0296: *
0297:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0298:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
0299:      $                    N, N, -1 ) )
0300:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0301:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0302:                   WRKBL = MAX( WRKBL, 3*N+N*
0303:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0304:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0305:      $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0306:                   WRKBL = MAX( WRKBL, BDSPAC )
0307:                   MAXWRK = N*N + WRKBL
0308:                   MINWRK = MAX( 3*N+M, BDSPAC )
0309:                ELSE IF( WNTUA .AND. WNTVN ) THEN
0310: *
0311: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
0312: *
0313:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0314:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
0315:      $                    M, N, -1 ) )
0316:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0317:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0318:                   WRKBL = MAX( WRKBL, 3*N+N*
0319:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0320:                   WRKBL = MAX( WRKBL, BDSPAC )
0321:                   MAXWRK = N*N + WRKBL
0322:                   MINWRK = MAX( 3*N+M, BDSPAC )
0323:                ELSE IF( WNTUA .AND. WNTVO ) THEN
0324: *
0325: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
0326: *
0327:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0328:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
0329:      $                    M, N, -1 ) )
0330:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0331:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0332:                   WRKBL = MAX( WRKBL, 3*N+N*
0333:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0334:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0335:      $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0336:                   WRKBL = MAX( WRKBL, BDSPAC )
0337:                   MAXWRK = 2*N*N + WRKBL
0338:                   MINWRK = MAX( 3*N+M, BDSPAC )
0339:                ELSE IF( WNTUA .AND. WNTVAS ) THEN
0340: *
0341: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
0342: *                 'A')
0343: *
0344:                   WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
0345:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
0346:      $                    M, N, -1 ) )
0347:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0348:      $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
0349:                   WRKBL = MAX( WRKBL, 3*N+N*
0350:      $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
0351:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0352:      $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0353:                   WRKBL = MAX( WRKBL, BDSPAC )
0354:                   MAXWRK = N*N + WRKBL
0355:                   MINWRK = MAX( 3*N+M, BDSPAC )
0356:                END IF
0357:             ELSE
0358: *
0359: *              Path 10 (M at least N, but not much larger)
0360: *
0361:                MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
0362:      $                  -1, -1 )
0363:                IF( WNTUS .OR. WNTUO )
0364:      $            MAXWRK = MAX( MAXWRK, 3*N+N*
0365:      $                     ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
0366:                IF( WNTUA )
0367:      $            MAXWRK = MAX( MAXWRK, 3*N+M*
0368:      $                     ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
0369:                IF( .NOT.WNTVN )
0370:      $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0371:      $                     ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
0372:                MAXWRK = MAX( MAXWRK, BDSPAC )
0373:                MINWRK = MAX( 3*N+M, BDSPAC )
0374:             END IF
0375:          ELSE IF( MINMN.GT.0 ) THEN
0376: *
0377: *           Compute space needed for DBDSQR
0378: *
0379:             MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
0380:             BDSPAC = 5*M
0381:             IF( N.GE.MNTHR ) THEN
0382:                IF( WNTVN ) THEN
0383: *
0384: *                 Path 1t(N much larger than M, JOBVT='N')
0385: *
0386:                   MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
0387:      $                     -1 )
0388:                   MAXWRK = MAX( MAXWRK, 3*M+2*M*
0389:      $                     ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0390:                   IF( WNTUO .OR. WNTUAS )
0391:      $               MAXWRK = MAX( MAXWRK, 3*M+M*
0392:      $                        ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0393:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0394:                   MINWRK = MAX( 4*M, BDSPAC )
0395:                ELSE IF( WNTVO .AND. WNTUN ) THEN
0396: *
0397: *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
0398: *
0399:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0400:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
0401:      $                    N, M, -1 ) )
0402:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0403:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0404:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0405:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0406:                   WRKBL = MAX( WRKBL, BDSPAC )
0407:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0408:                   MINWRK = MAX( 3*M+N, BDSPAC )
0409:                ELSE IF( WNTVO .AND. WNTUAS ) THEN
0410: *
0411: *                 Path 3t(N much larger than M, JOBU='S' or 'A',
0412: *                 JOBVT='O')
0413: *
0414:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0415:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
0416:      $                    N, M, -1 ) )
0417:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0418:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0419:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0420:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0421:                   WRKBL = MAX( WRKBL, 3*M+M*
0422:      $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0423:                   WRKBL = MAX( WRKBL, BDSPAC )
0424:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0425:                   MINWRK = MAX( 3*M+N, BDSPAC )
0426:                ELSE IF( WNTVS .AND. WNTUN ) THEN
0427: *
0428: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
0429: *
0430:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0431:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
0432:      $                    N, M, -1 ) )
0433:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0434:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0435:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0436:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0437:                   WRKBL = MAX( WRKBL, BDSPAC )
0438:                   MAXWRK = M*M + WRKBL
0439:                   MINWRK = MAX( 3*M+N, BDSPAC )
0440:                ELSE IF( WNTVS .AND. WNTUO ) THEN
0441: *
0442: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
0443: *
0444:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0445:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
0446:      $                    N, M, -1 ) )
0447:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0448:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0449:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0450:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0451:                   WRKBL = MAX( WRKBL, 3*M+M*
0452:      $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0453:                   WRKBL = MAX( WRKBL, BDSPAC )
0454:                   MAXWRK = 2*M*M + WRKBL
0455:                   MINWRK = MAX( 3*M+N, BDSPAC )
0456:                ELSE IF( WNTVS .AND. WNTUAS ) THEN
0457: *
0458: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
0459: *                 JOBVT='S')
0460: *
0461:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0462:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
0463:      $                    N, M, -1 ) )
0464:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0465:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0466:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0467:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0468:                   WRKBL = MAX( WRKBL, 3*M+M*
0469:      $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0470:                   WRKBL = MAX( WRKBL, BDSPAC )
0471:                   MAXWRK = M*M + WRKBL
0472:                   MINWRK = MAX( 3*M+N, BDSPAC )
0473:                ELSE IF( WNTVA .AND. WNTUN ) THEN
0474: *
0475: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
0476: *
0477:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0478:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
0479:      $                    N, M, -1 ) )
0480:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0481:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0482:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0483:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0484:                   WRKBL = MAX( WRKBL, BDSPAC )
0485:                   MAXWRK = M*M + WRKBL
0486:                   MINWRK = MAX( 3*M+N, BDSPAC )
0487:                ELSE IF( WNTVA .AND. WNTUO ) THEN
0488: *
0489: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
0490: *
0491:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0492:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
0493:      $                    N, M, -1 ) )
0494:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0495:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0496:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0497:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0498:                   WRKBL = MAX( WRKBL, 3*M+M*
0499:      $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0500:                   WRKBL = MAX( WRKBL, BDSPAC )
0501:                   MAXWRK = 2*M*M + WRKBL
0502:                   MINWRK = MAX( 3*M+N, BDSPAC )
0503:                ELSE IF( WNTVA .AND. WNTUAS ) THEN
0504: *
0505: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
0506: *                 JOBVT='A')
0507: *
0508:                   WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
0509:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
0510:      $                    N, M, -1 ) )
0511:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0512:      $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
0513:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0514:      $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
0515:                   WRKBL = MAX( WRKBL, 3*M+M*
0516:      $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0517:                   WRKBL = MAX( WRKBL, BDSPAC )
0518:                   MAXWRK = M*M + WRKBL
0519:                   MINWRK = MAX( 3*M+N, BDSPAC )
0520:                END IF
0521:             ELSE
0522: *
0523: *              Path 10t(N greater than M, but not much larger)
0524: *
0525:                MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
0526:      $                  -1, -1 )
0527:                IF( WNTVS .OR. WNTVO )
0528:      $            MAXWRK = MAX( MAXWRK, 3*M+M*
0529:      $                     ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
0530:                IF( WNTVA )
0531:      $            MAXWRK = MAX( MAXWRK, 3*M+N*
0532:      $                     ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
0533:                IF( .NOT.WNTUN )
0534:      $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
0535:      $                     ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
0536:                MAXWRK = MAX( MAXWRK, BDSPAC )
0537:                MINWRK = MAX( 3*M+N, BDSPAC )
0538:             END IF
0539:          END IF
0540:          MAXWRK = MAX( MAXWRK, MINWRK )
0541:          WORK( 1 ) = MAXWRK
0542: *
0543:          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
0544:             INFO = -13
0545:          END IF
0546:       END IF
0547: *
0548:       IF( INFO.NE.0 ) THEN
0549:          CALL XERBLA( 'DGESVD', -INFO )
0550:          RETURN
0551:       ELSE IF( LQUERY ) THEN
0552:          RETURN
0553:       END IF
0554: *
0555: *     Quick return if possible
0556: *
0557:       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
0558:          RETURN
0559:       END IF
0560: *
0561: *     Get machine constants
0562: *
0563:       EPS = DLAMCH( 'P' )
0564:       SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
0565:       BIGNUM = ONE / SMLNUM
0566: *
0567: *     Scale A if max element outside range [SMLNUM,BIGNUM]
0568: *
0569:       ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
0570:       ISCL = 0
0571:       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
0572:          ISCL = 1
0573:          CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
0574:       ELSE IF( ANRM.GT.BIGNUM ) THEN
0575:          ISCL = 1
0576:          CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
0577:       END IF
0578: *
0579:       IF( M.GE.N ) THEN
0580: *
0581: *        A has at least as many rows as columns. If A has sufficiently
0582: *        more rows than columns, first reduce using the QR
0583: *        decomposition (if sufficient workspace available)
0584: *
0585:          IF( M.GE.MNTHR ) THEN
0586: *
0587:             IF( WNTUN ) THEN
0588: *
0589: *              Path 1 (M much larger than N, JOBU='N')
0590: *              No left singular vectors to be computed
0591: *
0592:                ITAU = 1
0593:                IWORK = ITAU + N
0594: *
0595: *              Compute A=Q*R
0596: *              (Workspace: need 2*N, prefer N+N*NB)
0597: *
0598:                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
0599:      $                      LWORK-IWORK+1, IERR )
0600: *
0601: *              Zero out below R
0602: *
0603:                CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
0604:                IE = 1
0605:                ITAUQ = IE + N
0606:                ITAUP = ITAUQ + N
0607:                IWORK = ITAUP + N
0608: *
0609: *              Bidiagonalize R in A
0610: *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
0611: *
0612:                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
0613:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
0614:      $                      IERR )
0615:                NCVT = 0
0616:                IF( WNTVO .OR. WNTVAS ) THEN
0617: *
0618: *                 If right singular vectors desired, generate P'.
0619: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0620: *
0621:                   CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
0622:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0623:                   NCVT = N
0624:                END IF
0625:                IWORK = IE + N
0626: *
0627: *              Perform bidiagonal QR iteration, computing right
0628: *              singular vectors of A in A if desired
0629: *              (Workspace: need BDSPAC)
0630: *
0631:                CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
0632:      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
0633: *
0634: *              If right singular vectors desired in VT, copy them there
0635: *
0636:                IF( WNTVAS )
0637:      $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
0638: *
0639:             ELSE IF( WNTUO .AND. WNTVN ) THEN
0640: *
0641: *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0642: *              N left singular vectors to be overwritten on A and
0643: *              no right singular vectors to be computed
0644: *
0645:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0646: *
0647: *                 Sufficient workspace for a fast algorithm
0648: *
0649:                   IR = 1
0650:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0651: *
0652: *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
0653: *
0654:                      LDWRKU = LDA
0655:                      LDWRKR = LDA
0656:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0657: *
0658: *                    WORK(IU) is LDA by N, WORK(IR) is N by N
0659: *
0660:                      LDWRKU = LDA
0661:                      LDWRKR = N
0662:                   ELSE
0663: *
0664: *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
0665: *
0666:                      LDWRKU = ( LWORK-N*N-N ) / N
0667:                      LDWRKR = N
0668:                   END IF
0669:                   ITAU = IR + LDWRKR*N
0670:                   IWORK = ITAU + N
0671: *
0672: *                 Compute A=Q*R
0673: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0674: *
0675:                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
0676:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0677: *
0678: *                 Copy R to WORK(IR) and zero out below it
0679: *
0680:                   CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
0681:                   CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
0682:      $                         LDWRKR )
0683: *
0684: *                 Generate Q in A
0685: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0686: *
0687:                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
0688:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0689:                   IE = ITAU
0690:                   ITAUQ = IE + N
0691:                   ITAUP = ITAUQ + N
0692:                   IWORK = ITAUP + N
0693: *
0694: *                 Bidiagonalize R in WORK(IR)
0695: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0696: *
0697:                   CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
0698:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0699:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0700: *
0701: *                 Generate left vectors bidiagonalizing R
0702: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0703: *
0704:                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0705:      $                         WORK( ITAUQ ), WORK( IWORK ),
0706:      $                         LWORK-IWORK+1, IERR )
0707:                   IWORK = IE + N
0708: *
0709: *                 Perform bidiagonal QR iteration, computing left
0710: *                 singular vectors of R in WORK(IR)
0711: *                 (Workspace: need N*N+BDSPAC)
0712: *
0713:                   CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
0714:      $                         WORK( IR ), LDWRKR, DUM, 1,
0715:      $                         WORK( IWORK ), INFO )
0716:                   IU = IE + N
0717: *
0718: *                 Multiply Q in A by left singular vectors of R in
0719: *                 WORK(IR), storing result in WORK(IU) and copying to A
0720: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0721: *
0722:                   DO 10 I = 1, M, LDWRKU
0723:                      CHUNK = MIN( M-I+1, LDWRKU )
0724:                      CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0725:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0726:      $                           WORK( IU ), LDWRKU )
0727:                      CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0728:      $                            A( I, 1 ), LDA )
0729:    10             CONTINUE
0730: *
0731:                ELSE
0732: *
0733: *                 Insufficient workspace for a fast algorithm
0734: *
0735:                   IE = 1
0736:                   ITAUQ = IE + N
0737:                   ITAUP = ITAUQ + N
0738:                   IWORK = ITAUP + N
0739: *
0740: *                 Bidiagonalize A
0741: *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
0742: *
0743:                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
0744:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0745:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0746: *
0747: *                 Generate left vectors bidiagonalizing A
0748: *                 (Workspace: need 4*N, prefer 3*N+N*NB)
0749: *
0750:                   CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
0751:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0752:                   IWORK = IE + N
0753: *
0754: *                 Perform bidiagonal QR iteration, computing left
0755: *                 singular vectors of A in A
0756: *                 (Workspace: need BDSPAC)
0757: *
0758:                   CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
0759:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0760: *
0761:                END IF
0762: *
0763:             ELSE IF( WNTUO .AND. WNTVAS ) THEN
0764: *
0765: *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
0766: *              N left singular vectors to be overwritten on A and
0767: *              N right singular vectors to be computed in VT
0768: *
0769:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0770: *
0771: *                 Sufficient workspace for a fast algorithm
0772: *
0773:                   IR = 1
0774:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0775: *
0776: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
0777: *
0778:                      LDWRKU = LDA
0779:                      LDWRKR = LDA
0780:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0781: *
0782: *                    WORK(IU) is LDA by N and WORK(IR) is N by N
0783: *
0784:                      LDWRKU = LDA
0785:                      LDWRKR = N
0786:                   ELSE
0787: *
0788: *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
0789: *
0790:                      LDWRKU = ( LWORK-N*N-N ) / N
0791:                      LDWRKR = N
0792:                   END IF
0793:                   ITAU = IR + LDWRKR*N
0794:                   IWORK = ITAU + N
0795: *
0796: *                 Compute A=Q*R
0797: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0798: *
0799:                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
0800:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0801: *
0802: *                 Copy R to VT, zeroing out below it
0803: *
0804:                   CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
0805:                   IF( N.GT.1 )
0806:      $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
0807:      $                            VT( 2, 1 ), LDVT )
0808: *
0809: *                 Generate Q in A
0810: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0811: *
0812:                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
0813:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0814:                   IE = ITAU
0815:                   ITAUQ = IE + N
0816:                   ITAUP = ITAUQ + N
0817:                   IWORK = ITAUP + N
0818: *
0819: *                 Bidiagonalize R in VT, copying result to WORK(IR)
0820: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0821: *
0822:                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0823:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0824:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0825:                   CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
0826: *
0827: *                 Generate left vectors bidiagonalizing R in WORK(IR)
0828: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0829: *
0830:                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0831:      $                         WORK( ITAUQ ), WORK( IWORK ),
0832:      $                         LWORK-IWORK+1, IERR )
0833: *
0834: *                 Generate right vectors bidiagonalizing R in VT
0835: *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
0836: *
0837:                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0838:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0839:                   IWORK = IE + N
0840: *
0841: *                 Perform bidiagonal QR iteration, computing left
0842: *                 singular vectors of R in WORK(IR) and computing right
0843: *                 singular vectors of R in VT
0844: *                 (Workspace: need N*N+BDSPAC)
0845: *
0846:                   CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
0847:      $                         WORK( IR ), LDWRKR, DUM, 1,
0848:      $                         WORK( IWORK ), INFO )
0849:                   IU = IE + N
0850: *
0851: *                 Multiply Q in A by left singular vectors of R in
0852: *                 WORK(IR), storing result in WORK(IU) and copying to A
0853: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0854: *
0855:                   DO 20 I = 1, M, LDWRKU
0856:                      CHUNK = MIN( M-I+1, LDWRKU )
0857:                      CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0858:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0859:      $                           WORK( IU ), LDWRKU )
0860:                      CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0861:      $                            A( I, 1 ), LDA )
0862:    20             CONTINUE
0863: *
0864:                ELSE
0865: *
0866: *                 Insufficient workspace for a fast algorithm
0867: *
0868:                   ITAU = 1
0869:                   IWORK = ITAU + N
0870: *
0871: *                 Compute A=Q*R
0872: *                 (Workspace: need 2*N, prefer N+N*NB)
0873: *
0874:                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
0875:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0876: *
0877: *                 Copy R to VT, zeroing out below it
0878: *
0879:                   CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
0880:                   IF( N.GT.1 )
0881:      $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
0882:      $                            VT( 2, 1 ), LDVT )
0883: *
0884: *                 Generate Q in A
0885: *                 (Workspace: need 2*N, prefer N+N*NB)
0886: *
0887:                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
0888:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0889:                   IE = ITAU
0890:                   ITAUQ = IE + N
0891:                   ITAUP = ITAUQ + N
0892:                   IWORK = ITAUP + N
0893: *
0894: *                 Bidiagonalize R in VT
0895: *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
0896: *
0897:                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0898:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0899:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0900: *
0901: *                 Multiply Q in A by left vectors bidiagonalizing R
0902: *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
0903: *
0904:                   CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
0905:      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
0906:      $                         LWORK-IWORK+1, IERR )
0907: *
0908: *                 Generate right vectors bidiagonalizing R in VT
0909: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0910: *
0911:                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0912:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0913:                   IWORK = IE + N
0914: *
0915: *                 Perform bidiagonal QR iteration, computing left
0916: *                 singular vectors of A in A and computing right
0917: *                 singular vectors of A in VT
0918: *                 (Workspace: need BDSPAC)
0919: *
0920:                   CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
0921:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0922: *
0923:                END IF
0924: *
0925:             ELSE IF( WNTUS ) THEN
0926: *
0927:                IF( WNTVN ) THEN
0928: *
0929: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0930: *                 N left singular vectors to be computed in U and
0931: *                 no right singular vectors to be computed
0932: *
0933:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0934: *
0935: *                    Sufficient workspace for a fast algorithm
0936: *
0937:                      IR = 1
0938:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
0939: *
0940: *                       WORK(IR) is LDA by N
0941: *
0942:                         LDWRKR = LDA
0943:                      ELSE
0944: *
0945: *                       WORK(IR) is N by N
0946: *
0947:                         LDWRKR = N
0948:                      END IF
0949:                      ITAU = IR + LDWRKR*N
0950:                      IWORK = ITAU + N
0951: *
0952: *                    Compute A=Q*R
0953: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0954: *
0955:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
0956:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0957: *
0958: *                    Copy R to WORK(IR), zeroing out below it
0959: *
0960:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
0961:      $                            LDWRKR )
0962:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
0963:      $                            WORK( IR+1 ), LDWRKR )
0964: *
0965: *                    Generate Q in A
0966: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0967: *
0968:                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
0969:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0970:                      IE = ITAU
0971:                      ITAUQ = IE + N
0972:                      ITAUP = ITAUQ + N
0973:                      IWORK = ITAUP + N
0974: *
0975: *                    Bidiagonalize R in WORK(IR)
0976: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0977: *
0978:                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
0979:      $                            WORK( IE ), WORK( ITAUQ ),
0980:      $                            WORK( ITAUP ), WORK( IWORK ),
0981:      $                            LWORK-IWORK+1, IERR )
0982: *
0983: *                    Generate left vectors bidiagonalizing R in WORK(IR)
0984: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0985: *
0986:                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0987:      $                            WORK( ITAUQ ), WORK( IWORK ),
0988:      $                            LWORK-IWORK+1, IERR )
0989:                      IWORK = IE + N
0990: *
0991: *                    Perform bidiagonal QR iteration, computing left
0992: *                    singular vectors of R in WORK(IR)
0993: *                    (Workspace: need N*N+BDSPAC)
0994: *
0995:                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
0996:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
0997:      $                            WORK( IWORK ), INFO )
0998: *
0999: *                    Multiply Q in A by left singular vectors of R in
1000: *                    WORK(IR), storing result in U
1001: *                    (Workspace: need N*N)
1002: *
1003:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1004:      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1005: *
1006:                   ELSE
1007: *
1008: *                    Insufficient workspace for a fast algorithm
1009: *
1010:                      ITAU = 1
1011:                      IWORK = ITAU + N
1012: *
1013: *                    Compute A=Q*R, copying result to U
1014: *                    (Workspace: need 2*N, prefer N+N*NB)
1015: *
1016:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1017:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1018:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1019: *
1020: *                    Generate Q in U
1021: *                    (Workspace: need 2*N, prefer N+N*NB)
1022: *
1023:                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1024:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1025:                      IE = ITAU
1026:                      ITAUQ = IE + N
1027:                      ITAUP = ITAUQ + N
1028:                      IWORK = ITAUP + N
1029: *
1030: *                    Zero out below R in A
1031: *
1032:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1033:      $                            LDA )
1034: *
1035: *                    Bidiagonalize R in A
1036: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1037: *
1038:                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1039:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1040:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1041: *
1042: *                    Multiply Q in U by left vectors bidiagonalizing R
1043: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1044: *
1045:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1046:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1047:      $                            LWORK-IWORK+1, IERR )
1048:                      IWORK = IE + N
1049: *
1050: *                    Perform bidiagonal QR iteration, computing left
1051: *                    singular vectors of A in U
1052: *                    (Workspace: need BDSPAC)
1053: *
1054:                      CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1055:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1056:      $                            INFO )
1057: *
1058:                   END IF
1059: *
1060:                ELSE IF( WNTVO ) THEN
1061: *
1062: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1063: *                 N left singular vectors to be computed in U and
1064: *                 N right singular vectors to be overwritten on A
1065: *
1066:                   IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1067: *
1068: *                    Sufficient workspace for a fast algorithm
1069: *
1070:                      IU = 1
1071:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1072: *
1073: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1074: *
1075:                         LDWRKU = LDA
1076:                         IR = IU + LDWRKU*N
1077:                         LDWRKR = LDA
1078:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1079: *
1080: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1081: *
1082:                         LDWRKU = LDA
1083:                         IR = IU + LDWRKU*N
1084:                         LDWRKR = N
1085:                      ELSE
1086: *
1087: *                       WORK(IU) is N by N and WORK(IR) is N by N
1088: *
1089:                         LDWRKU = N
1090:                         IR = IU + LDWRKU*N
1091:                         LDWRKR = N
1092:                      END IF
1093:                      ITAU = IR + LDWRKR*N
1094:                      IWORK = ITAU + N
1095: *
1096: *                    Compute A=Q*R
1097: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1098: *
1099:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1100:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1101: *
1102: *                    Copy R to WORK(IU), zeroing out below it
1103: *
1104:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1105:      $                            LDWRKU )
1106:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1107:      $                            WORK( IU+1 ), LDWRKU )
1108: *
1109: *                    Generate Q in A
1110: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1111: *
1112:                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1113:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1114:                      IE = ITAU
1115:                      ITAUQ = IE + N
1116:                      ITAUP = ITAUQ + N
1117:                      IWORK = ITAUP + N
1118: *
1119: *                    Bidiagonalize R in WORK(IU), copying result to
1120: *                    WORK(IR)
1121: *                    (Workspace: need 2*N*N+4*N,
1122: *                                prefer 2*N*N+3*N+2*N*NB)
1123: *
1124:                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1125:      $                            WORK( IE ), WORK( ITAUQ ),
1126:      $                            WORK( ITAUP ), WORK( IWORK ),
1127:      $                            LWORK-IWORK+1, IERR )
1128:                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1129:      $                            WORK( IR ), LDWRKR )
1130: *
1131: *                    Generate left bidiagonalizing vectors in WORK(IU)
1132: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1133: *
1134:                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1135:      $                            WORK( ITAUQ ), WORK( IWORK ),
1136:      $                            LWORK-IWORK+1, IERR )
1137: *
1138: *                    Generate right bidiagonalizing vectors in WORK(IR)
1139: *                    (Workspace: need 2*N*N+4*N-1,
1140: *                                prefer 2*N*N+3*N+(N-1)*NB)
1141: *
1142:                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1143:      $                            WORK( ITAUP ), WORK( IWORK ),
1144:      $                            LWORK-IWORK+1, IERR )
1145:                      IWORK = IE + N
1146: *
1147: *                    Perform bidiagonal QR iteration, computing left
1148: *                    singular vectors of R in WORK(IU) and computing
1149: *                    right singular vectors of R in WORK(IR)
1150: *                    (Workspace: need 2*N*N+BDSPAC)
1151: *
1152:                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1153:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1154:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1155: *
1156: *                    Multiply Q in A by left singular vectors of R in
1157: *                    WORK(IU), storing result in U
1158: *                    (Workspace: need N*N)
1159: *
1160:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1161:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1162: *
1163: *                    Copy right singular vectors of R to A
1164: *                    (Workspace: need N*N)
1165: *
1166:                      CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1167:      $                            LDA )
1168: *
1169:                   ELSE
1170: *
1171: *                    Insufficient workspace for a fast algorithm
1172: *
1173:                      ITAU = 1
1174:                      IWORK = ITAU + N
1175: *
1176: *                    Compute A=Q*R, copying result to U
1177: *                    (Workspace: need 2*N, prefer N+N*NB)
1178: *
1179:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1180:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1181:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1182: *
1183: *                    Generate Q in U
1184: *                    (Workspace: need 2*N, prefer N+N*NB)
1185: *
1186:                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1187:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1188:                      IE = ITAU
1189:                      ITAUQ = IE + N
1190:                      ITAUP = ITAUQ + N
1191:                      IWORK = ITAUP + N
1192: *
1193: *                    Zero out below R in A
1194: *
1195:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1196:      $                            LDA )
1197: *
1198: *                    Bidiagonalize R in A
1199: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1200: *
1201:                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1202:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1203:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1204: *
1205: *                    Multiply Q in U by left vectors bidiagonalizing R
1206: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1207: *
1208:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1209:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1210:      $                            LWORK-IWORK+1, IERR )
1211: *
1212: *                    Generate right vectors bidiagonalizing R in A
1213: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1214: *
1215:                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1216:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1217:                      IWORK = IE + N
1218: *
1219: *                    Perform bidiagonal QR iteration, computing left
1220: *                    singular vectors of A in U and computing right
1221: *                    singular vectors of A in A
1222: *                    (Workspace: need BDSPAC)
1223: *
1224:                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1225:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1226:      $                            INFO )
1227: *
1228:                   END IF
1229: *
1230:                ELSE IF( WNTVAS ) THEN
1231: *
1232: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1233: *                         or 'A')
1234: *                 N left singular vectors to be computed in U and
1235: *                 N right singular vectors to be computed in VT
1236: *
1237:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1238: *
1239: *                    Sufficient workspace for a fast algorithm
1240: *
1241:                      IU = 1
1242:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1243: *
1244: *                       WORK(IU) is LDA by N
1245: *
1246:                         LDWRKU = LDA
1247:                      ELSE
1248: *
1249: *                       WORK(IU) is N by N
1250: *
1251:                         LDWRKU = N
1252:                      END IF
1253:                      ITAU = IU + LDWRKU*N
1254:                      IWORK = ITAU + N
1255: *
1256: *                    Compute A=Q*R
1257: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1258: *
1259:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1260:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1261: *
1262: *                    Copy R to WORK(IU), zeroing out below it
1263: *
1264:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1265:      $                            LDWRKU )
1266:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1267:      $                            WORK( IU+1 ), LDWRKU )
1268: *
1269: *                    Generate Q in A
1270: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1271: *
1272:                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1273:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1274:                      IE = ITAU
1275:                      ITAUQ = IE + N
1276:                      ITAUP = ITAUQ + N
1277:                      IWORK = ITAUP + N
1278: *
1279: *                    Bidiagonalize R in WORK(IU), copying result to VT
1280: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1281: *
1282:                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1283:      $                            WORK( IE ), WORK( ITAUQ ),
1284:      $                            WORK( ITAUP ), WORK( IWORK ),
1285:      $                            LWORK-IWORK+1, IERR )
1286:                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1287:      $                            LDVT )
1288: *
1289: *                    Generate left bidiagonalizing vectors in WORK(IU)
1290: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1291: *
1292:                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1293:      $                            WORK( ITAUQ ), WORK( IWORK ),
1294:      $                            LWORK-IWORK+1, IERR )
1295: *
1296: *                    Generate right bidiagonalizing vectors in VT
1297: *                    (Workspace: need N*N+4*N-1,
1298: *                                prefer N*N+3*N+(N-1)*NB)
1299: *
1300:                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1301:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1302:                      IWORK = IE + N
1303: *
1304: *                    Perform bidiagonal QR iteration, computing left
1305: *                    singular vectors of R in WORK(IU) and computing
1306: *                    right singular vectors of R in VT
1307: *                    (Workspace: need N*N+BDSPAC)
1308: *
1309:                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1310:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1311:      $                            WORK( IWORK ), INFO )
1312: *
1313: *                    Multiply Q in A by left singular vectors of R in
1314: *                    WORK(IU), storing result in U
1315: *                    (Workspace: need N*N)
1316: *
1317:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1318:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1319: *
1320:                   ELSE
1321: *
1322: *                    Insufficient workspace for a fast algorithm
1323: *
1324:                      ITAU = 1
1325:                      IWORK = ITAU + N
1326: *
1327: *                    Compute A=Q*R, copying result to U
1328: *                    (Workspace: need 2*N, prefer N+N*NB)
1329: *
1330:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1331:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1332:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1333: *
1334: *                    Generate Q in U
1335: *                    (Workspace: need 2*N, prefer N+N*NB)
1336: *
1337:                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1338:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1339: *
1340: *                    Copy R to VT, zeroing out below it
1341: *
1342:                      CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1343:                      IF( N.GT.1 )
1344:      $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1345:      $                               VT( 2, 1 ), LDVT )
1346:                      IE = ITAU
1347:                      ITAUQ = IE + N
1348:                      ITAUP = ITAUQ + N
1349:                      IWORK = ITAUP + N
1350: *
1351: *                    Bidiagonalize R in VT
1352: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1353: *
1354:                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1355:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1356:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1357: *
1358: *                    Multiply Q in U by left bidiagonalizing vectors
1359: *                    in VT
1360: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1361: *
1362:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1363:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1364:      $                            LWORK-IWORK+1, IERR )
1365: *
1366: *                    Generate right bidiagonalizing vectors in VT
1367: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1368: *
1369:                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1370:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1371:                      IWORK = IE + N
1372: *
1373: *                    Perform bidiagonal QR iteration, computing left
1374: *                    singular vectors of A in U and computing right
1375: *                    singular vectors of A in VT
1376: *                    (Workspace: need BDSPAC)
1377: *
1378:                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1379:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1380:      $                            INFO )
1381: *
1382:                   END IF
1383: *
1384:                END IF
1385: *
1386:             ELSE IF( WNTUA ) THEN
1387: *
1388:                IF( WNTVN ) THEN
1389: *
1390: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1391: *                 M left singular vectors to be computed in U and
1392: *                 no right singular vectors to be computed
1393: *
1394:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1395: *
1396: *                    Sufficient workspace for a fast algorithm
1397: *
1398:                      IR = 1
1399:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1400: *
1401: *                       WORK(IR) is LDA by N
1402: *
1403:                         LDWRKR = LDA
1404:                      ELSE
1405: *
1406: *                       WORK(IR) is N by N
1407: *
1408:                         LDWRKR = N
1409:                      END IF
1410:                      ITAU = IR + LDWRKR*N
1411:                      IWORK = ITAU + N
1412: *
1413: *                    Compute A=Q*R, copying result to U
1414: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1415: *
1416:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1417:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1418:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1419: *
1420: *                    Copy R to WORK(IR), zeroing out below it
1421: *
1422:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1423:      $                            LDWRKR )
1424:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1425:      $                            WORK( IR+1 ), LDWRKR )
1426: *
1427: *                    Generate Q in U
1428: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1429: *
1430:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1431:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1432:                      IE = ITAU
1433:                      ITAUQ = IE + N
1434:                      ITAUP = ITAUQ + N
1435:                      IWORK = ITAUP + N
1436: *
1437: *                    Bidiagonalize R in WORK(IR)
1438: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1439: *
1440:                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1441:      $                            WORK( IE ), WORK( ITAUQ ),
1442:      $                            WORK( ITAUP ), WORK( IWORK ),
1443:      $                            LWORK-IWORK+1, IERR )
1444: *
1445: *                    Generate left bidiagonalizing vectors in WORK(IR)
1446: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1447: *
1448:                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1449:      $                            WORK( ITAUQ ), WORK( IWORK ),
1450:      $                            LWORK-IWORK+1, IERR )
1451:                      IWORK = IE + N
1452: *
1453: *                    Perform bidiagonal QR iteration, computing left
1454: *                    singular vectors of R in WORK(IR)
1455: *                    (Workspace: need N*N+BDSPAC)
1456: *
1457:                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1458:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
1459:      $                            WORK( IWORK ), INFO )
1460: *
1461: *                    Multiply Q in U by left singular vectors of R in
1462: *                    WORK(IR), storing result in A
1463: *                    (Workspace: need N*N)
1464: *
1465:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1466:      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1467: *
1468: *                    Copy left singular vectors of A from A to U
1469: *
1470:                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1471: *
1472:                   ELSE
1473: *
1474: *                    Insufficient workspace for a fast algorithm
1475: *
1476:                      ITAU = 1
1477:                      IWORK = ITAU + N
1478: *
1479: *                    Compute A=Q*R, copying result to U
1480: *                    (Workspace: need 2*N, prefer N+N*NB)
1481: *
1482:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1483:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1484:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1485: *
1486: *                    Generate Q in U
1487: *                    (Workspace: need N+M, prefer N+M*NB)
1488: *
1489:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1490:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1491:                      IE = ITAU
1492:                      ITAUQ = IE + N
1493:                      ITAUP = ITAUQ + N
1494:                      IWORK = ITAUP + N
1495: *
1496: *                    Zero out below R in A
1497: *
1498:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1499:      $                            LDA )
1500: *
1501: *                    Bidiagonalize R in A
1502: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1503: *
1504:                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1505:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1506:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1507: *
1508: *                    Multiply Q in U by left bidiagonalizing vectors
1509: *                    in A
1510: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1511: *
1512:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1513:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1514:      $                            LWORK-IWORK+1, IERR )
1515:                      IWORK = IE + N
1516: *
1517: *                    Perform bidiagonal QR iteration, computing left
1518: *                    singular vectors of A in U
1519: *                    (Workspace: need BDSPAC)
1520: *
1521:                      CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1522:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1523:      $                            INFO )
1524: *
1525:                   END IF
1526: *
1527:                ELSE IF( WNTVO ) THEN
1528: *
1529: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1530: *                 M left singular vectors to be computed in U and
1531: *                 N right singular vectors to be overwritten on A
1532: *
1533:                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1534: *
1535: *                    Sufficient workspace for a fast algorithm
1536: *
1537:                      IU = 1
1538:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1539: *
1540: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1541: *
1542:                         LDWRKU = LDA
1543:                         IR = IU + LDWRKU*N
1544:                         LDWRKR = LDA
1545:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1546: *
1547: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1548: *
1549:                         LDWRKU = LDA
1550:                         IR = IU + LDWRKU*N
1551:                         LDWRKR = N
1552:                      ELSE
1553: *
1554: *                       WORK(IU) is N by N and WORK(IR) is N by N
1555: *
1556:                         LDWRKU = N
1557:                         IR = IU + LDWRKU*N
1558:                         LDWRKR = N
1559:                      END IF
1560:                      ITAU = IR + LDWRKR*N
1561:                      IWORK = ITAU + N
1562: *
1563: *                    Compute A=Q*R, copying result to U
1564: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1565: *
1566:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1567:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1568:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1569: *
1570: *                    Generate Q in U
1571: *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1572: *
1573:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1574:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1575: *
1576: *                    Copy R to WORK(IU), zeroing out below it
1577: *
1578:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1579:      $                            LDWRKU )
1580:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1581:      $                            WORK( IU+1 ), LDWRKU )
1582:                      IE = ITAU
1583:                      ITAUQ = IE + N
1584:                      ITAUP = ITAUQ + N
1585:                      IWORK = ITAUP + N
1586: *
1587: *                    Bidiagonalize R in WORK(IU), copying result to
1588: *                    WORK(IR)
1589: *                    (Workspace: need 2*N*N+4*N,
1590: *                                prefer 2*N*N+3*N+2*N*NB)
1591: *
1592:                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1593:      $                            WORK( IE ), WORK( ITAUQ ),
1594:      $                            WORK( ITAUP ), WORK( IWORK ),
1595:      $                            LWORK-IWORK+1, IERR )
1596:                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1597:      $                            WORK( IR ), LDWRKR )
1598: *
1599: *                    Generate left bidiagonalizing vectors in WORK(IU)
1600: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1601: *
1602:                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1603:      $                            WORK( ITAUQ ), WORK( IWORK ),
1604:      $                            LWORK-IWORK+1, IERR )
1605: *
1606: *                    Generate right bidiagonalizing vectors in WORK(IR)
1607: *                    (Workspace: need 2*N*N+4*N-1,
1608: *                                prefer 2*N*N+3*N+(N-1)*NB)
1609: *
1610:                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1611:      $                            WORK( ITAUP ), WORK( IWORK ),
1612:      $                            LWORK-IWORK+1, IERR )
1613:                      IWORK = IE + N
1614: *
1615: *                    Perform bidiagonal QR iteration, computing left
1616: *                    singular vectors of R in WORK(IU) and computing
1617: *                    right singular vectors of R in WORK(IR)
1618: *                    (Workspace: need 2*N*N+BDSPAC)
1619: *
1620:                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1621:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1622:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1623: *
1624: *                    Multiply Q in U by left singular vectors of R in
1625: *                    WORK(IU), storing result in A
1626: *                    (Workspace: need N*N)
1627: *
1628:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1629:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1630: *
1631: *                    Copy left singular vectors of A from A to U
1632: *
1633:                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1634: *
1635: *                    Copy right singular vectors of R from WORK(IR) to A
1636: *
1637:                      CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1638:      $                            LDA )
1639: *
1640:                   ELSE
1641: *
1642: *                    Insufficient workspace for a fast algorithm
1643: *
1644:                      ITAU = 1
1645:                      IWORK = ITAU + N
1646: *
1647: *                    Compute A=Q*R, copying result to U
1648: *                    (Workspace: need 2*N, prefer N+N*NB)
1649: *
1650:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1651:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1652:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1653: *
1654: *                    Generate Q in U
1655: *                    (Workspace: need N+M, prefer N+M*NB)
1656: *
1657:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1658:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1659:                      IE = ITAU
1660:                      ITAUQ = IE + N
1661:                      ITAUP = ITAUQ + N
1662:                      IWORK = ITAUP + N
1663: *
1664: *                    Zero out below R in A
1665: *
1666:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1667:      $                            LDA )
1668: *
1669: *                    Bidiagonalize R in A
1670: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1671: *
1672:                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1673:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1674:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1675: *
1676: *                    Multiply Q in U by left bidiagonalizing vectors
1677: *                    in A
1678: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1679: *
1680:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1681:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1682:      $                            LWORK-IWORK+1, IERR )
1683: *
1684: *                    Generate right bidiagonalizing vectors in A
1685: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1686: *
1687:                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1688:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1689:                      IWORK = IE + N
1690: *
1691: *                    Perform bidiagonal QR iteration, computing left
1692: *                    singular vectors of A in U and computing right
1693: *                    singular vectors of A in A
1694: *                    (Workspace: need BDSPAC)
1695: *
1696:                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1697:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1698:      $                            INFO )
1699: *
1700:                   END IF
1701: *
1702:                ELSE IF( WNTVAS ) THEN
1703: *
1704: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1705: *                         or 'A')
1706: *                 M left singular vectors to be computed in U and
1707: *                 N right singular vectors to be computed in VT
1708: *
1709:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1710: *
1711: *                    Sufficient workspace for a fast algorithm
1712: *
1713:                      IU = 1
1714:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1715: *
1716: *                       WORK(IU) is LDA by N
1717: *
1718:                         LDWRKU = LDA
1719:                      ELSE
1720: *
1721: *                       WORK(IU) is N by N
1722: *
1723:                         LDWRKU = N
1724:                      END IF
1725:                      ITAU = IU + LDWRKU*N
1726:                      IWORK = ITAU + N
1727: *
1728: *                    Compute A=Q*R, copying result to U
1729: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1730: *
1731:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1732:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1733:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1734: *
1735: *                    Generate Q in U
1736: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1737: *
1738:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1739:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1740: *
1741: *                    Copy R to WORK(IU), zeroing out below it
1742: *
1743:                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1744:      $                            LDWRKU )
1745:                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1746:      $                            WORK( IU+1 ), LDWRKU )
1747:                      IE = ITAU
1748:                      ITAUQ = IE + N
1749:                      ITAUP = ITAUQ + N
1750:                      IWORK = ITAUP + N
1751: *
1752: *                    Bidiagonalize R in WORK(IU), copying result to VT
1753: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1754: *
1755:                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1756:      $                            WORK( IE ), WORK( ITAUQ ),
1757:      $                            WORK( ITAUP ), WORK( IWORK ),
1758:      $                            LWORK-IWORK+1, IERR )
1759:                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1760:      $                            LDVT )
1761: *
1762: *                    Generate left bidiagonalizing vectors in WORK(IU)
1763: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1764: *
1765:                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1766:      $                            WORK( ITAUQ ), WORK( IWORK ),
1767:      $                            LWORK-IWORK+1, IERR )
1768: *
1769: *                    Generate right bidiagonalizing vectors in VT
1770: *                    (Workspace: need N*N+4*N-1,
1771: *                                prefer N*N+3*N+(N-1)*NB)
1772: *
1773:                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1774:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1775:                      IWORK = IE + N
1776: *
1777: *                    Perform bidiagonal QR iteration, computing left
1778: *                    singular vectors of R in WORK(IU) and computing
1779: *                    right singular vectors of R in VT
1780: *                    (Workspace: need N*N+BDSPAC)
1781: *
1782:                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1783:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1784:      $                            WORK( IWORK ), INFO )
1785: *
1786: *                    Multiply Q in U by left singular vectors of R in
1787: *                    WORK(IU), storing result in A
1788: *                    (Workspace: need N*N)
1789: *
1790:                      CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1791:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1792: *
1793: *                    Copy left singular vectors of A from A to U
1794: *
1795:                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1796: *
1797:                   ELSE
1798: *
1799: *                    Insufficient workspace for a fast algorithm
1800: *
1801:                      ITAU = 1
1802:                      IWORK = ITAU + N
1803: *
1804: *                    Compute A=Q*R, copying result to U
1805: *                    (Workspace: need 2*N, prefer N+N*NB)
1806: *
1807:                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1808:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1809:                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1810: *
1811: *                    Generate Q in U
1812: *                    (Workspace: need N+M, prefer N+M*NB)
1813: *
1814:                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1815:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1816: *
1817: *                    Copy R from A to VT, zeroing out below it
1818: *
1819:                      CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1820:                      IF( N.GT.1 )
1821:      $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1822:      $                               VT( 2, 1 ), LDVT )
1823:                      IE = ITAU
1824:                      ITAUQ = IE + N
1825:                      ITAUP = ITAUQ + N
1826:                      IWORK = ITAUP + N
1827: *
1828: *                    Bidiagonalize R in VT
1829: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1830: *
1831:                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1832:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1833:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1834: *
1835: *                    Multiply Q in U by left bidiagonalizing vectors
1836: *                    in VT
1837: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1838: *
1839:                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1840:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1841:      $                            LWORK-IWORK+1, IERR )
1842: *
1843: *                    Generate right bidiagonalizing vectors in VT
1844: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1845: *
1846:                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1847:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1848:                      IWORK = IE + N
1849: *
1850: *                    Perform bidiagonal QR iteration, computing left
1851: *                    singular vectors of A in U and computing right
1852: *                    singular vectors of A in VT
1853: *                    (Workspace: need BDSPAC)
1854: *
1855:                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1856:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1857:      $                            INFO )
1858: *
1859:                   END IF
1860: *
1861:                END IF
1862: *
1863:             END IF
1864: *
1865:          ELSE
1866: *
1867: *           M .LT. MNTHR
1868: *
1869: *           Path 10 (M at least N, but not much larger)
1870: *           Reduce to bidiagonal form without QR decomposition
1871: *
1872:             IE = 1
1873:             ITAUQ = IE + N
1874:             ITAUP = ITAUQ + N
1875:             IWORK = ITAUP + N
1876: *
1877: *           Bidiagonalize A
1878: *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1879: *
1880:             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1881:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1882:      $                   IERR )
1883:             IF( WNTUAS ) THEN
1884: *
1885: *              If left singular vectors desired in U, copy result to U
1886: *              and generate left bidiagonalizing vectors in U
1887: *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1888: *
1889:                CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1890:                IF( WNTUS )
1891:      $            NCU = N
1892:                IF( WNTUA )
1893:      $            NCU = M
1894:                CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1895:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1896:             END IF
1897:             IF( WNTVAS ) THEN
1898: *
1899: *              If right singular vectors desired in VT, copy result to
1900: *              VT and generate right bidiagonalizing vectors in VT
1901: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1902: *
1903:                CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1904:                CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1905:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1906:             END IF
1907:             IF( WNTUO ) THEN
1908: *
1909: *              If left singular vectors desired in A, generate left
1910: *              bidiagonalizing vectors in A
1911: *              (Workspace: need 4*N, prefer 3*N+N*NB)
1912: *
1913:                CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1914:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1915:             END IF
1916:             IF( WNTVO ) THEN
1917: *
1918: *              If right singular vectors desired in A, generate right
1919: *              bidiagonalizing vectors in A
1920: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1921: *
1922:                CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1923:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1924:             END IF
1925:             IWORK = IE + N
1926:             IF( WNTUAS .OR. WNTUO )
1927:      $         NRU = M
1928:             IF( WNTUN )
1929:      $         NRU = 0
1930:             IF( WNTVAS .OR. WNTVO )
1931:      $         NCVT = N
1932:             IF( WNTVN )
1933:      $         NCVT = 0
1934:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1935: *
1936: *              Perform bidiagonal QR iteration, if desired, computing
1937: *              left singular vectors in U and computing right singular
1938: *              vectors in VT
1939: *              (Workspace: need BDSPAC)
1940: *
1941:                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1942:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1943:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1944: *
1945: *              Perform bidiagonal QR iteration, if desired, computing
1946: *              left singular vectors in U and computing right singular
1947: *              vectors in A
1948: *              (Workspace: need BDSPAC)
1949: *
1950:                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1951:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
1952:             ELSE
1953: *
1954: *              Perform bidiagonal QR iteration, if desired, computing
1955: *              left singular vectors in A and computing right singular
1956: *              vectors in VT
1957: *              (Workspace: need BDSPAC)
1958: *
1959:                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1960:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1961:             END IF
1962: *
1963:          END IF
1964: *
1965:       ELSE
1966: *
1967: *        A has more columns than rows. If A has sufficiently more
1968: *        columns than rows, first reduce using the LQ decomposition (if
1969: *        sufficient workspace available)
1970: *
1971:          IF( N.GE.MNTHR ) THEN
1972: *
1973:             IF( WNTVN ) THEN
1974: *
1975: *              Path 1t(N much larger than M, JOBVT='N')
1976: *              No right singular vectors to be computed
1977: *
1978:                ITAU = 1
1979:                IWORK = ITAU + M
1980: *
1981: *              Compute A=L*Q
1982: *              (Workspace: need 2*M, prefer M+M*NB)
1983: *
1984:                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1985:      $                      LWORK-IWORK+1, IERR )
1986: *
1987: *              Zero out above L
1988: *
1989:                CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1990:                IE = 1
1991:                ITAUQ = IE + M
1992:                ITAUP = ITAUQ + M
1993:                IWORK = ITAUP + M
1994: *
1995: *              Bidiagonalize L in A
1996: *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
1997: *
1998:                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1999:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2000:      $                      IERR )
2001:                IF( WNTUO .OR. WNTUAS ) THEN
2002: *
2003: *                 If left singular vectors desired, generate Q
2004: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2005: *
2006:                   CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2007:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2008:                END IF
2009:                IWORK = IE + M
2010:                NRU = 0
2011:                IF( WNTUO .OR. WNTUAS )
2012:      $            NRU = M
2013: *
2014: *              Perform bidiagonal QR iteration, computing left singular
2015: *              vectors of A in A if desired
2016: *              (Workspace: need BDSPAC)
2017: *
2018:                CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2019:      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2020: *
2021: *              If left singular vectors desired in U, copy them there
2022: *
2023:                IF( WNTUAS )
2024:      $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2025: *
2026:             ELSE IF( WNTVO .AND. WNTUN ) THEN
2027: *
2028: *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2029: *              M right singular vectors to be overwritten on A and
2030: *              no left singular vectors to be computed
2031: *
2032:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2033: *
2034: *                 Sufficient workspace for a fast algorithm
2035: *
2036:                   IR = 1
2037:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2038: *
2039: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2040: *
2041:                      LDWRKU = LDA
2042:                      CHUNK = N
2043:                      LDWRKR = LDA
2044:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2045: *
2046: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2047: *
2048:                      LDWRKU = LDA
2049:                      CHUNK = N
2050:                      LDWRKR = M
2051:                   ELSE
2052: *
2053: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2054: *
2055:                      LDWRKU = M
2056:                      CHUNK = ( LWORK-M*M-M ) / M
2057:                      LDWRKR = M
2058:                   END IF
2059:                   ITAU = IR + LDWRKR*M
2060:                   IWORK = ITAU + M
2061: *
2062: *                 Compute A=L*Q
2063: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2064: *
2065:                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2066:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2067: *
2068: *                 Copy L to WORK(IR) and zero out above it
2069: *
2070:                   CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2071:                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2072:      $                         WORK( IR+LDWRKR ), LDWRKR )
2073: *
2074: *                 Generate Q in A
2075: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2076: *
2077:                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2078:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2079:                   IE = ITAU
2080:                   ITAUQ = IE + M
2081:                   ITAUP = ITAUQ + M
2082:                   IWORK = ITAUP + M
2083: *
2084: *                 Bidiagonalize L in WORK(IR)
2085: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2086: *
2087:                   CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2088:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2089:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2090: *
2091: *                 Generate right vectors bidiagonalizing L
2092: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2093: *
2094:                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2095:      $                         WORK( ITAUP ), WORK( IWORK ),
2096:      $                         LWORK-IWORK+1, IERR )
2097:                   IWORK = IE + M
2098: *
2099: *                 Perform bidiagonal QR iteration, computing right
2100: *                 singular vectors of L in WORK(IR)
2101: *                 (Workspace: need M*M+BDSPAC)
2102: *
2103:                   CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2104:      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2105:      $                         WORK( IWORK ), INFO )
2106:                   IU = IE + M
2107: *
2108: *                 Multiply right singular vectors of L in WORK(IR) by Q
2109: *                 in A, storing result in WORK(IU) and copying to A
2110: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2111: *
2112:                   DO 30 I = 1, N, CHUNK
2113:                      BLK = MIN( N-I+1, CHUNK )
2114:                      CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2115:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2116:      $                           WORK( IU ), LDWRKU )
2117:                      CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2118:      $                            A( 1, I ), LDA )
2119:    30             CONTINUE
2120: *
2121:                ELSE
2122: *
2123: *                 Insufficient workspace for a fast algorithm
2124: *
2125:                   IE = 1
2126:                   ITAUQ = IE + M
2127:                   ITAUP = ITAUQ + M
2128:                   IWORK = ITAUP + M
2129: *
2130: *                 Bidiagonalize A
2131: *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2132: *
2133:                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2134:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2135:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2136: *
2137: *                 Generate right vectors bidiagonalizing A
2138: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2139: *
2140:                   CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2141:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2142:                   IWORK = IE + M
2143: *
2144: *                 Perform bidiagonal QR iteration, computing right
2145: *                 singular vectors of A in A
2146: *                 (Workspace: need BDSPAC)
2147: *
2148:                   CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2149:      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2150: *
2151:                END IF
2152: *
2153:             ELSE IF( WNTVO .AND. WNTUAS ) THEN
2154: *
2155: *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2156: *              M right singular vectors to be overwritten on A and
2157: *              M left singular vectors to be computed in U
2158: *
2159:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2160: *
2161: *                 Sufficient workspace for a fast algorithm
2162: *
2163:                   IR = 1
2164:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2165: *
2166: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2167: *
2168:                      LDWRKU = LDA
2169:                      CHUNK = N
2170:                      LDWRKR = LDA
2171:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2172: *
2173: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2174: *
2175:                      LDWRKU = LDA
2176:                      CHUNK = N
2177:                      LDWRKR = M
2178:                   ELSE
2179: *
2180: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2181: *
2182:                      LDWRKU = M
2183:                      CHUNK = ( LWORK-M*M-M ) / M
2184:                      LDWRKR = M
2185:                   END IF
2186:                   ITAU = IR + LDWRKR*M
2187:                   IWORK = ITAU + M
2188: *
2189: *                 Compute A=L*Q
2190: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2191: *
2192:                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2193:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2194: *
2195: *                 Copy L to U, zeroing about above it
2196: *
2197:                   CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2198:                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2199:      $                         LDU )
2200: *
2201: *                 Generate Q in A
2202: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2203: *
2204:                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2205:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2206:                   IE = ITAU
2207:                   ITAUQ = IE + M
2208:                   ITAUP = ITAUQ + M
2209:                   IWORK = ITAUP + M
2210: *
2211: *                 Bidiagonalize L in U, copying result to WORK(IR)
2212: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2213: *
2214:                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2215:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2216:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2217:                   CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2218: *
2219: *                 Generate right vectors bidiagonalizing L in WORK(IR)
2220: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2221: *
2222:                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2223:      $                         WORK( ITAUP ), WORK( IWORK ),
2224:      $                         LWORK-IWORK+1, IERR )
2225: *
2226: *                 Generate left vectors bidiagonalizing L in U
2227: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2228: *
2229:                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2230:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2231:                   IWORK = IE + M
2232: *
2233: *                 Perform bidiagonal QR iteration, computing left
2234: *                 singular vectors of L in U, and computing right
2235: *                 singular vectors of L in WORK(IR)
2236: *                 (Workspace: need M*M+BDSPAC)
2237: *
2238:                   CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2239:      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2240:      $                         WORK( IWORK ), INFO )
2241:                   IU = IE + M
2242: *
2243: *                 Multiply right singular vectors of L in WORK(IR) by Q
2244: *                 in A, storing result in WORK(IU) and copying to A
2245: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2246: *
2247:                   DO 40 I = 1, N, CHUNK
2248:                      BLK = MIN( N-I+1, CHUNK )
2249:                      CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2250:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2251:      $                           WORK( IU ), LDWRKU )
2252:                      CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2253:      $                            A( 1, I ), LDA )
2254:    40             CONTINUE
2255: *
2256:                ELSE
2257: *
2258: *                 Insufficient workspace for a fast algorithm
2259: *
2260:                   ITAU = 1
2261:                   IWORK = ITAU + M
2262: *
2263: *                 Compute A=L*Q
2264: *                 (Workspace: need 2*M, prefer M+M*NB)
2265: *
2266:                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2267:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2268: *
2269: *                 Copy L to U, zeroing out above it
2270: *
2271:                   CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2272:                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2273:      $                         LDU )
2274: *
2275: *                 Generate Q in A
2276: *                 (Workspace: need 2*M, prefer M+M*NB)
2277: *
2278:                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2279:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2280:                   IE = ITAU
2281:                   ITAUQ = IE + M
2282:                   ITAUP = ITAUQ + M
2283:                   IWORK = ITAUP + M
2284: *
2285: *                 Bidiagonalize L in U
2286: *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2287: *
2288:                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2289:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2290:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2291: *
2292: *                 Multiply right vectors bidiagonalizing L by Q in A
2293: *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2294: *
2295:                   CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2296:      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2297:      $                         LWORK-IWORK+1, IERR )
2298: *
2299: *                 Generate left vectors bidiagonalizing L in U
2300: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2301: *
2302:                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2303:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2304:                   IWORK = IE + M
2305: *
2306: *                 Perform bidiagonal QR iteration, computing left
2307: *                 singular vectors of A in U and computing right
2308: *                 singular vectors of A in A
2309: *                 (Workspace: need BDSPAC)
2310: *
2311:                   CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2312:      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2313: *
2314:                END IF
2315: *
2316:             ELSE IF( WNTVS ) THEN
2317: *
2318:                IF( WNTUN ) THEN
2319: *
2320: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2321: *                 M right singular vectors to be computed in VT and
2322: *                 no left singular vectors to be computed
2323: *
2324:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2325: *
2326: *                    Sufficient workspace for a fast algorithm
2327: *
2328:                      IR = 1
2329:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2330: *
2331: *                       WORK(IR) is LDA by M
2332: *
2333:                         LDWRKR = LDA
2334:                      ELSE
2335: *
2336: *                       WORK(IR) is M by M
2337: *
2338:                         LDWRKR = M
2339:                      END IF
2340:                      ITAU = IR + LDWRKR*M
2341:                      IWORK = ITAU + M
2342: *
2343: *                    Compute A=L*Q
2344: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2345: *
2346:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2347:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2348: *
2349: *                    Copy L to WORK(IR), zeroing out above it
2350: *
2351:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2352:      $                            LDWRKR )
2353:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2354:      $                            WORK( IR+LDWRKR ), LDWRKR )
2355: *
2356: *                    Generate Q in A
2357: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2358: *
2359:                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2360:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2361:                      IE = ITAU
2362:                      ITAUQ = IE + M
2363:                      ITAUP = ITAUQ + M
2364:                      IWORK = ITAUP + M
2365: *
2366: *                    Bidiagonalize L in WORK(IR)
2367: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2368: *
2369:                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2370:      $                            WORK( IE ), WORK( ITAUQ ),
2371:      $                            WORK( ITAUP ), WORK( IWORK ),
2372:      $                            LWORK-IWORK+1, IERR )
2373: *
2374: *                    Generate right vectors bidiagonalizing L in
2375: *                    WORK(IR)
2376: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2377: *
2378:                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2379:      $                            WORK( ITAUP ), WORK( IWORK ),
2380:      $                            LWORK-IWORK+1, IERR )
2381:                      IWORK = IE + M
2382: *
2383: *                    Perform bidiagonal QR iteration, computing right
2384: *                    singular vectors of L in WORK(IR)
2385: *                    (Workspace: need M*M+BDSPAC)
2386: *
2387:                      CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2388:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2389:      $                            WORK( IWORK ), INFO )
2390: *
2391: *                    Multiply right singular vectors of L in WORK(IR) by
2392: *                    Q in A, storing result in VT
2393: *                    (Workspace: need M*M)
2394: *
2395:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2396:      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2397: *
2398:                   ELSE
2399: *
2400: *                    Insufficient workspace for a fast algorithm
2401: *
2402:                      ITAU = 1
2403:                      IWORK = ITAU + M
2404: *
2405: *                    Compute A=L*Q
2406: *                    (Workspace: need 2*M, prefer M+M*NB)
2407: *
2408:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2409:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2410: *
2411: *                    Copy result to VT
2412: *
2413:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2414: *
2415: *                    Generate Q in VT
2416: *                    (Workspace: need 2*M, prefer M+M*NB)
2417: *
2418:                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2419:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2420:                      IE = ITAU
2421:                      ITAUQ = IE + M
2422:                      ITAUP = ITAUQ + M
2423:                      IWORK = ITAUP + M
2424: *
2425: *                    Zero out above L in A
2426: *
2427:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2428:      $                            LDA )
2429: *
2430: *                    Bidiagonalize L in A
2431: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2432: *
2433:                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2434:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2435:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2436: *
2437: *                    Multiply right vectors bidiagonalizing L by Q in VT
2438: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2439: *
2440:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2441:      $                            WORK( ITAUP ), VT, LDVT,
2442:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2443:                      IWORK = IE + M
2444: *
2445: *                    Perform bidiagonal QR iteration, computing right
2446: *                    singular vectors of A in VT
2447: *                    (Workspace: need BDSPAC)
2448: *
2449:                      CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2450:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2451:      $                            INFO )
2452: *
2453:                   END IF
2454: *
2455:                ELSE IF( WNTUO ) THEN
2456: *
2457: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2458: *                 M right singular vectors to be computed in VT and
2459: *                 M left singular vectors to be overwritten on A
2460: *
2461:                   IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2462: *
2463: *                    Sufficient workspace for a fast algorithm
2464: *
2465:                      IU = 1
2466:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2467: *
2468: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2469: *
2470:                         LDWRKU = LDA
2471:                         IR = IU + LDWRKU*M
2472:                         LDWRKR = LDA
2473:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2474: *
2475: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2476: *
2477:                         LDWRKU = LDA
2478:                         IR = IU + LDWRKU*M
2479:                         LDWRKR = M
2480:                      ELSE
2481: *
2482: *                       WORK(IU) is M by M and WORK(IR) is M by M
2483: *
2484:                         LDWRKU = M
2485:                         IR = IU + LDWRKU*M
2486:                         LDWRKR = M
2487:                      END IF
2488:                      ITAU = IR + LDWRKR*M
2489:                      IWORK = ITAU + M
2490: *
2491: *                    Compute A=L*Q
2492: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2493: *
2494:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2495:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2496: *
2497: *                    Copy L to WORK(IU), zeroing out below it
2498: *
2499:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2500:      $                            LDWRKU )
2501:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2502:      $                            WORK( IU+LDWRKU ), LDWRKU )
2503: *
2504: *                    Generate Q in A
2505: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2506: *
2507:                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2508:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2509:                      IE = ITAU
2510:                      ITAUQ = IE + M
2511:                      ITAUP = ITAUQ + M
2512:                      IWORK = ITAUP + M
2513: *
2514: *                    Bidiagonalize L in WORK(IU), copying result to
2515: *                    WORK(IR)
2516: *                    (Workspace: need 2*M*M+4*M,
2517: *                                prefer 2*M*M+3*M+2*M*NB)
2518: *
2519:                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2520:      $                            WORK( IE ), WORK( ITAUQ ),
2521:      $                            WORK( ITAUP ), WORK( IWORK ),
2522:      $                            LWORK-IWORK+1, IERR )
2523:                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2524:      $                            WORK( IR ), LDWRKR )
2525: *
2526: *                    Generate right bidiagonalizing vectors in WORK(IU)
2527: *                    (Workspace: need 2*M*M+4*M-1,
2528: *                                prefer 2*M*M+3*M+(M-1)*NB)
2529: *
2530:                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2531:      $                            WORK( ITAUP ), WORK( IWORK ),
2532:      $                            LWORK-IWORK+1, IERR )
2533: *
2534: *                    Generate left bidiagonalizing vectors in WORK(IR)
2535: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2536: *
2537:                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2538:      $                            WORK( ITAUQ ), WORK( IWORK ),
2539:      $                            LWORK-IWORK+1, IERR )
2540:                      IWORK = IE + M
2541: *
2542: *                    Perform bidiagonal QR iteration, computing left
2543: *                    singular vectors of L in WORK(IR) and computing
2544: *                    right singular vectors of L in WORK(IU)
2545: *                    (Workspace: need 2*M*M+BDSPAC)
2546: *
2547:                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2548:      $                            WORK( IU ), LDWRKU, WORK( IR ),
2549:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2550: *
2551: *                    Multiply right singular vectors of L in WORK(IU) by
2552: *                    Q in A, storing result in VT
2553: *                    (Workspace: need M*M)
2554: *
2555:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2556:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2557: *
2558: *                    Copy left singular vectors of L to A
2559: *                    (Workspace: need M*M)
2560: *
2561:                      CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2562:      $                            LDA )
2563: *
2564:                   ELSE
2565: *
2566: *                    Insufficient workspace for a fast algorithm
2567: *
2568:                      ITAU = 1
2569:                      IWORK = ITAU + M
2570: *
2571: *                    Compute A=L*Q, copying result to VT
2572: *                    (Workspace: need 2*M, prefer M+M*NB)
2573: *
2574:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2575:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2576:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2577: *
2578: *                    Generate Q in VT
2579: *                    (Workspace: need 2*M, prefer M+M*NB)
2580: *
2581:                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2582:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2583:                      IE = ITAU
2584:                      ITAUQ = IE + M
2585:                      ITAUP = ITAUQ + M
2586:                      IWORK = ITAUP + M
2587: *
2588: *                    Zero out above L in A
2589: *
2590:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2591:      $                            LDA )
2592: *
2593: *                    Bidiagonalize L in A
2594: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2595: *
2596:                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2597:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2598:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2599: *
2600: *                    Multiply right vectors bidiagonalizing L by Q in VT
2601: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2602: *
2603:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2604:      $                            WORK( ITAUP ), VT, LDVT,
2605:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2606: *
2607: *                    Generate left bidiagonalizing vectors of L in A
2608: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2609: *
2610:                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2611:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2612:                      IWORK = IE + M
2613: *
2614: *                    Perform bidiagonal QR iteration, compute left
2615: *                    singular vectors of A in A and compute right
2616: *                    singular vectors of A in VT
2617: *                    (Workspace: need BDSPAC)
2618: *
2619:                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2620:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2621:      $                            INFO )
2622: *
2623:                   END IF
2624: *
2625:                ELSE IF( WNTUAS ) THEN
2626: *
2627: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
2628: *                         JOBVT='S')
2629: *                 M right singular vectors to be computed in VT and
2630: *                 M left singular vectors to be computed in U
2631: *
2632:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2633: *
2634: *                    Sufficient workspace for a fast algorithm
2635: *
2636:                      IU = 1
2637:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2638: *
2639: *                       WORK(IU) is LDA by N
2640: *
2641:                         LDWRKU = LDA
2642:                      ELSE
2643: *
2644: *                       WORK(IU) is LDA by M
2645: *
2646:                         LDWRKU = M
2647:                      END IF
2648:                      ITAU = IU + LDWRKU*M
2649:                      IWORK = ITAU + M
2650: *
2651: *                    Compute A=L*Q
2652: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2653: *
2654:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2655:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2656: *
2657: *                    Copy L to WORK(IU), zeroing out above it
2658: *
2659:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2660:      $                            LDWRKU )
2661:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2662:      $                            WORK( IU+LDWRKU ), LDWRKU )
2663: *
2664: *                    Generate Q in A
2665: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2666: *
2667:                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2668:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2669:                      IE = ITAU
2670:                      ITAUQ = IE + M
2671:                      ITAUP = ITAUQ + M
2672:                      IWORK = ITAUP + M
2673: *
2674: *                    Bidiagonalize L in WORK(IU), copying result to U
2675: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2676: *
2677:                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2678:      $                            WORK( IE ), WORK( ITAUQ ),
2679:      $                            WORK( ITAUP ), WORK( IWORK ),
2680:      $                            LWORK-IWORK+1, IERR )
2681:                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2682:      $                            LDU )
2683: *
2684: *                    Generate right bidiagonalizing vectors in WORK(IU)
2685: *                    (Workspace: need M*M+4*M-1,
2686: *                                prefer M*M+3*M+(M-1)*NB)
2687: *
2688:                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2689:      $                            WORK( ITAUP ), WORK( IWORK ),
2690:      $                            LWORK-IWORK+1, IERR )
2691: *
2692: *                    Generate left bidiagonalizing vectors in U
2693: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2694: *
2695:                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2696:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2697:                      IWORK = IE + M
2698: *
2699: *                    Perform bidiagonal QR iteration, computing left
2700: *                    singular vectors of L in U and computing right
2701: *                    singular vectors of L in WORK(IU)
2702: *                    (Workspace: need M*M+BDSPAC)
2703: *
2704:                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2705:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2706:      $                            WORK( IWORK ), INFO )
2707: *
2708: *                    Multiply right singular vectors of L in WORK(IU) by
2709: *                    Q in A, storing result in VT
2710: *                    (Workspace: need M*M)
2711: *
2712:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2713:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2714: *
2715:                   ELSE
2716: *
2717: *                    Insufficient workspace for a fast algorithm
2718: *
2719:                      ITAU = 1
2720:                      IWORK = ITAU + M
2721: *
2722: *                    Compute A=L*Q, copying result to VT
2723: *                    (Workspace: need 2*M, prefer M+M*NB)
2724: *
2725:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2726:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2727:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2728: *
2729: *                    Generate Q in VT
2730: *                    (Workspace: need 2*M, prefer M+M*NB)
2731: *
2732:                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2733:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2734: *
2735: *                    Copy L to U, zeroing out above it
2736: *
2737:                      CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2738:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2739:      $                            LDU )
2740:                      IE = ITAU
2741:                      ITAUQ = IE + M
2742:                      ITAUP = ITAUQ + M
2743:                      IWORK = ITAUP + M
2744: *
2745: *                    Bidiagonalize L in U
2746: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2747: *
2748:                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2749:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2750:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2751: *
2752: *                    Multiply right bidiagonalizing vectors in U by Q
2753: *                    in VT
2754: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2755: *
2756:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2757:      $                            WORK( ITAUP ), VT, LDVT,
2758:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2759: *
2760: *                    Generate left bidiagonalizing vectors in U
2761: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2762: *
2763:                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2764:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2765:                      IWORK = IE + M
2766: *
2767: *                    Perform bidiagonal QR iteration, computing left
2768: *                    singular vectors of A in U and computing right
2769: *                    singular vectors of A in VT
2770: *                    (Workspace: need BDSPAC)
2771: *
2772:                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2773:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2774:      $                            INFO )
2775: *
2776:                   END IF
2777: *
2778:                END IF
2779: *
2780:             ELSE IF( WNTVA ) THEN
2781: *
2782:                IF( WNTUN ) THEN
2783: *
2784: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2785: *                 N right singular vectors to be computed in VT and
2786: *                 no left singular vectors to be computed
2787: *
2788:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2789: *
2790: *                    Sufficient workspace for a fast algorithm
2791: *
2792:                      IR = 1
2793:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2794: *
2795: *                       WORK(IR) is LDA by M
2796: *
2797:                         LDWRKR = LDA
2798:                      ELSE
2799: *
2800: *                       WORK(IR) is M by M
2801: *
2802:                         LDWRKR = M
2803:                      END IF
2804:                      ITAU = IR + LDWRKR*M
2805:                      IWORK = ITAU + M
2806: *
2807: *                    Compute A=L*Q, copying result to VT
2808: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2809: *
2810:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2811:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2812:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2813: *
2814: *                    Copy L to WORK(IR), zeroing out above it
2815: *
2816:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2817:      $                            LDWRKR )
2818:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2819:      $                            WORK( IR+LDWRKR ), LDWRKR )
2820: *
2821: *                    Generate Q in VT
2822: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2823: *
2824:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2825:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2826:                      IE = ITAU
2827:                      ITAUQ = IE + M
2828:                      ITAUP = ITAUQ + M
2829:                      IWORK = ITAUP + M
2830: *
2831: *                    Bidiagonalize L in WORK(IR)
2832: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2833: *
2834:                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2835:      $                            WORK( IE ), WORK( ITAUQ ),
2836:      $                            WORK( ITAUP ), WORK( IWORK ),
2837:      $                            LWORK-IWORK+1, IERR )
2838: *
2839: *                    Generate right bidiagonalizing vectors in WORK(IR)
2840: *                    (Workspace: need M*M+4*M-1,
2841: *                                prefer M*M+3*M+(M-1)*NB)
2842: *
2843:                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2844:      $                            WORK( ITAUP ), WORK( IWORK ),
2845:      $                            LWORK-IWORK+1, IERR )
2846:                      IWORK = IE + M
2847: *
2848: *                    Perform bidiagonal QR iteration, computing right
2849: *                    singular vectors of L in WORK(IR)
2850: *                    (Workspace: need M*M+BDSPAC)
2851: *
2852:                      CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2853:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2854:      $                            WORK( IWORK ), INFO )
2855: *
2856: *                    Multiply right singular vectors of L in WORK(IR) by
2857: *                    Q in VT, storing result in A
2858: *                    (Workspace: need M*M)
2859: *
2860:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2861:      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2862: *
2863: *                    Copy right singular vectors of A from A to VT
2864: *
2865:                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2866: *
2867:                   ELSE
2868: *
2869: *                    Insufficient workspace for a fast algorithm
2870: *
2871:                      ITAU = 1
2872:                      IWORK = ITAU + M
2873: *
2874: *                    Compute A=L*Q, copying result to VT
2875: *                    (Workspace: need 2*M, prefer M+M*NB)
2876: *
2877:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2878:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2879:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2880: *
2881: *                    Generate Q in VT
2882: *                    (Workspace: need M+N, prefer M+N*NB)
2883: *
2884:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2885:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2886:                      IE = ITAU
2887:                      ITAUQ = IE + M
2888:                      ITAUP = ITAUQ + M
2889:                      IWORK = ITAUP + M
2890: *
2891: *                    Zero out above L in A
2892: *
2893:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2894:      $                            LDA )
2895: *
2896: *                    Bidiagonalize L in A
2897: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2898: *
2899:                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2900:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2901:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2902: *
2903: *                    Multiply right bidiagonalizing vectors in A by Q
2904: *                    in VT
2905: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2906: *
2907:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2908:      $                            WORK( ITAUP ), VT, LDVT,
2909:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2910:                      IWORK = IE + M
2911: *
2912: *                    Perform bidiagonal QR iteration, computing right
2913: *                    singular vectors of A in VT
2914: *                    (Workspace: need BDSPAC)
2915: *
2916:                      CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2917:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2918:      $                            INFO )
2919: *
2920:                   END IF
2921: *
2922:                ELSE IF( WNTUO ) THEN
2923: *
2924: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2925: *                 N right singular vectors to be computed in VT and
2926: *                 M left singular vectors to be overwritten on A
2927: *
2928:                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2929: *
2930: *                    Sufficient workspace for a fast algorithm
2931: *
2932:                      IU = 1
2933:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2934: *
2935: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2936: *
2937:                         LDWRKU = LDA
2938:                         IR = IU + LDWRKU*M
2939:                         LDWRKR = LDA
2940:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2941: *
2942: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2943: *
2944:                         LDWRKU = LDA
2945:                         IR = IU + LDWRKU*M
2946:                         LDWRKR = M
2947:                      ELSE
2948: *
2949: *                       WORK(IU) is M by M and WORK(IR) is M by M
2950: *
2951:                         LDWRKU = M
2952:                         IR = IU + LDWRKU*M
2953:                         LDWRKR = M
2954:                      END IF
2955:                      ITAU = IR + LDWRKR*M
2956:                      IWORK = ITAU + M
2957: *
2958: *                    Compute A=L*Q, copying result to VT
2959: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2960: *
2961:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2962:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2963:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2964: *
2965: *                    Generate Q in VT
2966: *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2967: *
2968:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2969:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2970: *
2971: *                    Copy L to WORK(IU), zeroing out above it
2972: *
2973:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2974:      $                            LDWRKU )
2975:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2976:      $                            WORK( IU+LDWRKU ), LDWRKU )
2977:                      IE = ITAU
2978:                      ITAUQ = IE + M
2979:                      ITAUP = ITAUQ + M
2980:                      IWORK = ITAUP + M
2981: *
2982: *                    Bidiagonalize L in WORK(IU), copying result to
2983: *                    WORK(IR)
2984: *                    (Workspace: need 2*M*M+4*M,
2985: *                                prefer 2*M*M+3*M+2*M*NB)
2986: *
2987:                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2988:      $                            WORK( IE ), WORK( ITAUQ ),
2989:      $                            WORK( ITAUP ), WORK( IWORK ),
2990:      $                            LWORK-IWORK+1, IERR )
2991:                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2992:      $                            WORK( IR ), LDWRKR )
2993: *
2994: *                    Generate right bidiagonalizing vectors in WORK(IU)
2995: *                    (Workspace: need 2*M*M+4*M-1,
2996: *                                prefer 2*M*M+3*M+(M-1)*NB)
2997: *
2998:                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2999:      $                            WORK( ITAUP ), WORK( IWORK ),
3000:      $                            LWORK-IWORK+1, IERR )
3001: *
3002: *                    Generate left bidiagonalizing vectors in WORK(IR)
3003: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3004: *
3005:                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3006:      $                            WORK( ITAUQ ), WORK( IWORK ),
3007:      $                            LWORK-IWORK+1, IERR )
3008:                      IWORK = IE + M
3009: *
3010: *                    Perform bidiagonal QR iteration, computing left
3011: *                    singular vectors of L in WORK(IR) and computing
3012: *                    right singular vectors of L in WORK(IU)
3013: *                    (Workspace: need 2*M*M+BDSPAC)
3014: *
3015:                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3016:      $                            WORK( IU ), LDWRKU, WORK( IR ),
3017:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3018: *
3019: *                    Multiply right singular vectors of L in WORK(IU) by
3020: *                    Q in VT, storing result in A
3021: *                    (Workspace: need M*M)
3022: *
3023:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3024:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3025: *
3026: *                    Copy right singular vectors of A from A to VT
3027: *
3028:                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3029: *
3030: *                    Copy left singular vectors of A from WORK(IR) to A
3031: *
3032:                      CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3033:      $                            LDA )
3034: *
3035:                   ELSE
3036: *
3037: *                    Insufficient workspace for a fast algorithm
3038: *
3039:                      ITAU = 1
3040:                      IWORK = ITAU + M
3041: *
3042: *                    Compute A=L*Q, copying result to VT
3043: *                    (Workspace: need 2*M, prefer M+M*NB)
3044: *
3045:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3046:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3047:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3048: *
3049: *                    Generate Q in VT
3050: *                    (Workspace: need M+N, prefer M+N*NB)
3051: *
3052:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3053:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3054:                      IE = ITAU
3055:                      ITAUQ = IE + M
3056:                      ITAUP = ITAUQ + M
3057:                      IWORK = ITAUP + M
3058: *
3059: *                    Zero out above L in A
3060: *
3061:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3062:      $                            LDA )
3063: *
3064: *                    Bidiagonalize L in A
3065: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3066: *
3067:                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3068:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3069:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3070: *
3071: *                    Multiply right bidiagonalizing vectors in A by Q
3072: *                    in VT
3073: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3074: *
3075:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3076:      $                            WORK( ITAUP ), VT, LDVT,
3077:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3078: *
3079: *                    Generate left bidiagonalizing vectors in A
3080: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3081: *
3082:                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3083:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3084:                      IWORK = IE + M
3085: *
3086: *                    Perform bidiagonal QR iteration, computing left
3087: *                    singular vectors of A in A and computing right
3088: *                    singular vectors of A in VT
3089: *                    (Workspace: need BDSPAC)
3090: *
3091:                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3092:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3093:      $                            INFO )
3094: *
3095:                   END IF
3096: *
3097:                ELSE IF( WNTUAS ) THEN
3098: *
3099: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
3100: *                         JOBVT='A')
3101: *                 N right singular vectors to be computed in VT and
3102: *                 M left singular vectors to be computed in U
3103: *
3104:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3105: *
3106: *                    Sufficient workspace for a fast algorithm
3107: *
3108:                      IU = 1
3109:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
3110: *
3111: *                       WORK(IU) is LDA by M
3112: *
3113:                         LDWRKU = LDA
3114:                      ELSE
3115: *
3116: *                       WORK(IU) is M by M
3117: *
3118:                         LDWRKU = M
3119:                      END IF
3120:                      ITAU = IU + LDWRKU*M
3121:                      IWORK = ITAU + M
3122: *
3123: *                    Compute A=L*Q, copying result to VT
3124: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3125: *
3126:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3127:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3128:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3129: *
3130: *                    Generate Q in VT
3131: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3132: *
3133:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3134:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3135: *
3136: *                    Copy L to WORK(IU), zeroing out above it
3137: *
3138:                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3139:      $                            LDWRKU )
3140:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3141:      $                            WORK( IU+LDWRKU ), LDWRKU )
3142:                      IE = ITAU
3143:                      ITAUQ = IE + M
3144:                      ITAUP = ITAUQ + M
3145:                      IWORK = ITAUP + M
3146: *
3147: *                    Bidiagonalize L in WORK(IU), copying result to U
3148: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3149: *
3150:                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3151:      $                            WORK( IE ), WORK( ITAUQ ),
3152:      $                            WORK( ITAUP ), WORK( IWORK ),
3153:      $                            LWORK-IWORK+1, IERR )
3154:                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3155:      $                            LDU )
3156: *
3157: *                    Generate right bidiagonalizing vectors in WORK(IU)
3158: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3159: *
3160:                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3161:      $                            WORK( ITAUP ), WORK( IWORK ),
3162:      $                            LWORK-IWORK+1, IERR )
3163: *
3164: *                    Generate left bidiagonalizing vectors in U
3165: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3166: *
3167:                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3168:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3169:                      IWORK = IE + M
3170: *
3171: *                    Perform bidiagonal QR iteration, computing left
3172: *                    singular vectors of L in U and computing right
3173: *                    singular vectors of L in WORK(IU)
3174: *                    (Workspace: need M*M+BDSPAC)
3175: *
3176:                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3177:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3178:      $                            WORK( IWORK ), INFO )
3179: *
3180: *                    Multiply right singular vectors of L in WORK(IU) by
3181: *                    Q in VT, storing result in A
3182: *                    (Workspace: need M*M)
3183: *
3184:                      CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3185:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3186: *
3187: *                    Copy right singular vectors of A from A to VT
3188: *
3189:                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3190: *
3191:                   ELSE
3192: *
3193: *                    Insufficient workspace for a fast algorithm
3194: *
3195:                      ITAU = 1
3196:                      IWORK = ITAU + M
3197: *
3198: *                    Compute A=L*Q, copying result to VT
3199: *                    (Workspace: need 2*M, prefer M+M*NB)
3200: *
3201:                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3202:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3203:                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3204: *
3205: *                    Generate Q in VT
3206: *                    (Workspace: need M+N, prefer M+N*NB)
3207: *
3208:                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3209:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3210: *
3211: *                    Copy L to U, zeroing out above it
3212: *
3213:                      CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3214:                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3215:      $                            LDU )
3216:                      IE = ITAU
3217:                      ITAUQ = IE + M
3218:                      ITAUP = ITAUQ + M
3219:                      IWORK = ITAUP + M
3220: *
3221: *                    Bidiagonalize L in U
3222: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3223: *
3224:                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3225:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3226:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3227: *
3228: *                    Multiply right bidiagonalizing vectors in U by Q
3229: *                    in VT
3230: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3231: *
3232:                      CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3233:      $                            WORK( ITAUP ), VT, LDVT,
3234:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3235: *
3236: *                    Generate left bidiagonalizing vectors in U
3237: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3238: *
3239:                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3240:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3241:                      IWORK = IE + M
3242: *
3243: *                    Perform bidiagonal QR iteration, computing left
3244: *                    singular vectors of A in U and computing right
3245: *                    singular vectors of A in VT
3246: *                    (Workspace: need BDSPAC)
3247: *
3248:                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3249:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3250:      $                            INFO )
3251: *
3252:                   END IF
3253: *
3254:                END IF
3255: *
3256:             END IF
3257: *
3258:          ELSE
3259: *
3260: *           N .LT. MNTHR
3261: *
3262: *           Path 10t(N greater than M, but not much larger)
3263: *           Reduce to bidiagonal form without LQ decomposition
3264: *
3265:             IE = 1
3266:             ITAUQ = IE + M
3267:             ITAUP = ITAUQ + M
3268:             IWORK = ITAUP + M
3269: *
3270: *           Bidiagonalize A
3271: *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3272: *
3273:             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3274:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3275:      $                   IERR )
3276:             IF( WNTUAS ) THEN
3277: *
3278: *              If left singular vectors desired in U, copy result to U
3279: *              and generate left bidiagonalizing vectors in U
3280: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3281: *
3282:                CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3283:                CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3284:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3285:             END IF
3286:             IF( WNTVAS ) THEN
3287: *
3288: *              If right singular vectors desired in VT, copy result to
3289: *              VT and generate right bidiagonalizing vectors in VT
3290: *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3291: *
3292:                CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3293:                IF( WNTVA )
3294:      $            NRVT = N
3295:                IF( WNTVS )
3296:      $            NRVT = M
3297:                CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3298:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3299:             END IF
3300:             IF( WNTUO ) THEN
3301: *
3302: *              If left singular vectors desired in A, generate left
3303: *              bidiagonalizing vectors in A
3304: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3305: *
3306:                CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3307:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3308:             END IF
3309:             IF( WNTVO ) THEN
3310: *
3311: *              If right singular vectors desired in A, generate right
3312: *              bidiagonalizing vectors in A
3313: *              (Workspace: need 4*M, prefer 3*M+M*NB)
3314: *
3315:                CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3316:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3317:             END IF
3318:             IWORK = IE + M
3319:             IF( WNTUAS .OR. WNTUO )
3320:      $         NRU = M
3321:             IF( WNTUN )
3322:      $         NRU = 0
3323:             IF( WNTVAS .OR. WNTVO )
3324:      $         NCVT = N
3325:             IF( WNTVN )
3326:      $         NCVT = 0
3327:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3328: *
3329: *              Perform bidiagonal QR iteration, if desired, computing
3330: *              left singular vectors in U and computing right singular
3331: *              vectors in VT
3332: *              (Workspace: need BDSPAC)
3333: *
3334:                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3335:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3336:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3337: *
3338: *              Perform bidiagonal QR iteration, if desired, computing
3339: *              left singular vectors in U and computing right singular
3340: *              vectors in A
3341: *              (Workspace: need BDSPAC)
3342: *
3343:                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3344:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3345:             ELSE
3346: *
3347: *              Perform bidiagonal QR iteration, if desired, computing
3348: *              left singular vectors in A and computing right singular
3349: *              vectors in VT
3350: *              (Workspace: need BDSPAC)
3351: *
3352:                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3353:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3354:             END IF
3355: *
3356:          END IF
3357: *
3358:       END IF
3359: *
3360: *     If DBDSQR failed to converge, copy unconverged superdiagonals
3361: *     to WORK( 2:MINMN )
3362: *
3363:       IF( INFO.NE.0 ) THEN
3364:          IF( IE.GT.2 ) THEN
3365:             DO 50 I = 1, MINMN - 1
3366:                WORK( I+1 ) = WORK( I+IE-1 )
3367:    50       CONTINUE
3368:          END IF
3369:          IF( IE.LT.2 ) THEN
3370:             DO 60 I = MINMN - 1, 1, -1
3371:                WORK( I+1 ) = WORK( I+IE-1 )
3372:    60       CONTINUE
3373:          END IF
3374:       END IF
3375: *
3376: *     Undo scaling if necessary
3377: *
3378:       IF( ISCL.EQ.1 ) THEN
3379:          IF( ANRM.GT.BIGNUM )
3380:      $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3381:      $                   IERR )
3382:          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3383:      $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3384:      $                   MINMN, IERR )
3385:          IF( ANRM.LT.SMLNUM )
3386:      $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3387:      $                   IERR )
3388:          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3389:      $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3390:      $                   MINMN, IERR )
3391:       END IF
3392: *
3393: *     Return optimal workspace in WORK(1)
3394: *
3395:       WORK( 1 ) = MAXWRK
3396: *
3397:       RETURN
3398: *
3399: *     End of DGESVD
3400: *
3401:       END
3402: