SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, \$ WORK, LWORK, RWORK, INFO ) * * -- LAPACK driver routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1999 * 8-15-00: Improve consistency of WS calculations (eca) * * .. Scalar Arguments .. CHARACTER JOBU, JOBVT INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N * .. * .. Array Arguments .. REAL RWORK( * ), S( * ) COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), \$ WORK( * ) * .. * * Purpose * ======= * * CGESVD computes the singular value decomposition (SVD) of a complex * M-by-N matrix A, optionally computing the left and/or right singular * vectors. The SVD is written * * A = U * SIGMA * conjugate-transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(m,n) diagonal elements, U is an M-by-M unitary matrix, and * V is an N-by-N unitary matrix. The diagonal elements of SIGMA * are the singular values of A; they are real and non-negative, and * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * * Note that the routine returns V**H, not V. * * Arguments * ========= * * JOBU (input) CHARACTER*1 * Specifies options for computing all or part of the matrix U: * = 'A': all M columns of U are returned in array U: * = 'S': the first min(m,n) columns of U (the left singular * vectors) are returned in the array U; * = 'O': the first min(m,n) columns of U (the left singular * vectors) are overwritten on the array A; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (input) CHARACTER*1 * Specifies options for computing all or part of the matrix * V**H: * = 'A': all N rows of V**H are returned in the array VT; * = 'S': the first min(m,n) rows of V**H (the right singular * vectors) are returned in the array VT; * = 'O': the first min(m,n) rows of V**H (the right singular * vectors) are overwritten on the array A; * = 'N': no rows of V**H (no right singular vectors) are * computed. * * JOBVT and JOBU cannot both be 'O'. * * M (input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (input/output) COMPLEX array, dimension (LDA,N) * On entry, the M-by-N matrix A. * On exit, * if JOBU = 'O', A is overwritten with the first min(m,n) * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) * rows of V**H (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * S (output) REAL array, dimension (min(M,N)) * The singular values of A, sorted so that S(i) >= S(i+1). * * U (output) COMPLEX array, dimension (LDU,UCOL) * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M unitary matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * LDU (input) INTEGER * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * VT (output) COMPLEX array, dimension (LDVT,N) * If JOBVT = 'A', VT contains the N-by-N unitary matrix * V**H; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * LDVT (input) INTEGER * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * WORK (workspace/output) COMPLEX array, dimension (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (input) INTEGER * The dimension of the array WORK. LWORK >= 1. * LWORK >= 2*MIN(M,N)+MAX(M,N). * For good performance, LWORK should generally be larger. * * If LWORK = -1, a workspace query is assumed. The optimal * size for the WORK array is calculated and stored in WORK(1), * and no other work except argument checking is performed. * * RWORK (workspace) REAL array, dimension * (5*min(M,N)) * On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the * unconverged superdiagonal elements of an upper bidiagonal * matrix B whose diagonal is in S (not necessarily sorted). * B satisfies A = U * B * VT, so it has the same singular * values as A, and singular vectors related by U and VT. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value. * > 0: if CBDSQR did not converge, INFO specifies how many * superdiagonals of an intermediate bidiagonal form B * did not converge to zero. See the description of RWORK * above for details. * * ===================================================================== * * .. Parameters .. INTEGER LQUERV PARAMETER ( LQUERV = -1 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), \$ CONE = ( 1.0E0, 0.0E0 ) ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. LOGICAL WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, WNTVA, \$ WNTVAS, WNTVN, WNTVO, WNTVS INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, \$ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, \$ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, \$ NRVT, WRKBL REAL ANRM, BIGNUM, EPS, SMLNUM * .. * .. Local Arrays .. REAL DUM( 1 ) COMPLEX CDUM( 1 ) * .. * .. External Subroutines .. EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY, \$ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR, \$ SLASCL, XERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV REAL CLANGE, SLAMCH EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 MINMN = MIN( M, N ) MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) WNTUA = LSAME( JOBU, 'A' ) WNTUS = LSAME( JOBU, 'S' ) WNTUAS = WNTUA .OR. WNTUS WNTUO = LSAME( JOBU, 'O' ) WNTUN = LSAME( JOBU, 'N' ) WNTVA = LSAME( JOBVT, 'A' ) WNTVS = LSAME( JOBVT, 'S' ) WNTVAS = WNTVA .OR. WNTVS WNTVO = LSAME( JOBVT, 'O' ) WNTVN = LSAME( JOBVT, 'N' ) MINWRK = 1 MAXWRK = 1 * IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN INFO = -1 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. \$ ( WNTVO .AND. WNTUO ) ) THEN INFO = -2 ELSE IF( M.LT.0 ) THEN INFO = -3 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -6 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN INFO = -9 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. \$ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN INFO = -11 END IF * * Compute workspace * (Note: Comments in the code beginning "Workspace:" describe the * minimal amount of workspace needed at that point in the code, * as well as the preferred amount for good performance. * CWorkspace refers to complex workspace, and RWorkspace to * real workspace. NB refers to the optimal block size for the * immediately following subroutine, as returned by ILAENV.) * IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN IF( M.GE.N ) THEN * * Space needed for CBDSQR is BDSPAC = 5*N * IF( M.GE.MNTHR ) THEN IF( WNTUN ) THEN * * Path 1 (M much larger than N, JOBU='N') * MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, \$ -1 ) MAXWRK = MAX( MAXWRK, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) IF( WNTVO .OR. WNTVAS ) \$ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MINWRK = 3*N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUO .AND. WNTVN ) THEN * * Path 2 (M much larger than N, JOBU='O', JOBVT='N') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, \$ N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUO .AND. WNTVAS ) THEN * * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or * 'A') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, \$ N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUS .AND. WNTVN ) THEN * * Path 4 (M much larger than N, JOBU='S', JOBVT='N') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, \$ N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) MAXWRK = N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUS .AND. WNTVO ) THEN * * Path 5 (M much larger than N, JOBU='S', JOBVT='O') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, \$ N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MAXWRK = 2*N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUS .AND. WNTVAS ) THEN * * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or * 'A') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, \$ N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MAXWRK = N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUA .AND. WNTVN ) THEN * * Path 7 (M much larger than N, JOBU='A', JOBVT='N') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, \$ M, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) MAXWRK = N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUA .AND. WNTVO ) THEN * * Path 8 (M much larger than N, JOBU='A', JOBVT='O') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, \$ M, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MAXWRK = 2*N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTUA .AND. WNTVAS ) THEN * * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or * 'A') * WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, \$ M, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+2*N* \$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) WRKBL = MAX( WRKBL, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MAXWRK = N*N + WRKBL MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) END IF ELSE * * Path 10 (M at least N, but not much larger) * MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, \$ -1, -1 ) IF( WNTUS .OR. WNTUO ) \$ MAXWRK = MAX( MAXWRK, 2*N+N* \$ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) ) IF( WNTUA ) \$ MAXWRK = MAX( MAXWRK, 2*N+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) IF( .NOT.WNTVN ) \$ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) MINWRK = 2*N + M MAXWRK = MAX( MINWRK, MAXWRK ) END IF ELSE * * Space needed for CBDSQR is BDSPAC = 5*M * IF( N.GE.MNTHR ) THEN IF( WNTVN ) THEN * * Path 1t(N much larger than M, JOBVT='N') * MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, \$ -1 ) MAXWRK = MAX( MAXWRK, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) IF( WNTUO .OR. WNTUAS ) \$ MAXWRK = MAX( MAXWRK, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MINWRK = 3*M MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVO .AND. WNTUN ) THEN * * Path 2t(N much larger than M, JOBU='N', JOBVT='O') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVO .AND. WNTUAS ) THEN * * Path 3t(N much larger than M, JOBU='S' or 'A', * JOBVT='O') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVS .AND. WNTUN ) THEN * * Path 4t(N much larger than M, JOBU='N', JOBVT='S') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) MAXWRK = M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVS .AND. WNTUO ) THEN * * Path 5t(N much larger than M, JOBU='O', JOBVT='S') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MAXWRK = 2*M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVS .AND. WNTUAS ) THEN * * Path 6t(N much larger than M, JOBU='S' or 'A', * JOBVT='S') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MAXWRK = M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVA .AND. WNTUN ) THEN * * Path 7t(N much larger than M, JOBU='N', JOBVT='A') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) MAXWRK = M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVA .AND. WNTUO ) THEN * * Path 8t(N much larger than M, JOBU='O', JOBVT='A') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MAXWRK = 2*M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) ELSE IF( WNTVA .AND. WNTUAS ) THEN * * Path 9t(N much larger than M, JOBU='S' or 'A', * JOBVT='A') * WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, \$ N, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+2*M* \$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) WRKBL = MAX( WRKBL, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) WRKBL = MAX( WRKBL, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MAXWRK = M*M + WRKBL MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) END IF ELSE * * Path 10t(N greater than M, but not much larger) * MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, \$ -1, -1 ) IF( WNTVS .OR. WNTVO ) \$ MAXWRK = MAX( MAXWRK, 2*M+M* \$ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) IF( WNTVA ) \$ MAXWRK = MAX( MAXWRK, 2*M+N* \$ ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) ) IF( .NOT.WNTUN ) \$ MAXWRK = MAX( MAXWRK, 2*M+( M-1 )* \$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) MINWRK = 2*M + N MAXWRK = MAX( MINWRK, MAXWRK ) END IF END IF END IF IF( INFO.EQ.0 ) THEN WORK( 1 ) = MAXWRK IF( LWORK.LT.MINWRK .AND. LWORK.NE.LQUERV ) \$ INFO = -13 END IF * * Quick returns * IF( INFO.NE.0 ) THEN CALL XERBLA( 'CGESVD', -INFO ) RETURN END IF IF( LWORK.EQ.LQUERV ) RETURN IF( M.EQ.0 .OR. N.EQ.0 ) THEN RETURN END IF * * Get machine constants * EPS = SLAMCH( 'P' ) SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS BIGNUM = ONE / SMLNUM * * Scale A if max element outside range [SMLNUM,BIGNUM] * ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) ISCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN ISCL = 1 CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) ELSE IF( ANRM.GT.BIGNUM ) THEN ISCL = 1 CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) END IF * IF( M.GE.N ) THEN * * A has at least as many rows as columns. If A has sufficiently * more rows than columns, first reduce using the QR * decomposition (if sufficient workspace available) * IF( M.GE.MNTHR ) THEN * IF( WNTUN ) THEN * * Path 1 (M much larger than N, JOBU='N') * No left singular vectors to be computed * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: need 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Zero out below R * CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), \$ LDA ) IE = 1 ITAUQ = 1 ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in A * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, \$ IERR ) NCVT = 0 IF( WNTVO .OR. WNTVAS ) THEN * * If right singular vectors desired, generate P'. * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) NCVT = N END IF IRWORK = IE + N * * Perform bidiagonal QR iteration, computing right * singular vectors of A in A if desired * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, \$ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) * * If right singular vectors desired in VT, copy them there * IF( WNTVAS ) \$ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) * ELSE IF( WNTUO .AND. WNTVN ) THEN * * Path 2 (M much larger than N, JOBU='O', JOBVT='N') * N left singular vectors to be overwritten on A and * no right singular vectors to be computed * IF( LWORK.GE.N*N+3*N ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN * * WORK(IU) is LDA by N, WORK(IR) is LDA by N * LDWRKU = LDA LDWRKR = LDA ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN * * WORK(IU) is LDA by N, WORK(IR) is N by N * LDWRKU = LDA LDWRKR = N ELSE * * WORK(IU) is LDWRKU by N, WORK(IR) is N by N * LDWRKU = ( LWORK-N*N ) / N LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IR) and zero out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IR+1 ), LDWRKR ) * * Generate Q in A * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left vectors bidiagonalizing R * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: need 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, \$ WORK( IR ), LDWRKR, CDUM, 1, \$ RWORK( IRWORK ), INFO ) IU = ITAUQ * * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in WORK(IU) and copying to A * (CWorkspace: need N*N+N, prefer N*N+M*N) * (RWorkspace: 0) * DO 10 I = 1, M, LDWRKU CHUNK = MIN( M-I+1, LDWRKU ) CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), \$ LDA, WORK( IR ), LDWRKR, CZERO, \$ WORK( IU ), LDWRKU ) CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, \$ A( I, 1 ), LDA ) 10 CONTINUE * ELSE * * Insufficient workspace for a fast algorithm * IE = 1 ITAUQ = 1 ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize A * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) * (RWorkspace: N) * CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left vectors bidiagonalizing A * (CWorkspace: need 3*N, prefer 2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in A * (CWorkspace: need 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, \$ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTUO .AND. WNTVAS ) THEN * * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') * N left singular vectors to be overwritten on A and * N right singular vectors to be computed in VT * IF( LWORK.GE.N*N+3*N ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is LDA by N * LDWRKU = LDA LDWRKR = LDA ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is N by N * LDWRKU = LDA LDWRKR = N ELSE * * WORK(IU) is LDWRKU by N and WORK(IR) is N by N * LDWRKU = ( LWORK-N*N ) / N LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to VT, zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), \$ LDVT ) * * Generate Q in A * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in VT, copying result to WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) * * Generate left vectors bidiagonalizing R in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing R in VT * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) and computing right * singular vectors of R in VT * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, \$ LDVT, WORK( IR ), LDWRKR, CDUM, 1, \$ RWORK( IRWORK ), INFO ) IU = ITAUQ * * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in WORK(IU) and copying to A * (CWorkspace: need N*N+N, prefer N*N+M*N) * (RWorkspace: 0) * DO 20 I = 1, M, LDWRKU CHUNK = MIN( M-I+1, LDWRKU ) CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), \$ LDA, WORK( IR ), LDWRKR, CZERO, \$ WORK( IU ), LDWRKU ) CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, \$ A( I, 1 ), LDA ) 20 CONTINUE * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to VT, zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), \$ LDVT ) * * Generate Q in A * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in VT * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: N) * CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in A by left vectors bidiagonalizing R * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, \$ WORK( ITAUQ ), A, LDA, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing R in VT * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in A and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * END IF * ELSE IF( WNTUS ) THEN * IF( WNTVN ) THEN * * Path 4 (M much larger than N, JOBU='S', JOBVT='N') * N left singular vectors to be computed in U and * no right singular vectors to be computed * IF( LWORK.GE.N*N+3*N ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.WRKBL+LDA*N ) THEN * * WORK(IR) is LDA by N * LDWRKR = LDA ELSE * * WORK(IR) is N by N * LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IR), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), \$ LDWRKR ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IR+1 ), LDWRKR ) * * Generate Q in A * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left vectors bidiagonalizing R in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, \$ 1, WORK( IR ), LDWRKR, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in U * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, \$ WORK( IR ), LDWRKR, CZERO, U, LDU ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Zero out below R in A * CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ A( 2, 1 ), LDA ) * * Bidiagonalize R in A * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left vectors bidiagonalizing R * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, \$ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * END IF * ELSE IF( WNTVO ) THEN * * Path 5 (M much larger than N, JOBU='S', JOBVT='O') * N left singular vectors to be computed in U and * N right singular vectors to be overwritten on A * IF( LWORK.GE.2*N*N+3*N ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is LDA by N * LDWRKU = LDA IR = IU + LDWRKU*N LDWRKR = LDA ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is N by N * LDWRKU = LDA IR = IU + LDWRKU*N LDWRKR = N ELSE * * WORK(IU) is N by N and WORK(IR) is N by N * LDWRKU = N IR = IU + LDWRKU*N LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IU), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IU+1 ), LDWRKU ) * * Generate Q in A * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IU), copying result to * WORK(IR) * (CWorkspace: need 2*N*N+3*N, * prefer 2*N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, \$ WORK( IR ), LDWRKR ) * * Generate left bidiagonalizing vectors in WORK(IU) * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in WORK(IR) * (CWorkspace: need 2*N*N+3*N-1, * prefer 2*N*N+2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in WORK(IR) * (CWorkspace: need 2*N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, WORK( IU ), \$ LDWRKU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * * Multiply Q in A by left singular vectors of R in * WORK(IU), storing result in U * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, \$ WORK( IU ), LDWRKU, CZERO, U, LDU ) * * Copy right singular vectors of R to A * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, \$ LDA ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Zero out below R in A * CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ A( 2, 1 ), LDA ) * * Bidiagonalize R in A * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left vectors bidiagonalizing R * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing R in A * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, \$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * END IF * ELSE IF( WNTVAS ) THEN * * Path 6 (M much larger than N, JOBU='S', JOBVT='S' * or 'A') * N left singular vectors to be computed in U and * N right singular vectors to be computed in VT * IF( LWORK.GE.N*N+3*N ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+LDA*N ) THEN * * WORK(IU) is LDA by N * LDWRKU = LDA ELSE * * WORK(IU) is N by N * LDWRKU = N END IF ITAU = IU + LDWRKU*N IWORK = ITAU + N * * Compute A=Q*R * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IU), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IU+1 ), LDWRKU ) * * Generate Q in A * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IU), copying result to VT * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, \$ LDVT ) * * Generate left bidiagonalizing vectors in WORK(IU) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in VT * (CWorkspace: need N*N+3*N-1, * prefer N*N+2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in VT * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, \$ LDVT, WORK( IU ), LDWRKU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply Q in A by left singular vectors of R in * WORK(IU), storing result in U * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, \$ WORK( IU ), LDWRKU, CZERO, U, LDU ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to VT, zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ VT( 2, 1 ), LDVT ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in VT * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left bidiagonalizing vectors * in VT * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in VT * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * END IF * ELSE IF( WNTUA ) THEN * IF( WNTVN ) THEN * * Path 7 (M much larger than N, JOBU='A', JOBVT='N') * M left singular vectors to be computed in U and * no right singular vectors to be computed * IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.WRKBL+LDA*N ) THEN * * WORK(IR) is LDA by N * LDWRKR = LDA ELSE * * WORK(IR) is N by N * LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Copy R to WORK(IR), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), \$ LDWRKR ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IR+1 ), LDWRKR ) * * Generate Q in U * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in WORK(IR) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, \$ 1, WORK( IR ), LDWRKR, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply Q in U by left singular vectors of R in * WORK(IR), storing result in A * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, \$ WORK( IR ), LDWRKR, CZERO, A, LDA ) * * Copy left singular vectors of A from A to U * CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need N+M, prefer N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Zero out below R in A * CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ A( 2, 1 ), LDA ) * * Bidiagonalize R in A * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left bidiagonalizing vectors * in A * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, \$ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * END IF * ELSE IF( WNTVO ) THEN * * Path 8 (M much larger than N, JOBU='A', JOBVT='O') * M left singular vectors to be computed in U and * N right singular vectors to be overwritten on A * IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is LDA by N * LDWRKU = LDA IR = IU + LDWRKU*N LDWRKR = LDA ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN * * WORK(IU) is LDA by N and WORK(IR) is N by N * LDWRKU = LDA IR = IU + LDWRKU*N LDWRKR = N ELSE * * WORK(IU) is N by N and WORK(IR) is N by N * LDWRKU = N IR = IU + LDWRKU*N LDWRKR = N END IF ITAU = IR + LDWRKR*N IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IU), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IU+1 ), LDWRKU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IU), copying result to * WORK(IR) * (CWorkspace: need 2*N*N+3*N, * prefer 2*N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, \$ WORK( IR ), LDWRKR ) * * Generate left bidiagonalizing vectors in WORK(IU) * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in WORK(IR) * (CWorkspace: need 2*N*N+3*N-1, * prefer 2*N*N+2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in WORK(IR) * (CWorkspace: need 2*N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, WORK( IU ), \$ LDWRKU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * * Multiply Q in U by left singular vectors of R in * WORK(IU), storing result in A * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, \$ WORK( IU ), LDWRKU, CZERO, A, LDA ) * * Copy left singular vectors of A from A to U * CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) * * Copy right singular vectors of R from WORK(IR) to A * CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, \$ LDA ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need N+M, prefer N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Zero out below R in A * CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ A( 2, 1 ), LDA ) * * Bidiagonalize R in A * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left bidiagonalizing vectors * in A * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in A * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, \$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * END IF * ELSE IF( WNTVAS ) THEN * * Path 9 (M much larger than N, JOBU='A', JOBVT='S' * or 'A') * M left singular vectors to be computed in U and * N right singular vectors to be computed in VT * IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+LDA*N ) THEN * * WORK(IU) is LDA by N * LDWRKU = LDA ELSE * * WORK(IU) is N by N * LDWRKU = N END IF ITAU = IU + LDWRKU*N IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R to WORK(IU), zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ WORK( IU+1 ), LDWRKU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in WORK(IU), copying result to VT * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, \$ LDVT ) * * Generate left bidiagonalizing vectors in WORK(IU) * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in VT * (CWorkspace: need N*N+3*N-1, * prefer N*N+2*N+(N-1)*NB) * (RWorkspace: need 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in VT * (CWorkspace: need N*N) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, \$ LDVT, WORK( IU ), LDWRKU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply Q in U by left singular vectors of R in * WORK(IU), storing result in A * (CWorkspace: need N*N) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, \$ WORK( IU ), LDWRKU, CZERO, A, LDA ) * * Copy left singular vectors of A from A to U * CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + N * * Compute A=Q*R, copying result to U * (CWorkspace: need 2*N, prefer N+N*NB) * (RWorkspace: 0) * CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) * * Generate Q in U * (CWorkspace: need N+M, prefer N+M*NB) * (RWorkspace: 0) * CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy R from A to VT, zeroing out below it * CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, \$ VT( 2, 1 ), LDVT ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize R in VT * (CWorkspace: need 3*N, prefer 2*N+2*N*NB) * (RWorkspace: need N) * CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply Q in U by left bidiagonalizing vectors * in VT * (CWorkspace: need 2*N+M, prefer 2*N+M*NB) * (RWorkspace: 0) * CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, \$ WORK( ITAUQ ), U, LDU, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in VT * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + N * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * END IF * END IF * ELSE * * M .LT. MNTHR * * Path 10 (M at least N, but not much larger) * Reduce to bidiagonal form without QR decomposition * IE = 1 ITAUQ = 1 ITAUP = ITAUQ + N IWORK = ITAUP + N * * Bidiagonalize A * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) * (RWorkspace: need N) * CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, \$ IERR ) IF( WNTUAS ) THEN * * If left singular vectors desired in U, copy result to U * and generate left bidiagonalizing vectors in U * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) * (RWorkspace: 0) * CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) IF( WNTUS ) \$ NCU = N IF( WNTUA ) \$ NCU = M CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTVAS ) THEN * * If right singular vectors desired in VT, copy result to * VT and generate right bidiagonalizing vectors in VT * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTUO ) THEN * * If left singular vectors desired in A, generate left * bidiagonalizing vectors in A * (CWorkspace: need 3*N, prefer 2*N+N*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTVO ) THEN * * If right singular vectors desired in A, generate right * bidiagonalizing vectors in A * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IRWORK = IE + N IF( WNTUAS .OR. WNTUO ) \$ NRU = M IF( WNTUN ) \$ NRU = 0 IF( WNTVAS .OR. WNTVO ) \$ NCVT = N IF( WNTVN ) \$ NCVT = 0 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, \$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) ELSE * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in A and computing right singular * vectors in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, \$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), \$ INFO ) END IF * END IF * ELSE * * A has more columns than rows. If A has sufficiently more * columns than rows, first reduce using the LQ decomposition (if * sufficient workspace available) * IF( N.GE.MNTHR ) THEN * IF( WNTVN ) THEN * * Path 1t(N much larger than M, JOBVT='N') * No right singular vectors to be computed * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Zero out above L * CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), \$ LDA ) IE = 1 ITAUQ = 1 ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in A * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, \$ IERR ) IF( WNTUO .OR. WNTUAS ) THEN * * If left singular vectors desired, generate Q * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IRWORK = IE + M NRU = 0 IF( WNTUO .OR. WNTUAS ) \$ NRU = M * * Perform bidiagonal QR iteration, computing left singular * vectors of A in A if desired * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, \$ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) * * If left singular vectors desired in U, copy them there * IF( WNTUAS ) \$ CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) * ELSE IF( WNTVO .AND. WNTUN ) THEN * * Path 2t(N much larger than M, JOBU='N', JOBVT='O') * M right singular vectors to be overwritten on A and * no left singular vectors to be computed * IF( LWORK.GE.M*M+3*M ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN * * WORK(IU) is LDA by N and WORK(IR) is LDA by M * LDWRKU = LDA CHUNK = N LDWRKR = LDA ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN * * WORK(IU) is LDA by N and WORK(IR) is M by M * LDWRKU = LDA CHUNK = N LDWRKR = M ELSE * * WORK(IU) is M by CHUNK and WORK(IR) is M by M * LDWRKU = M CHUNK = ( LWORK-M*M ) / M LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IR) and zero out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IR+LDWRKR ), LDWRKR ) * * Generate Q in A * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IR) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing L * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, \$ RWORK( IRWORK ), INFO ) IU = ITAUQ * * Multiply right singular vectors of L in WORK(IR) by Q * in A, storing result in WORK(IU) and copying to A * (CWorkspace: need M*M+M, prefer M*M+M*N) * (RWorkspace: 0) * DO 30 I = 1, N, CHUNK BLK = MIN( N-I+1, CHUNK ) CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), \$ LDWRKR, A( 1, I ), LDA, CZERO, \$ WORK( IU ), LDWRKU ) CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, \$ A( 1, I ), LDA ) 30 CONTINUE * ELSE * * Insufficient workspace for a fast algorithm * IE = 1 ITAUQ = 1 ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize A * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) * (RWorkspace: need M) * CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing A * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of A in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, \$ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTVO .AND. WNTUAS ) THEN * * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') * M right singular vectors to be overwritten on A and * M left singular vectors to be computed in U * IF( LWORK.GE.M*M+3*M ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN * * WORK(IU) is LDA by N and WORK(IR) is LDA by M * LDWRKU = LDA CHUNK = N LDWRKR = LDA ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN * * WORK(IU) is LDA by N and WORK(IR) is M by M * LDWRKU = LDA CHUNK = N LDWRKR = M ELSE * * WORK(IU) is M by CHUNK and WORK(IR) is M by M * LDWRKU = M CHUNK = ( LWORK-M*M ) / M LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to U, zeroing about above it * CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), \$ LDU ) * * Generate Q in A * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in U, copying result to WORK(IR) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) * * Generate right vectors bidiagonalizing L in WORK(IR) * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left vectors bidiagonalizing L in U * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of L in U, and computing right * singular vectors of L in WORK(IR) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) IU = ITAUQ * * Multiply right singular vectors of L in WORK(IR) by Q * in A, storing result in WORK(IU) and copying to A * (CWorkspace: need M*M+M, prefer M*M+M*N)) * (RWorkspace: 0) * DO 40 I = 1, N, CHUNK BLK = MIN( N-I+1, CHUNK ) CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), \$ LDWRKR, A( 1, I ), LDA, CZERO, \$ WORK( IU ), LDWRKU ) CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, \$ A( 1, I ), LDA ) 40 CONTINUE * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to U, zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), \$ LDU ) * * Generate Q in A * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in U * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right vectors bidiagonalizing L by Q in A * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, \$ WORK( ITAUP ), A, LDA, WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left vectors bidiagonalizing L in U * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, \$ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTVS ) THEN * IF( WNTUN ) THEN * * Path 4t(N much larger than M, JOBU='N', JOBVT='S') * M right singular vectors to be computed in VT and * no left singular vectors to be computed * IF( LWORK.GE.M*M+3*M ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.WRKBL+LDA*M ) THEN * * WORK(IR) is LDA by M * LDWRKR = LDA ELSE * * WORK(IR) is M by M * LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IR), zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), \$ LDWRKR ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IR+LDWRKR ), LDWRKR ) * * Generate Q in A * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IR) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right vectors bidiagonalizing L in * WORK(IR) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply right singular vectors of L in WORK(IR) by * Q in A, storing result in VT * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), \$ LDWRKR, A, LDA, CZERO, VT, LDVT ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy result to VT * CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Zero out above L in A * CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ A( 1, 2 ), LDA ) * * Bidiagonalize L in A * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right vectors bidiagonalizing L by Q in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, \$ LDVT, CDUM, 1, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTUO ) THEN * * Path 5t(N much larger than M, JOBU='O', JOBVT='S') * M right singular vectors to be computed in VT and * M left singular vectors to be overwritten on A * IF( LWORK.GE.2*M*M+3*M ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN * * WORK(IU) is LDA by M and WORK(IR) is LDA by M * LDWRKU = LDA IR = IU + LDWRKU*M LDWRKR = LDA ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN * * WORK(IU) is LDA by M and WORK(IR) is M by M * LDWRKU = LDA IR = IU + LDWRKU*M LDWRKR = M ELSE * * WORK(IU) is M by M and WORK(IR) is M by M * LDWRKU = M IR = IU + LDWRKU*M LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IU), zeroing out below it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IU+LDWRKU ), LDWRKU ) * * Generate Q in A * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IU), copying result to * WORK(IR) * (CWorkspace: need 2*M*M+3*M, * prefer 2*M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, \$ WORK( IR ), LDWRKR ) * * Generate right bidiagonalizing vectors in WORK(IU) * (CWorkspace: need 2*M*M+3*M-1, * prefer 2*M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in WORK(IR) * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of L in WORK(IR) and computing * right singular vectors of L in WORK(IU) * (CWorkspace: need 2*M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), \$ WORK( IU ), LDWRKU, WORK( IR ), \$ LDWRKR, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * * Multiply right singular vectors of L in WORK(IU) by * Q in A, storing result in VT * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), \$ LDWRKU, A, LDA, CZERO, VT, LDVT ) * * Copy left singular vectors of L to A * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, \$ LDA ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Zero out above L in A * CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ A( 1, 2 ), LDA ) * * Bidiagonalize L in A * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right vectors bidiagonalizing L by Q in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors of L in A * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of A in A and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, A, LDA, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTUAS ) THEN * * Path 6t(N much larger than M, JOBU='S' or 'A', * JOBVT='S') * M right singular vectors to be computed in VT and * M left singular vectors to be computed in U * IF( LWORK.GE.M*M+3*M ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+LDA*M ) THEN * * WORK(IU) is LDA by N * LDWRKU = LDA ELSE * * WORK(IU) is LDA by M * LDWRKU = M END IF ITAU = IU + LDWRKU*M IWORK = ITAU + M * * Compute A=L*Q * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IU), zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IU+LDWRKU ), LDWRKU ) * * Generate Q in A * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IU), copying result to U * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, \$ LDU ) * * Generate right bidiagonalizing vectors in WORK(IU) * (CWorkspace: need M*M+3*M-1, * prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in U * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of L in U and computing right * singular vectors of L in WORK(IU) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), \$ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply right singular vectors of L in WORK(IU) by * Q in A, storing result in VT * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), \$ LDWRKU, A, LDA, CZERO, VT, LDVT ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to U, zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ U( 1, 2 ), LDU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in U * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right bidiagonalizing vectors in U by Q * in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in U * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * END IF * ELSE IF( WNTVA ) THEN * IF( WNTUN ) THEN * * Path 7t(N much larger than M, JOBU='N', JOBVT='A') * N right singular vectors to be computed in VT and * no left singular vectors to be computed * IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN * * Sufficient workspace for a fast algorithm * IR = 1 IF( LWORK.GE.WRKBL+LDA*M ) THEN * * WORK(IR) is LDA by M * LDWRKR = LDA ELSE * * WORK(IR) is M by M * LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Copy L to WORK(IR), zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), \$ LDWRKR ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IR+LDWRKR ), LDWRKR ) * * Generate Q in VT * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IR) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate right bidiagonalizing vectors in WORK(IR) * (CWorkspace: need M*M+3*M-1, * prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), \$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply right singular vectors of L in WORK(IR) by * Q in VT, storing result in A * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), \$ LDWRKR, VT, LDVT, CZERO, A, LDA ) * * Copy right singular vectors of A from A to VT * CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need M+N, prefer M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Zero out above L in A * CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ A( 1, 2 ), LDA ) * * Bidiagonalize L in A * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right bidiagonalizing vectors in A by Q * in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, \$ LDVT, CDUM, 1, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTUO ) THEN * * Path 8t(N much larger than M, JOBU='O', JOBVT='A') * N right singular vectors to be computed in VT and * M left singular vectors to be overwritten on A * IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN * * WORK(IU) is LDA by M and WORK(IR) is LDA by M * LDWRKU = LDA IR = IU + LDWRKU*M LDWRKR = LDA ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN * * WORK(IU) is LDA by M and WORK(IR) is M by M * LDWRKU = LDA IR = IU + LDWRKU*M LDWRKR = M ELSE * * WORK(IU) is M by M and WORK(IR) is M by M * LDWRKU = M IR = IU + LDWRKU*M LDWRKR = M END IF ITAU = IR + LDWRKR*M IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IU), zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IU+LDWRKU ), LDWRKU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IU), copying result to * WORK(IR) * (CWorkspace: need 2*M*M+3*M, * prefer 2*M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, \$ WORK( IR ), LDWRKR ) * * Generate right bidiagonalizing vectors in WORK(IU) * (CWorkspace: need 2*M*M+3*M-1, * prefer 2*M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in WORK(IR) * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, \$ WORK( ITAUQ ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of L in WORK(IR) and computing * right singular vectors of L in WORK(IU) * (CWorkspace: need 2*M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), \$ WORK( IU ), LDWRKU, WORK( IR ), \$ LDWRKR, CDUM, 1, RWORK( IRWORK ), \$ INFO ) * * Multiply right singular vectors of L in WORK(IU) by * Q in VT, storing result in A * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), \$ LDWRKU, VT, LDVT, CZERO, A, LDA ) * * Copy right singular vectors of A from A to VT * CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) * * Copy left singular vectors of A from WORK(IR) to A * CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, \$ LDA ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need M+N, prefer M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Zero out above L in A * CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ A( 1, 2 ), LDA ) * * Bidiagonalize L in A * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right bidiagonalizing vectors in A by Q * in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in A * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of A in A and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, A, LDA, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * ELSE IF( WNTUAS ) THEN * * Path 9t(N much larger than M, JOBU='S' or 'A', * JOBVT='A') * N right singular vectors to be computed in VT and * M left singular vectors to be computed in U * IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN * * Sufficient workspace for a fast algorithm * IU = 1 IF( LWORK.GE.WRKBL+LDA*M ) THEN * * WORK(IU) is LDA by M * LDWRKU = LDA ELSE * * WORK(IU) is M by M * LDWRKU = M END IF ITAU = IU + LDWRKU*M IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to WORK(IU), zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), \$ LDWRKU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ WORK( IU+LDWRKU ), LDWRKU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in WORK(IU), copying result to U * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, \$ RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, \$ LDU ) * * Generate right bidiagonalizing vectors in WORK(IU) * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, \$ WORK( ITAUP ), WORK( IWORK ), \$ LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in U * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of L in U and computing right * singular vectors of L in WORK(IU) * (CWorkspace: need M*M) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), \$ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * * Multiply right singular vectors of L in WORK(IU) by * Q in VT, storing result in A * (CWorkspace: need M*M) * (RWorkspace: 0) * CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), \$ LDWRKU, VT, LDVT, CZERO, A, LDA ) * * Copy right singular vectors of A from A to VT * CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) * ELSE * * Insufficient workspace for a fast algorithm * ITAU = 1 IWORK = ITAU + M * * Compute A=L*Q, copying result to VT * (CWorkspace: need 2*M, prefer M+M*NB) * (RWorkspace: 0) * CALL CGELQF( M, N, A, LDA, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) * * Generate Q in VT * (CWorkspace: need M+N, prefer M+N*NB) * (RWorkspace: 0) * CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Copy L to U, zeroing out above it * CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, \$ U( 1, 2 ), LDU ) IE = 1 ITAUQ = ITAU ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize L in U * (CWorkspace: need 3*M, prefer 2*M+2*M*NB) * (RWorkspace: need M) * CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), \$ WORK( ITAUQ ), WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Multiply right bidiagonalizing vectors in U by Q * in VT * (CWorkspace: need 2*M+N, prefer 2*M+N*NB) * (RWorkspace: 0) * CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, \$ WORK( ITAUP ), VT, LDVT, \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) * * Generate left bidiagonalizing vectors in U * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) IRWORK = IE + M * * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, \$ RWORK( IRWORK ), INFO ) * END IF * END IF * END IF * ELSE * * N .LT. MNTHR * * Path 10t(N greater than M, but not much larger) * Reduce to bidiagonal form without LQ decomposition * IE = 1 ITAUQ = 1 ITAUP = ITAUQ + M IWORK = ITAUP + M * * Bidiagonalize A * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) * (RWorkspace: M) * CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), \$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, \$ IERR ) IF( WNTUAS ) THEN * * If left singular vectors desired in U, copy result to U * and generate left bidiagonalizing vectors in U * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTVAS ) THEN * * If right singular vectors desired in VT, copy result to * VT and generate right bidiagonalizing vectors in VT * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) * (RWorkspace: 0) * CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) IF( WNTVA ) \$ NRVT = N IF( WNTVS ) \$ NRVT = M CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTUO ) THEN * * If left singular vectors desired in A, generate left * bidiagonalizing vectors in A * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) * (RWorkspace: 0) * CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IF( WNTVO ) THEN * * If right singular vectors desired in A, generate right * bidiagonalizing vectors in A * (CWorkspace: need 3*M, prefer 2*M+M*NB) * (RWorkspace: 0) * CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), \$ WORK( IWORK ), LWORK-IWORK+1, IERR ) END IF IRWORK = IE + M IF( WNTUAS .OR. WNTUO ) \$ NRU = M IF( WNTUN ) \$ NRU = 0 IF( WNTVAS .OR. WNTVO ) \$ NCVT = N IF( WNTVN ) \$ NCVT = 0 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, \$ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in A * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, \$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), \$ INFO ) ELSE * * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in A and computing right singular * vectors in VT * (CWorkspace: 0) * (RWorkspace: need BDSPAC) * CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, \$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), \$ INFO ) END IF * END IF * END IF * * Undo scaling if necessary * IF( ISCL.EQ.1 ) THEN IF( ANRM.GT.BIGNUM ) \$ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, \$ IERR ) IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) \$ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, \$ RWORK( IE ), MINMN, IERR ) IF( ANRM.LT.SMLNUM ) \$ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, \$ IERR ) IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) \$ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, \$ RWORK( IE ), MINMN, IERR ) END IF * * Return optimal workspace in WORK(1) * WORK( 1 ) = MAXWRK * RETURN * * End of CGESVD * END