```      SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
\$                   LWORK, IWORK, INFO )
*
*  -- LAPACK driver routine (version 3.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.
*     November 2006
*
*     .. Scalar Arguments ..
CHARACTER          JOBZ
INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
*     ..
*     .. Array Arguments ..
INTEGER            IWORK( * )
REAL               A( LDA, * ), S( * ), U( LDU, * ),
\$                   VT( LDVT, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  SGESDD computes the singular value decomposition (SVD) of a real
*  M-by-N matrix A, optionally computing the left and right singular
*  vectors.  If singular vectors are desired, it uses a
*  divide-and-conquer algorithm.
*
*  The SVD is written
*
*       A = U * SIGMA * transpose(V)
*
*  where SIGMA is an M-by-N matrix which is zero except for its
*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
*  are the singular values of A; they are real and non-negative, and
*  are returned in descending order.  The first min(m,n) columns of
*  U and V are the left and right singular vectors of A.
*
*  Note that the routine returns VT = V**T, not V.
*
*  The divide and conquer algorithm makes very mild assumptions about
*  floating point arithmetic. It will work on machines with a guard
*  digit in add/subtract, or on those binary machines without guard
*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
*  without guard digits, but we know of none.
*
*  Arguments
*  =========
*
*  JOBZ    (input) CHARACTER*1
*          Specifies options for computing all or part of the matrix U:
*          = 'A':  all M columns of U and all N rows of V**T are
*                  returned in the arrays U and VT;
*          = 'S':  the first min(M,N) columns of U and the first
*                  min(M,N) rows of V**T are returned in the arrays U
*                  and VT;
*          = 'O':  If M >= N, the first N columns of U are overwritten
*                  on the array A and all rows of V**T are returned in
*                  the array VT;
*                  otherwise, all columns of U are returned in the
*                  array U and the first M rows of V**T are overwritten
*                  in the array A;
*          = 'N':  no columns of U or rows of V**T are computed.
*
*  M       (input) INTEGER
*          The number of rows of the input matrix A.  M >= 0.
*
*  N       (input) INTEGER
*          The number of columns of the input matrix A.  N >= 0.
*
*  A       (input/output) REAL array, dimension (LDA,N)
*          On entry, the M-by-N matrix A.
*          On exit,
*          if JOBZ = 'O',  A is overwritten with the first N columns
*                          of U (the left singular vectors, stored
*                          columnwise) if M >= N;
*                          A is overwritten with the first M rows
*                          of V**T (the right singular vectors, stored
*                          rowwise) otherwise.
*          if JOBZ .ne. 'O', the contents of A are destroyed.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,M).
*
*  S       (output) REAL array, dimension (min(M,N))
*          The singular values of A, sorted so that S(i) >= S(i+1).
*
*  U       (output) REAL array, dimension (LDU,UCOL)
*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
*          UCOL = min(M,N) if JOBZ = 'S'.
*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
*          orthogonal matrix U;
*          if JOBZ = 'S', U contains the first min(M,N) columns of U
*          (the left singular vectors, stored columnwise);
*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
*
*  LDU     (input) INTEGER
*          The leading dimension of the array U.  LDU >= 1; if
*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
*
*  VT      (output) REAL array, dimension (LDVT,N)
*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
*          N-by-N orthogonal matrix V**T;
*          if JOBZ = 'S', VT contains the first min(M,N) rows of
*          V**T (the right singular vectors, stored rowwise);
*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
*
*  LDVT    (input) INTEGER
*          The leading dimension of the array VT.  LDVT >= 1; if
*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
*          if JOBZ = 'S', LDVT >= min(M,N).
*
*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
*  LWORK   (input) INTEGER
*          The dimension of the array WORK. LWORK >= 1.
*          If JOBZ = 'N',
*            LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)).
*          If JOBZ = 'O',
*            LWORK >= 3*min(M,N)*min(M,N) +
*                     max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
*          If JOBZ = 'S' or 'A'
*            LWORK >= 3*min(M,N)*min(M,N) +
*                     max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
*          For good performance, LWORK should generally be larger.
*          If LWORK = -1 but other input arguments are legal, WORK(1)
*          returns the optimal LWORK.
*
*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
*
*  INFO    (output) INTEGER
*          = 0:  successful exit.
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*          > 0:  SBDSDC did not converge, updating process failed.
*
*  Further Details
*  ===============
*
*  Based on contributions by
*     Ming Gu and Huan Ren, Computer Science Division, University of
*     California at Berkeley, USA
*
*  =====================================================================
*
*     .. Parameters ..
REAL               ZERO, ONE
PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
*     ..
*     .. Local Scalars ..
LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
\$                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
\$                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
\$                   MNTHR, NWORK, WRKBL
REAL               ANRM, BIGNUM, EPS, SMLNUM
*     ..
*     .. Local Arrays ..
INTEGER            IDUM( 1 )
REAL               DUM( 1 )
*     ..
*     .. External Subroutines ..
EXTERNAL           SBDSDC, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
\$                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
\$                   XERBLA
*     ..
*     .. External Functions ..
LOGICAL            LSAME
INTEGER            ILAENV
REAL               SLAMCH, SLANGE
EXTERNAL           ILAENV, LSAME, SLAMCH, SLANGE
*     ..
*     .. Intrinsic Functions ..
INTRINSIC          INT, MAX, MIN, SQRT
*     ..
*     .. Executable Statements ..
*
*     Test the input arguments
*
INFO = 0
MINMN = MIN( M, N )
WNTQA = LSAME( JOBZ, 'A' )
WNTQS = LSAME( JOBZ, 'S' )
WNTQAS = WNTQA .OR. WNTQS
WNTQO = LSAME( JOBZ, 'O' )
WNTQN = LSAME( JOBZ, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
INFO = -1
ELSE IF( M.LT.0 ) THEN
INFO = -2
ELSE IF( N.LT.0 ) THEN
INFO = -3
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -5
ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
\$         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
INFO = -8
ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
\$         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
\$         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
INFO = -10
END IF
*
*     Compute workspace
*      (Note: Comments in the code beginning "Workspace:" describe the
*       minimal amount of workspace needed at that point in the code,
*       as well as the preferred amount for good performance.
*       NB refers to the optimal block size for the immediately
*       following subroutine, as returned by ILAENV.)
*
IF( INFO.EQ.0 ) THEN
MINWRK = 1
MAXWRK = 1
IF( M.GE.N .AND. MINMN.GT.0 ) THEN
*
*           Compute space needed for SBDSDC
*
MNTHR = INT( MINMN*11.0E0 / 6.0E0 )
IF( WNTQN ) THEN
BDSPAC = 7*N
ELSE
BDSPAC = 3*N*N + 4*N
END IF
IF( M.GE.MNTHR ) THEN
IF( WNTQN ) THEN
*
*                 Path 1 (M much larger than N, JOBZ='N')
*
WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
\$                    -1 )
WRKBL = MAX( WRKBL, 3*N+2*N*
\$                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
MAXWRK = MAX( WRKBL, BDSPAC+N )
MINWRK = BDSPAC + N
ELSE IF( WNTQO ) THEN
*
*                 Path 2 (M much larger than N, JOBZ='O')
*
WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
\$                    N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
\$                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*N )
MAXWRK = WRKBL + 2*N*N
MINWRK = BDSPAC + 2*N*N + 3*N
ELSE IF( WNTQS ) THEN
*
*                 Path 3 (M much larger than N, JOBZ='S')
*
WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
\$                    N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
\$                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*N )
MAXWRK = WRKBL + N*N
MINWRK = BDSPAC + N*N + 3*N
ELSE IF( WNTQA ) THEN
*
*                 Path 4 (M much larger than N, JOBZ='A')
*
WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
\$                    M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
\$                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*N )
MAXWRK = WRKBL + N*N
MINWRK = BDSPAC + N*N + 3*N
END IF
ELSE
*
*              Path 5 (M at least N, but not much larger)
*
WRKBL = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1,
\$                 -1 )
IF( WNTQN ) THEN
MAXWRK = MAX( WRKBL, BDSPAC+3*N )
MINWRK = 3*N + MAX( M, BDSPAC )
ELSE IF( WNTQO ) THEN
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*N )
MAXWRK = WRKBL + M*N
MINWRK = 3*N + MAX( M, N*N+BDSPAC )
ELSE IF( WNTQS ) THEN
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
MAXWRK = MAX( WRKBL, BDSPAC+3*N )
MINWRK = 3*N + MAX( M, BDSPAC )
ELSE IF( WNTQA ) THEN
WRKBL = MAX( WRKBL, 3*N+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
MAXWRK = MAX( MAXWRK, BDSPAC+3*N )
MINWRK = 3*N + MAX( M, BDSPAC )
END IF
END IF
ELSE IF ( MINMN.GT.0 ) THEN
*
*           Compute space needed for SBDSDC
*
MNTHR = INT( MINMN*11.0E0 / 6.0E0 )
IF( WNTQN ) THEN
BDSPAC = 7*M
ELSE
BDSPAC = 3*M*M + 4*M
END IF
IF( N.GE.MNTHR ) THEN
IF( WNTQN ) THEN
*
*                 Path 1t (N much larger than M, JOBZ='N')
*
WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
\$                    -1 )
WRKBL = MAX( WRKBL, 3*M+2*M*
\$                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
MAXWRK = MAX( WRKBL, BDSPAC+M )
MINWRK = BDSPAC + M
ELSE IF( WNTQO ) THEN
*
*                 Path 2t (N much larger than M, JOBZ='O')
*
WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
\$                    N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
\$                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*M )
MAXWRK = WRKBL + 2*M*M
MINWRK = BDSPAC + 2*M*M + 3*M
ELSE IF( WNTQS ) THEN
*
*                 Path 3t (N much larger than M, JOBZ='S')
*
WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
\$                    N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
\$                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*M )
MAXWRK = WRKBL + M*M
MINWRK = BDSPAC + M*M + 3*M
ELSE IF( WNTQA ) THEN
*
*                 Path 4t (N much larger than M, JOBZ='A')
*
WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
\$                    N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
\$                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*M )
MAXWRK = WRKBL + M*M
MINWRK = BDSPAC + M*M + 3*M
END IF
ELSE
*
*              Path 5t (N greater than M, but not much larger)
*
WRKBL = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1,
\$                 -1 )
IF( WNTQN ) THEN
MAXWRK = MAX( WRKBL, BDSPAC+3*M )
MINWRK = 3*M + MAX( N, BDSPAC )
ELSE IF( WNTQO ) THEN
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC+3*M )
MAXWRK = WRKBL + M*N
MINWRK = 3*M + MAX( N, M*M+BDSPAC )
ELSE IF( WNTQS ) THEN
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) )
MAXWRK = MAX( WRKBL, BDSPAC+3*M )
MINWRK = 3*M + MAX( N, BDSPAC )
ELSE IF( WNTQA ) THEN
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
\$                    ILAENV( 1, 'SORMBR', 'PRT', N, N, M, -1 ) )
MAXWRK = MAX( WRKBL, BDSPAC+3*M )
MINWRK = 3*M + MAX( N, BDSPAC )
END IF
END IF
END IF
MAXWRK = MAX( MAXWRK, MINWRK )
WORK( 1 ) = MAXWRK
*
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
INFO = -12
END IF
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGESDD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
*     Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RETURN
END IF
*
*     Get machine constants
*
EPS = SLAMCH( 'P' )
SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
BIGNUM = ONE / SMLNUM
*
*     Scale A if max element outside range [SMLNUM,BIGNUM]
*
ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
ISCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
ISCL = 1
CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
ELSE IF( ANRM.GT.BIGNUM ) THEN
ISCL = 1
CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
END IF
*
IF( M.GE.N ) THEN
*
*        A has at least as many rows as columns. If A has sufficiently
*        more rows than columns, first reduce using the QR
*        decomposition (if sufficient workspace available)
*
IF( M.GE.MNTHR ) THEN
*
IF( WNTQN ) THEN
*
*              Path 1 (M much larger than N, JOBZ='N')
*              No singular vectors to be computed
*
ITAU = 1
NWORK = ITAU + N
*
*              Compute A=Q*R
*              (Workspace: need 2*N, prefer N+N*NB)
*
CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Zero out below R
*
CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
*              Bidiagonalize R in A
*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                      IERR )
NWORK = IE + N
*
*              Perform bidiagonal SVD, computing singular values only
*              (Workspace: need N+BDSPAC)
*
CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
\$                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
*
ELSE IF( WNTQO ) THEN
*
*              Path 2 (M much larger than N, JOBZ = 'O')
*              N left singular vectors to be overwritten on A and
*              N right singular vectors to be computed in VT
*
IR = 1
*
*              WORK(IR) is LDWRKR by N
*
IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN
LDWRKR = LDA
ELSE
LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N
END IF
ITAU = IR + LDWRKR*N
NWORK = ITAU + N
*
*              Compute A=Q*R
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Copy R to WORK(IR), zeroing out below it
*
CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
\$                      LDWRKR )
*
*              Generate Q in A
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
*              Bidiagonalize R in WORK(IR)
*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
\$                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              WORK(IU) is N by N
*
IU = NWORK
NWORK = IU + N*N
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              (Workspace: need N+N*N+BDSPAC)
*
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
\$                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite WORK(IU) by left singular vectors of R
*              and VT by right singular vectors of R
*              (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
\$                      WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Multiply Q in A by left singular vectors of R in
*              WORK(IU), storing result in WORK(IR) and copying to A
*              (Workspace: need 2*N*N, prefer N*N+M*N)
*
DO 10 I = 1, M, LDWRKR
CHUNK = MIN( M-I+1, LDWRKR )
CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
\$                        LDA, WORK( IU ), N, ZERO, WORK( IR ),
\$                        LDWRKR )
CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
\$                         A( I, 1 ), LDA )
10          CONTINUE
*
ELSE IF( WNTQS ) THEN
*
*              Path 3 (M much larger than N, JOBZ='S')
*              N left singular vectors to be computed in U and
*              N right singular vectors to be computed in VT
*
IR = 1
*
*              WORK(IR) is N by N
*
LDWRKR = N
ITAU = IR + LDWRKR*N
NWORK = ITAU + N
*
*              Compute A=Q*R
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Copy R to WORK(IR), zeroing out below it
*
CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
\$                      LDWRKR )
*
*              Generate Q in A
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
*              Bidiagonalize R in WORK(IR)
*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
\$                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need N+BDSPAC)
*
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite U by left singular vectors of R and VT
*              by right singular vectors of R
*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Multiply Q in A by left singular vectors of R in
*              WORK(IR), storing result in U
*              (Workspace: need N*N)
*
CALL SLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
\$                     LDWRKR, ZERO, U, LDU )
*
ELSE IF( WNTQA ) THEN
*
*              Path 4 (M much larger than N, JOBZ='A')
*              M left singular vectors to be computed in U and
*              N right singular vectors to be computed in VT
*
IU = 1
*
*              WORK(IU) is N by N
*
LDWRKU = N
ITAU = IU + LDWRKU*N
NWORK = ITAU + N
*
*              Compute A=Q*R, copying result to U
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
*
*              Generate Q in U
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*              Produce R in A, zeroing out other entries
*
CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
*              Bidiagonalize R in A
*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                      IERR )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              (Workspace: need N+N*N+BDSPAC)
*
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
\$                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite WORK(IU) by left singular vectors of R and VT
*              by right singular vectors of R
*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
\$                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Multiply Q in U by left singular vectors of R in
*              WORK(IU), storing result in A
*              (Workspace: need N*N)
*
CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
\$                     LDWRKU, ZERO, A, LDA )
*
*              Copy left singular vectors of A from A to U
*
CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
*
END IF
*
ELSE
*
*           M .LT. MNTHR
*
*           Path 5 (M at least N, but not much larger)
*           Reduce to bidiagonal form without QR decomposition
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
NWORK = ITAUP + N
*
*           Bidiagonalize A
*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
*
CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                   IERR )
IF( WNTQN ) THEN
*
*              Perform bidiagonal SVD, only computing singular values
*              (Workspace: need N+BDSPAC)
*
CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
\$                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
ELSE IF( WNTQO ) THEN
IU = NWORK
IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
*
*                 WORK( IU ) is M by N
*
LDWRKU = M
NWORK = IU + LDWRKU*N
CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
\$                         LDWRKU )
ELSE
*
*                 WORK( IU ) is N by N
*
LDWRKU = N
NWORK = IU + LDWRKU*N
*
*                 WORK(IR) is LDWRKR by N
*
IR = NWORK
LDWRKR = ( LWORK-N*N-3*N ) / N
END IF
NWORK = IU + LDWRKU*N
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              (Workspace: need N+N*N+BDSPAC)
*
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
\$                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
\$                      IWORK, INFO )
*
*              Overwrite VT by right singular vectors of A
*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
*
*                 Overwrite WORK(IU) by left singular vectors of A
*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
\$                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
\$                         WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*                 Copy left singular vectors of A from WORK(IU) to A
*
CALL SLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
ELSE
*
*                 Generate Q in A
*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
\$                         WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*                 Multiply Q in A by left singular vectors of
*                 bidiagonal matrix in WORK(IU), storing result in
*                 WORK(IR) and copying to A
*                 (Workspace: need 2*N*N, prefer N*N+M*N)
*
DO 20 I = 1, M, LDWRKR
CHUNK = MIN( M-I+1, LDWRKR )
CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
\$                           LDA, WORK( IU ), LDWRKU, ZERO,
\$                           WORK( IR ), LDWRKR )
CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
\$                            A( I, 1 ), LDA )
20             CONTINUE
END IF
*
ELSE IF( WNTQS ) THEN
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need N+BDSPAC)
*
CALL SLASET( 'F', M, N, ZERO, ZERO, U, LDU )
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              (Workspace: need 3*N, prefer 2*N+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
ELSE IF( WNTQA ) THEN
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need N+BDSPAC)
*
CALL SLASET( 'F', M, M, ZERO, ZERO, U, LDU )
CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Set the right corner of U to identity matrix
*
IF( M.GT.N ) THEN
CALL SLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ),
\$                         LDU )
END IF
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
END IF
*
END IF
*
ELSE
*
*        A has more columns than rows. If A has sufficiently more
*        columns than rows, first reduce using the LQ decomposition (if
*        sufficient workspace available)
*
IF( N.GE.MNTHR ) THEN
*
IF( WNTQN ) THEN
*
*              Path 1t (N much larger than M, JOBZ='N')
*              No singular vectors to be computed
*
ITAU = 1
NWORK = ITAU + M
*
*              Compute A=L*Q
*              (Workspace: need 2*M, prefer M+M*NB)
*
CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Zero out above L
*
CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
*              Bidiagonalize L in A
*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                      IERR )
NWORK = IE + M
*
*              Perform bidiagonal SVD, computing singular values only
*              (Workspace: need M+BDSPAC)
*
CALL SBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
\$                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
*
ELSE IF( WNTQO ) THEN
*
*              Path 2t (N much larger than M, JOBZ='O')
*              M right singular vectors to be overwritten on A and
*              M left singular vectors to be computed in U
*
IVT = 1
*
*              WORK(IVT) is M by M
*
IL = IVT + M*M
IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN
*
*                 WORK(IL) is M by N
*
LDWRKL = M
CHUNK = N
ELSE
LDWRKL = M
CHUNK = ( LWORK-M*M ) / M
END IF
ITAU = IL + LDWRKL*M
NWORK = ITAU + M
*
*              Compute A=L*Q
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Copy L to WORK(IL), zeroing out above it
*
CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
\$                      WORK( IL+LDWRKL ), LDWRKL )
*
*              Generate Q in A
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
*              Bidiagonalize L in WORK(IL)
*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
\$                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U, and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              (Workspace: need M+M*M+BDSPAC)
*
CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
\$                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
\$                      IWORK, INFO )
*
*              Overwrite U by left singular vectors of L and WORK(IVT)
*              by right singular vectors of L
*              (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
\$                      WORK( ITAUP ), WORK( IVT ), M,
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*              Multiply right singular vectors of L in WORK(IVT) by Q
*              in A, storing result in WORK(IL) and copying to A
*              (Workspace: need 2*M*M, prefer M*M+M*N)
*
DO 30 I = 1, N, CHUNK
BLK = MIN( N-I+1, CHUNK )
CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
\$                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
CALL SLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
\$                         A( 1, I ), LDA )
30          CONTINUE
*
ELSE IF( WNTQS ) THEN
*
*              Path 3t (N much larger than M, JOBZ='S')
*              M right singular vectors to be computed in VT and
*              M left singular vectors to be computed in U
*
IL = 1
*
*              WORK(IL) is M by M
*
LDWRKL = M
ITAU = IL + LDWRKL*M
NWORK = ITAU + M
*
*              Compute A=L*Q
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Copy L to WORK(IL), zeroing out above it
*
CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
\$                      WORK( IL+LDWRKL ), LDWRKL )
*
*              Generate Q in A
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
*              Bidiagonalize L in WORK(IL)
*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
\$                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need M+BDSPAC)
*
CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite U by left singular vectors of L and VT
*              by right singular vectors of L
*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
*              Multiply right singular vectors of L in WORK(IL) by
*              Q in A, storing result in VT
*              (Workspace: need M*M)
*
CALL SLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
\$                     A, LDA, ZERO, VT, LDVT )
*
ELSE IF( WNTQA ) THEN
*
*              Path 4t (N much larger than M, JOBZ='A')
*              N right singular vectors to be computed in VT and
*              M left singular vectors to be computed in U
*
IVT = 1
*
*              WORK(IVT) is M by M
*
LDWKVT = M
ITAU = IVT + LDWKVT*M
NWORK = ITAU + M
*
*              Compute A=L*Q, copying result to VT
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
*              Generate Q in VT
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*              Produce L in A, zeroing out other entries
*
CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
*              Bidiagonalize L in A
*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                      IERR )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              (Workspace: need M+M*M+BDSPAC)
*
CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
\$                      WORK( IVT ), LDWKVT, DUM, IDUM,
\$                      WORK( NWORK ), IWORK, INFO )
*
*              Overwrite U by left singular vectors of L and WORK(IVT)
*              by right singular vectors of L
*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
\$                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
\$                      WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*              Multiply right singular vectors of L in WORK(IVT) by
*              Q in VT, storing result in A
*              (Workspace: need M*M)
*
CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
\$                     VT, LDVT, ZERO, A, LDA )
*
*              Copy right singular vectors of A from A to VT
*
CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
*
END IF
*
ELSE
*
*           N .LT. MNTHR
*
*           Path 5t (N greater than M, but not much larger)
*           Reduce to bidiagonal form without LQ decomposition
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
NWORK = ITAUP + M
*
*           Bidiagonalize A
*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
\$                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
\$                   IERR )
IF( WNTQN ) THEN
*
*              Perform bidiagonal SVD, only computing singular values
*              (Workspace: need M+BDSPAC)
*
CALL SBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
\$                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
ELSE IF( WNTQO ) THEN
LDWKVT = M
IVT = NWORK
IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
*
*                 WORK( IVT ) is M by N
*
CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
\$                         LDWKVT )
NWORK = IVT + LDWKVT*N
ELSE
*
*                 WORK( IVT ) is M by M
*
NWORK = IVT + LDWKVT*M
IL = NWORK
*
*                 WORK(IL) is M by CHUNK
*
CHUNK = ( LWORK-M*M-3*M ) / M
END IF
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              (Workspace: need M*M+BDSPAC)
*
CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
\$                      WORK( IVT ), LDWKVT, DUM, IDUM,
\$                      WORK( NWORK ), IWORK, INFO )
*
*              Overwrite U by left singular vectors of A
*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
*
IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
*
*                 Overwrite WORK(IVT) by right singular vectors of A
*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
\$                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
\$                         WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*                 Copy right singular vectors of A from WORK(IVT) to A
*
CALL SLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
ELSE
*
*                 Generate P**T in A
*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
\$                         WORK( NWORK ), LWORK-NWORK+1, IERR )
*
*                 Multiply right singular vectors of bidiagonal matrix
*                 in WORK(IVT) by P**T in A, storing result in
*                 WORK(IL) and copying to A
*                 (Workspace: need 2*M*M, prefer M*M+M*N)
*
DO 40 I = 1, N, CHUNK
BLK = MIN( N-I+1, CHUNK )
CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
\$                           LDWKVT, A( 1, I ), LDA, ZERO,
\$                           WORK( IL ), M )
CALL SLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ),
\$                            LDA )
40             CONTINUE
END IF
ELSE IF( WNTQS ) THEN
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need M+BDSPAC)
*
CALL SLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              (Workspace: need 3*M, prefer 2*M+M*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
ELSE IF( WNTQA ) THEN
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              (Workspace: need M+BDSPAC)
*
CALL SLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
\$                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
\$                      INFO )
*
*              Set the lower right corner of VT to identity matrix
*
IF( N.GT.M ) THEN
CALL SLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ),
\$                         LDVT )
END IF
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              (Workspace: need 2*M+N, prefer 2*M+N*NB)
*
CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
\$                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
\$                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
\$                      LWORK-NWORK+1, IERR )
END IF
*
END IF
*
END IF
*
*     Undo scaling if necessary
*
IF( ISCL.EQ.1 ) THEN
IF( ANRM.GT.BIGNUM )
\$      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
\$                   IERR )
IF( ANRM.LT.SMLNUM )
\$      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
\$                   IERR )
END IF
*
*     Return optimal workspace in WORK(1)
*
WORK( 1 ) = MAXWRK
*
RETURN
*
*     End of SGESDD
*
END

```