d5/d1f/cgesvd_8f_source.html

*> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download CGESVD + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,

*                          WORK, LWORK, RWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          JOBU, JOBVT

*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N

*       ..

*       .. Array Arguments ..

*       REAL               RWORK( * ), S( * )

*       COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ),

*      $                   WORK( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> CGESVD computes the singular value decomposition (SVD) of a complex

*> M-by-N matrix A, optionally computing the left and/or right singular

*> vectors. The SVD is written

*>

*>      A = U * SIGMA * conjugate-transpose(V)

*>

*> where SIGMA is an M-by-N matrix which is zero except for its

*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and

*> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA

*> are the singular values of A; they are real and non-negative, and

*> are returned in descending order.  The first min(m,n) columns of

*> U and V are the left and right singular vectors of A.

*>

*> Note that the routine returns V**H, not V.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] JOBU

*> \verbatim

*>          JOBU is CHARACTER*1

*>          Specifies options for computing all or part of the matrix U:

*>          = 'A':  all M columns of U are returned in array U;

*>          = 'S':  the first min(m,n) columns of U (the left singular

*>                  vectors) are returned in the array U;

*>          = 'O':  the first min(m,n) columns of U (the left singular

*>                  vectors) are overwritten on the array A;

*>          = 'N':  no columns of U (no left singular vectors) are

*>                  computed.

*> \endverbatim

*>

*> \param[in] JOBVT

*> \verbatim

*>          JOBVT is CHARACTER*1

*>          Specifies options for computing all or part of the matrix

*>          V**H:

*>          = 'A':  all N rows of V**H are returned in the array VT;

*>          = 'S':  the first min(m,n) rows of V**H (the right singular

*>                  vectors) are returned in the array VT;

*>          = 'O':  the first min(m,n) rows of V**H (the right singular

*>                  vectors) are overwritten on the array A;

*>          = 'N':  no rows of V**H (no right singular vectors) are

*>                  computed.

*>

*>          JOBVT and JOBU cannot both be 'O'.

*> \endverbatim

*>

*> \param[in] M

*> \verbatim

*>          M is INTEGER

*>          The number of rows of the input matrix A.  M >= 0.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The number of columns of the input matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is COMPLEX array, dimension (LDA,N)

*>          On entry, the M-by-N matrix A.

*>          On exit,

*>          if JOBU = 'O',  A is overwritten with the first min(m,n)

*>                          columns of U (the left singular vectors,

*>                          stored columnwise);

*>          if JOBVT = 'O', A is overwritten with the first min(m,n)

*>                          rows of V**H (the right singular vectors,

*>                          stored rowwise);

*>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A

*>                          are destroyed.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,M).

*> \endverbatim

*>

*> \param[out] S

*> \verbatim

*>          S is REAL array, dimension (min(M,N))

*>          The singular values of A, sorted so that S(i) >= S(i+1).

*> \endverbatim

*>

*> \param[out] U

*> \verbatim

*>          U is COMPLEX array, dimension (LDU,UCOL)

*>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.

*>          If JOBU = 'A', U contains the M-by-M unitary matrix U;

*>          if JOBU = 'S', U contains the first min(m,n) columns of U

*>          (the left singular vectors, stored columnwise);

*>          if JOBU = 'N' or 'O', U is not referenced.

*> \endverbatim

*>

*> \param[in] LDU

*> \verbatim

*>          LDU is INTEGER

*>          The leading dimension of the array U.  LDU >= 1; if

*>          JOBU = 'S' or 'A', LDU >= M.

*> \endverbatim

*>

*> \param[out] VT

*> \verbatim

*>          VT is COMPLEX array, dimension (LDVT,N)

*>          If JOBVT = 'A', VT contains the N-by-N unitary matrix

*>          V**H;

*>          if JOBVT = 'S', VT contains the first min(m,n) rows of

*>          V**H (the right singular vectors, stored rowwise);

*>          if JOBVT = 'N' or 'O', VT is not referenced.

*> \endverbatim

*>

*> \param[in] LDVT

*> \verbatim

*>          LDVT is INTEGER

*>          The leading dimension of the array VT.  LDVT >= 1; if

*>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is COMPLEX array, dimension (MAX(1,LWORK))

*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The dimension of the array WORK.

*>          LWORK >=  MAX(1,2*MIN(M,N)+MAX(M,N)).

*>          For good performance, LWORK should generally be larger.

*>

*>          If LWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal size of the WORK array, returns

*>          this value as the first entry of the WORK array, and no error

*>          message related to LWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] RWORK

*> \verbatim

*>          RWORK is REAL array, dimension (5*min(M,N))

*>          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the

*>          unconverged superdiagonal elements of an upper bidiagonal

*>          matrix B whose diagonal is in S (not necessarily sorted).

*>          B satisfies A = U * B * VT, so it has the same singular

*>          values as A, and singular vectors related by U and VT.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit.

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*>          > 0:  if CBDSQR did not converge, INFO specifies how many

*>                superdiagonals of an intermediate bidiagonal form B

*>                did not converge to zero. See the description of RWORK

*>                above for details.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date April 2012

*

*> \ingroup complexGEsing

*

*  =====================================================================

      SUBROUTINE cgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,

     $                   work, lwork, rwork, info )

*

*  -- LAPACK driver routine (version 3.4.1) --

*  -- LAPACK is a software package provided by Univ. of Tennessee,    --

*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --

*     April 2012

*

*     .. Scalar Arguments ..

      CHARACTER          jobu, jobvt

      INTEGER            info, lda, ldu, ldvt, lwork, m, n

*     ..

*     .. Array Arguments ..

      REAL               rwork( * ), s( * )

      COMPLEX            a( lda, * ), u( ldu, * ), vt( ldvt, * ),

     $                   work( * )

*     ..

*

*  =====================================================================

*

*     .. Parameters ..

      COMPLEX            czero, cone

      parameter( czero = ( 0.0e0, 0.0e0 ),

     $                   cone = ( 1.0e0, 0.0e0 ) )

      REAL               zero, one

      parameter( zero = 0.0e0, one = 1.0e0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            lquery, wntua, wntuas, wntun, wntuo, wntus,

     $                   wntva, wntvas, wntvn, wntvo, wntvs

      INTEGER            blk, chunk, i, ie, ierr, ir, irwork, iscl,

     $                   itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,

     $                   maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,

     $                   nrvt, wrkbl

      INTEGER            lwork_cgeqrf, lwork_cungqr_n, lwork_cungqr_m,

     $                   lwork_cgebrd, lwork_cungbr_p, lwork_cungbr_q,

     $                   lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m

      REAL               anrm, bignum, eps, smlnum

*     ..

*     .. Local Arrays ..

      REAL               dum( 1 )

      COMPLEX            cdum( 1 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,

     $                   clascl, claset, cungbr, cunglq, cungqr, cunmbr,

     $                   slascl, xerbla

*     ..

*     .. External Functions ..

      LOGICAL            lsame

      INTEGER            ilaenv

      REAL               clange, slamch

      EXTERNAL           lsame, ilaenv, clange, slamch

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          max, min, sqrt

*     ..

*     .. Executable Statements ..

*

*     Test the input arguments

*

      info = 0

      minmn = min( m, n )

      wntua = lsame( jobu, 'A' )

      wntus = lsame( jobu, 'S' )

      wntuas = wntua .OR. wntus

      wntuo = lsame( jobu, 'O' )

      wntun = lsame( jobu, 'N' )

      wntva = lsame( jobvt, 'A' )

      wntvs = lsame( jobvt, 'S' )

      wntvas = wntva .OR. wntvs

      wntvo = lsame( jobvt, 'O' )

      wntvn = lsame( jobvt, 'N' )

      lquery = ( lwork.EQ.-1 )

*

      IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN

         info = -1

      ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.

     $         ( wntvo .AND. wntuo ) ) THEN

         info = -2

      ELSE IF( m.LT.0 ) THEN

         info = -3

      ELSE IF( n.LT.0 ) THEN

         info = -4

      ELSE IF( lda.LT.max( 1, m ) ) THEN

         info = -6

      ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN

         info = -9

      ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.

     $         ( wntvs .AND. ldvt.LT.minmn ) ) THEN

         info = -11

      END IF

*

*     Compute workspace

*      (Note: Comments in the code beginning "Workspace:" describe the

*       minimal amount of workspace needed at that point in the code,

*       as well as the preferred amount for good performance.

*       CWorkspace refers to complex workspace, and RWorkspace to

*       real workspace. NB refers to the optimal block size for the

*       immediately following subroutine, as returned by ILAENV.)

*

      IF( info.EQ.0 ) THEN

         minwrk = 1

         maxwrk = 1

         IF( m.GE.n .AND. minmn.GT.0 ) THEN

*

*           Space needed for CBDSQR is BDSPAC = 5*N

*

            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )

*           Compute space needed for CGEQRF

            CALL cgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )

            lwork_cgeqrf=dum(1)

*           Compute space needed for CUNGQR

            CALL cungqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )

            lwork_cungqr_n=dum(1)

            CALL cungqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )

            lwork_cungqr_m=dum(1)

*           Compute space needed for CGEBRD

            CALL cgebrd( n, n, a, lda, s, dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_cgebrd=dum(1)

*           Compute space needed for CUNGBR

            CALL cungbr( 'P', n, n, n, a, lda, dum(1),

     $                   dum(1), -1, ierr )

            lwork_cungbr_p=dum(1)

            CALL cungbr( 'Q', n, n, n, a, lda, dum(1),

     $                   dum(1), -1, ierr )

            lwork_cungbr_q=dum(1)

*

            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )

            IF( m.GE.mnthr ) THEN

               IF( wntun ) THEN

*

*                 Path 1 (M much larger than N, JOBU='N')

*

                  maxwrk = n + lwork_cgeqrf

                  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )

                  IF( wntvo .OR. wntvas )

     $               maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )

                  minwrk = 3*n

               ELSE IF( wntuo .AND. wntvn ) THEN

*

*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_n )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  maxwrk = max( n*n+wrkbl, n*n+m*n )

                  minwrk = 2*n + m

               ELSE IF( wntuo .AND. wntvas ) THEN

*

*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or

*                 'A')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_n )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )

                  maxwrk = max( n*n+wrkbl, n*n+m*n )

                  minwrk = 2*n + m

               ELSE IF( wntus .AND. wntvn ) THEN

*

*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_n )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  maxwrk = n*n + wrkbl

                  minwrk = 2*n + m

               ELSE IF( wntus .AND. wntvo ) THEN

*

*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_n )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )

                  maxwrk = 2*n*n + wrkbl

                  minwrk = 2*n + m

               ELSE IF( wntus .AND. wntvas ) THEN

*

*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or

*                 'A')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_n )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )

                  maxwrk = n*n + wrkbl

                  minwrk = 2*n + m

               ELSE IF( wntua .AND. wntvn ) THEN

*

*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_m )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  maxwrk = n*n + wrkbl

                  minwrk = 2*n + m

               ELSE IF( wntua .AND. wntvo ) THEN

*

*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_m )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )

                  maxwrk = 2*n*n + wrkbl

                  minwrk = 2*n + m

               ELSE IF( wntua .AND. wntvas ) THEN

*

*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or

*                 'A')

*

                  wrkbl = n + lwork_cgeqrf

                  wrkbl = max( wrkbl, n+lwork_cungqr_m )

                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )

                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )

                  maxwrk = n*n + wrkbl

                  minwrk = 2*n + m

               END IF

            ELSE

*

*              Path 10 (M at least N, but not much larger)

*

               CALL cgebrd( m, n, a, lda, s, dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

               lwork_cgebrd=dum(1)

               maxwrk = 2*n + lwork_cgebrd

               IF( wntus .OR. wntuo ) THEN

                  CALL cungbr( 'Q', m, n, n, a, lda, dum(1),

     $                   dum(1), -1, ierr )

                  lwork_cungbr_q=dum(1)

                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )

               END IF

               IF( wntua ) THEN

                  CALL cungbr( 'Q', m, m, n, a, lda, dum(1),

     $                   dum(1), -1, ierr )

                  lwork_cungbr_q=dum(1)

                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )

               END IF

               IF( .NOT.wntvn ) THEN

                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )

               minwrk = 2*n + m

               END IF

            END IF

         ELSE IF( minmn.GT.0 ) THEN

*

*           Space needed for CBDSQR is BDSPAC = 5*M

*

            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )

*           Compute space needed for CGELQF

            CALL cgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )

            lwork_cgelqf=dum(1)

*           Compute space needed for CUNGLQ

            CALL cunglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )

            lwork_cunglq_n=dum(1)

            CALL cunglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )

            lwork_cunglq_m=dum(1)

*           Compute space needed for CGEBRD

            CALL cgebrd( m, m, a, lda, s, dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_cgebrd=dum(1)

*            Compute space needed for CUNGBR P

            CALL cungbr( 'P', m, m, m, a, n, dum(1),

     $                   dum(1), -1, ierr )

            lwork_cungbr_p=dum(1)

*           Compute space needed for CUNGBR Q

            CALL cungbr( 'Q', m, m, m, a, n, dum(1),

     $                   dum(1), -1, ierr )

            lwork_cungbr_q=dum(1)

            IF( n.GE.mnthr ) THEN

               IF( wntvn ) THEN

*

*                 Path 1t(N much larger than M, JOBVT='N')

*

                  maxwrk = m + lwork_cgelqf

                  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )

                  IF( wntuo .OR. wntuas )

     $               maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )

                  minwrk = 3*m

               ELSE IF( wntvo .AND. wntun ) THEN

*

*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_m )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  maxwrk = max( m*m+wrkbl, m*m+m*n )

                  minwrk = 2*m + n

               ELSE IF( wntvo .AND. wntuas ) THEN

*

*                 Path 3t(N much larger than M, JOBU='S' or 'A',

*                 JOBVT='O')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_m )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )

                  maxwrk = max( m*m+wrkbl, m*m+m*n )

                  minwrk = 2*m + n

               ELSE IF( wntvs .AND. wntun ) THEN

*

*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_m )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  maxwrk = m*m + wrkbl

                  minwrk = 2*m + n

               ELSE IF( wntvs .AND. wntuo ) THEN

*

*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_m )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )

                  maxwrk = 2*m*m + wrkbl

                  minwrk = 2*m + n

               ELSE IF( wntvs .AND. wntuas ) THEN

*

*                 Path 6t(N much larger than M, JOBU='S' or 'A',

*                 JOBVT='S')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_m )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )

                  maxwrk = m*m + wrkbl

                  minwrk = 2*m + n

               ELSE IF( wntva .AND. wntun ) THEN

*

*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_n )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  maxwrk = m*m + wrkbl

                  minwrk = 2*m + n

               ELSE IF( wntva .AND. wntuo ) THEN

*

*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_n )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )

                  maxwrk = 2*m*m + wrkbl

                  minwrk = 2*m + n

               ELSE IF( wntva .AND. wntuas ) THEN

*

*                 Path 9t(N much larger than M, JOBU='S' or 'A',

*                 JOBVT='A')

*

                  wrkbl = m + lwork_cgelqf

                  wrkbl = max( wrkbl, m+lwork_cunglq_n )

                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )

                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )

                  maxwrk = m*m + wrkbl

                  minwrk = 2*m + n

               END IF

            ELSE

*

*              Path 10t(N greater than M, but not much larger)

*

               CALL cgebrd( m, n, a, lda, s, dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

               lwork_cgebrd=dum(1)

               maxwrk = 2*m + lwork_cgebrd

               IF( wntvs .OR. wntvo ) THEN

*                Compute space needed for CUNGBR P

                 CALL cungbr( 'P', m, n, m, a, n, dum(1),

     $                   dum(1), -1, ierr )

                 lwork_cungbr_p=dum(1)

                 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )

               END IF

               IF( wntva ) THEN

                 CALL cungbr( 'P', n,  n, m, a, n, dum(1),

     $                   dum(1), -1, ierr )

                 lwork_cungbr_p=dum(1)

                 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )

               END IF

               IF( .NOT.wntun ) THEN

                  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )

               minwrk = 2*m + n

               END IF

            END IF

         END IF

         maxwrk = max( minwrk, maxwrk )

         work( 1 ) = maxwrk

*

         IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN

            info = -13

         END IF

      END IF

*

      IF( info.NE.0 ) THEN

         CALL xerbla( 'CGESVD', -info )

         return

      ELSE IF( lquery ) THEN

         return

      END IF

*

*     Quick return if possible

*

      IF( m.EQ.0 .OR. n.EQ.0 ) THEN

         return

      END IF

*

*     Get machine constants

*

      eps = slamch( 'P' )

      smlnum = sqrt( slamch( 'S' ) ) / eps

      bignum = one / smlnum

*

*     Scale A if max element outside range [SMLNUM,BIGNUM]

*

      anrm = clange( 'M', m, n, a, lda, dum )

      iscl = 0

      IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN

         iscl = 1

         CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )

      ELSE IF( anrm.GT.bignum ) THEN

         iscl = 1

         CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )

      END IF

*

      IF( m.GE.n ) THEN

*

*        A has at least as many rows as columns. If A has sufficiently

*        more rows than columns, first reduce using the QR

*        decomposition (if sufficient workspace available)

*

         IF( m.GE.mnthr ) THEN

*

            IF( wntun ) THEN

*

*              Path 1 (M much larger than N, JOBU='N')

*              No left singular vectors to be computed

*

               itau = 1

               iwork = itau + n

*

*              Compute A=Q*R

*              (CWorkspace: need 2*N, prefer N+N*NB)

*              (RWorkspace: need 0)

*

               CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),

     $                      lwork-iwork+1, ierr )

*

*              Zero out below R

*

               CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),

     $                      lda )

               ie = 1

               itauq = 1

               itaup = itauq + n

               iwork = itaup + n

*

*              Bidiagonalize R in A

*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*              (RWorkspace: need N)

*

               CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),

     $                      work( itaup ), work( iwork ), lwork-iwork+1,

     $                      ierr )

               ncvt = 0

               IF( wntvo .OR. wntvas ) THEN

*

*                 If right singular vectors desired, generate P'.

*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ncvt = n

               END IF

               irwork = ie + n

*

*              Perform bidiagonal QR iteration, computing right

*              singular vectors of A in A if desired

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,

     $                      cdum, 1, cdum, 1, rwork( irwork ), info )

*

*              If right singular vectors desired in VT, copy them there

*

               IF( wntvas )

     $            CALL clacpy( 'F', n, n, a, lda, vt, ldvt )

*

            ELSE IF( wntuo .AND. wntvn ) THEN

*

*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')

*              N left singular vectors to be overwritten on A and

*              no right singular vectors to be computed

*

               IF( lwork.GE.n*n+3*n ) THEN

*

*                 Sufficient workspace for a fast algorithm

*

                  ir = 1

                  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN

*

*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N

*

                     ldwrku = lda

                     ldwrkr = lda

                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN

*

*                    WORK(IU) is LDA by N, WORK(IR) is N by N

*

                     ldwrku = lda

                     ldwrkr = n

                  ELSE

*

*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N

*

                     ldwrku = ( lwork-n*n ) / n

                     ldwrkr = n

                  END IF

                  itau = ir + ldwrkr*n

                  iwork = itau + n

*

*                 Compute A=Q*R

*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cgeqrf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy R to WORK(IR) and zero out below it

*

                  CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )

                  CALL claset( 'L', n-1, n-1, czero, czero,

     $                         work( ir+1 ), ldwrkr )

*

*                 Generate Q in A

*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cungqr( m, n, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + n

                  iwork = itaup + n

*

*                 Bidiagonalize R in WORK(IR)

*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                 (RWorkspace: need N)

*

                  CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Generate left vectors bidiagonalizing R

*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                 (RWorkspace: need 0)

*

                  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,

     $                         work( itauq ), work( iwork ),

     $                         lwork-iwork+1, ierr )

                  irwork = ie + n

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of R in WORK(IR)

*                 (CWorkspace: need N*N)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,

     $                         work( ir ), ldwrkr, cdum, 1,

     $                         rwork( irwork ), info )

                  iu = itauq

*

*                 Multiply Q in A by left singular vectors of R in

*                 WORK(IR), storing result in WORK(IU) and copying to A

*                 (CWorkspace: need N*N+N, prefer N*N+M*N)

*                 (RWorkspace: 0)

*

                  DO 10 i = 1, m, ldwrku

                     chunk = min( m-i+1, ldwrku )

                     CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),

     $                           lda, work( ir ), ldwrkr, czero,

     $                           work( iu ), ldwrku )

                     CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,

     $                            a( i, 1 ), lda )

   10             continue

*

               ELSE

*

*                 Insufficient workspace for a fast algorithm

*

                  ie = 1

                  itauq = 1

                  itaup = itauq + n

                  iwork = itaup + n

*

*                 Bidiagonalize A

*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)

*                 (RWorkspace: N)

*

                  CALL cgebrd( m, n, a, lda, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Generate left vectors bidiagonalizing A

*                 (CWorkspace: need 3*N, prefer 2*N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + n

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of A in A

*                 (CWorkspace: need 0)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,

     $                         a, lda, cdum, 1, rwork( irwork ), info )

*

               END IF

*

            ELSE IF( wntuo .AND. wntvas ) THEN

*

*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')

*              N left singular vectors to be overwritten on A and

*              N right singular vectors to be computed in VT

*

               IF( lwork.GE.n*n+3*n ) THEN

*

*                 Sufficient workspace for a fast algorithm

*

                  ir = 1

                  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N

*

                     ldwrku = lda

                     ldwrkr = lda

                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is N by N

*

                     ldwrku = lda

                     ldwrkr = n

                  ELSE

*

*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N

*

                     ldwrku = ( lwork-n*n ) / n

                     ldwrkr = n

                  END IF

                  itau = ir + ldwrkr*n

                  iwork = itau + n

*

*                 Compute A=Q*R

*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cgeqrf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy R to VT, zeroing out below it

*

                  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )

                  IF( n.GT.1 )

     $               CALL claset( 'L', n-1, n-1, czero, czero,

     $                            vt( 2, 1 ), ldvt )

*

*                 Generate Q in A

*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cungqr( m, n, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + n

                  iwork = itaup + n

*

*                 Bidiagonalize R in VT, copying result to WORK(IR)

*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                 (RWorkspace: need N)

*

                  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )

*

*                 Generate left vectors bidiagonalizing R in WORK(IR)

*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,

     $                         work( itauq ), work( iwork ),

     $                         lwork-iwork+1, ierr )

*

*                 Generate right vectors bidiagonalizing R in VT

*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + n

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of R in WORK(IR) and computing right

*                 singular vectors of R in VT

*                 (CWorkspace: need N*N)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,

     $                         ldvt, work( ir ), ldwrkr, cdum, 1,

     $                         rwork( irwork ), info )

                  iu = itauq

*

*                 Multiply Q in A by left singular vectors of R in

*                 WORK(IR), storing result in WORK(IU) and copying to A

*                 (CWorkspace: need N*N+N, prefer N*N+M*N)

*                 (RWorkspace: 0)

*

                  DO 20 i = 1, m, ldwrku

                     chunk = min( m-i+1, ldwrku )

                     CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),

     $                           lda, work( ir ), ldwrkr, czero,

     $                           work( iu ), ldwrku )

                     CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,

     $                            a( i, 1 ), lda )

   20             continue

*

               ELSE

*

*                 Insufficient workspace for a fast algorithm

*

                  itau = 1

                  iwork = itau + n

*

*                 Compute A=Q*R

*                 (CWorkspace: need 2*N, prefer N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cgeqrf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy R to VT, zeroing out below it

*

                  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )

                  IF( n.GT.1 )

     $               CALL claset( 'L', n-1, n-1, czero, czero,

     $                            vt( 2, 1 ), ldvt )

*

*                 Generate Q in A

*                 (CWorkspace: need 2*N, prefer N+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cungqr( m, n, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + n

                  iwork = itaup + n

*

*                 Bidiagonalize R in VT

*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                 (RWorkspace: N)

*

                  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Multiply Q in A by left vectors bidiagonalizing R

*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,

     $                         work( itauq ), a, lda, work( iwork ),

     $                         lwork-iwork+1, ierr )

*

*                 Generate right vectors bidiagonalizing R in VT

*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + n

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of A in A and computing right

*                 singular vectors of A in VT

*                 (CWorkspace: 0)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,

     $                         ldvt, a, lda, cdum, 1, rwork( irwork ),

     $                         info )

*

               END IF

*

            ELSE IF( wntus ) THEN

*

               IF( wntvn ) THEN

*

*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')

*                 N left singular vectors to be computed in U and

*                 no right singular vectors to be computed

*

                  IF( lwork.GE.n*n+3*n ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     ir = 1

                     IF( lwork.GE.wrkbl+lda*n ) THEN

*

*                       WORK(IR) is LDA by N

*

                        ldwrkr = lda

                     ELSE

*

*                       WORK(IR) is N by N

*

                        ldwrkr = n

                     END IF

                     itau = ir + ldwrkr*n

                     iwork = itau + n

*

*                    Compute A=Q*R

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to WORK(IR), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( ir ),

     $                            ldwrkr )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( ir+1 ), ldwrkr )

*

*                    Generate Q in A

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IR)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, work( ir ), ldwrkr, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left vectors bidiagonalizing R in WORK(IR)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IR)

*                    (CWorkspace: need N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,

     $                            1, work( ir ), ldwrkr, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply Q in A by left singular vectors of R in

*                    WORK(IR), storing result in U

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,

     $                           work( ir ), ldwrkr, czero, u, ldu )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Zero out below R in A

*

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            a( 2, 1 ), lda )

*

*                    Bidiagonalize R in A

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left vectors bidiagonalizing R

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,

     $                            1, u, ldu, cdum, 1, rwork( irwork ),

     $                            info )

*

                  END IF

*

               ELSE IF( wntvo ) THEN

*

*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')

*                 N left singular vectors to be computed in U and

*                 N right singular vectors to be overwritten on A

*

                  IF( lwork.GE.2*n*n+3*n ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+2*lda*n ) THEN

*

*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N

*

                        ldwrku = lda

                        ir = iu + ldwrku*n

                        ldwrkr = lda

                     ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN

*

*                       WORK(IU) is LDA by N and WORK(IR) is N by N

*

                        ldwrku = lda

                        ir = iu + ldwrku*n

                        ldwrkr = n

                     ELSE

*

*                       WORK(IU) is N by N and WORK(IR) is N by N

*

                        ldwrku = n

                        ir = iu + ldwrku*n

                        ldwrkr = n

                     END IF

                     itau = ir + ldwrkr*n

                     iwork = itau + n

*

*                    Compute A=Q*R

*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to WORK(IU), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( iu+1 ), ldwrku )

*

*                    Generate Q in A

*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IU), copying result to

*                    WORK(IR)

*                    (CWorkspace: need   2*N*N+3*N,

*                                 prefer 2*N*N+2*N+2*N*NB)

*                    (RWorkspace: need   N)

*

                     CALL cgebrd( n, n, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'U', n, n, work( iu ), ldwrku,

     $                            work( ir ), ldwrkr )

*

*                    Generate left bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need   2*N*N+3*N-1,

*                                 prefer 2*N*N+2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IU) and computing

*                    right singular vectors of R in WORK(IR)

*                    (CWorkspace: need 2*N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),

     $                            work( ir ), ldwrkr, work( iu ),

     $                            ldwrku, cdum, 1, rwork( irwork ),

     $                            info )

*

*                    Multiply Q in A by left singular vectors of R in

*                    WORK(IU), storing result in U

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,

     $                           work( iu ), ldwrku, czero, u, ldu )

*

*                    Copy right singular vectors of R to A

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,

     $                            lda )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Zero out below R in A

*

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            a( 2, 1 ), lda )

*

*                    Bidiagonalize R in A

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left vectors bidiagonalizing R

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right vectors bidiagonalizing R in A

*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in A

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,

     $                            lda, u, ldu, cdum, 1, rwork( irwork ),

     $                            info )

*

                  END IF

*

               ELSE IF( wntvas ) THEN

*

*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'

*                         or 'A')

*                 N left singular vectors to be computed in U and

*                 N right singular vectors to be computed in VT

*

                  IF( lwork.GE.n*n+3*n ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+lda*n ) THEN

*

*                       WORK(IU) is LDA by N

*

                        ldwrku = lda

                     ELSE

*

*                       WORK(IU) is N by N

*

                        ldwrku = n

                     END IF

                     itau = iu + ldwrku*n

                     iwork = itau + n

*

*                    Compute A=Q*R

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to WORK(IU), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( iu+1 ), ldwrku )

*

*                    Generate Q in A

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IU), copying result to VT

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,

     $                            ldvt )

*

*                    Generate left bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in VT

*                    (CWorkspace: need   N*N+3*N-1,

*                                 prefer N*N+2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IU) and computing

*                    right singular vectors of R in VT

*                    (CWorkspace: need N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,

     $                            ldvt, work( iu ), ldwrku, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply Q in A by left singular vectors of R in

*                    WORK(IU), storing result in U

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,

     $                           work( iu ), ldwrku, czero, u, ldu )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, n, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to VT, zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, vt, ldvt )

                     IF( n.GT.1 )

     $                  CALL claset( 'L', n-1, n-1, czero, czero,

     $                               vt( 2, 1 ), ldvt )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in VT

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left bidiagonalizing vectors

*                    in VT

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in VT

*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               END IF

*

            ELSE IF( wntua ) THEN

*

               IF( wntvn ) THEN

*

*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')

*                 M left singular vectors to be computed in U and

*                 no right singular vectors to be computed

*

                  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     ir = 1

                     IF( lwork.GE.wrkbl+lda*n ) THEN

*

*                       WORK(IR) is LDA by N

*

                        ldwrkr = lda

                     ELSE

*

*                       WORK(IR) is N by N

*

                        ldwrkr = n

                     END IF

                     itau = ir + ldwrkr*n

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Copy R to WORK(IR), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( ir ),

     $                            ldwrkr )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( ir+1 ), ldwrkr )

*

*                    Generate Q in U

*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IR)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, work( ir ), ldwrkr, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IR)

*                    (CWorkspace: need N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,

     $                            1, work( ir ), ldwrkr, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply Q in U by left singular vectors of R in

*                    WORK(IR), storing result in A

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,

     $                           work( ir ), ldwrkr, czero, a, lda )

*

*                    Copy left singular vectors of A from A to U

*

                     CALL clacpy( 'F', m, n, a, lda, u, ldu )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need N+M, prefer N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Zero out below R in A

*

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            a( 2, 1 ), lda )

*

*                    Bidiagonalize R in A

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left bidiagonalizing vectors

*                    in A

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,

     $                            1, u, ldu, cdum, 1, rwork( irwork ),

     $                            info )

*

                  END IF

*

               ELSE IF( wntvo ) THEN

*

*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')

*                 M left singular vectors to be computed in U and

*                 N right singular vectors to be overwritten on A

*

                  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+2*lda*n ) THEN

*

*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N

*

                        ldwrku = lda

                        ir = iu + ldwrku*n

                        ldwrkr = lda

                     ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN

*

*                       WORK(IU) is LDA by N and WORK(IR) is N by N

*

                        ldwrku = lda

                        ir = iu + ldwrku*n

                        ldwrkr = n

                     ELSE

*

*                       WORK(IU) is N by N and WORK(IR) is N by N

*

                        ldwrku = n

                        ir = iu + ldwrku*n

                        ldwrkr = n

                     END IF

                     itau = ir + ldwrkr*n

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to WORK(IU), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( iu+1 ), ldwrku )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IU), copying result to

*                    WORK(IR)

*                    (CWorkspace: need   2*N*N+3*N,

*                                 prefer 2*N*N+2*N+2*N*NB)

*                    (RWorkspace: need   N)

*

                     CALL cgebrd( n, n, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'U', n, n, work( iu ), ldwrku,

     $                            work( ir ), ldwrkr )

*

*                    Generate left bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need   2*N*N+3*N-1,

*                                 prefer 2*N*N+2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IU) and computing

*                    right singular vectors of R in WORK(IR)

*                    (CWorkspace: need 2*N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),

     $                            work( ir ), ldwrkr, work( iu ),

     $                            ldwrku, cdum, 1, rwork( irwork ),

     $                            info )

*

*                    Multiply Q in U by left singular vectors of R in

*                    WORK(IU), storing result in A

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,

     $                           work( iu ), ldwrku, czero, a, lda )

*

*                    Copy left singular vectors of A from A to U

*

                     CALL clacpy( 'F', m, n, a, lda, u, ldu )

*

*                    Copy right singular vectors of R from WORK(IR) to A

*

                     CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,

     $                            lda )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need N+M, prefer N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Zero out below R in A

*

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            a( 2, 1 ), lda )

*

*                    Bidiagonalize R in A

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left bidiagonalizing vectors

*                    in A

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in A

*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in A

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,

     $                            lda, u, ldu, cdum, 1, rwork( irwork ),

     $                            info )

*

                  END IF

*

               ELSE IF( wntvas ) THEN

*

*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'

*                         or 'A')

*                 M left singular vectors to be computed in U and

*                 N right singular vectors to be computed in VT

*

                  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+lda*n ) THEN

*

*                       WORK(IU) is LDA by N

*

                        ldwrku = lda

                     ELSE

*

*                       WORK(IU) is N by N

*

                        ldwrku = n

                     END IF

                     itau = iu + ldwrku*n

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R to WORK(IU), zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'L', n-1, n-1, czero, czero,

     $                            work( iu+1 ), ldwrku )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in WORK(IU), copying result to VT

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,

     $                            ldvt )

*

*                    Generate left bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in VT

*                    (CWorkspace: need   N*N+3*N-1,

*                                 prefer N*N+2*N+(N-1)*NB)

*                    (RWorkspace: need   0)

*

                     CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of R in WORK(IU) and computing

*                    right singular vectors of R in VT

*                    (CWorkspace: need N*N)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,

     $                            ldvt, work( iu ), ldwrku, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply Q in U by left singular vectors of R in

*                    WORK(IU), storing result in A

*                    (CWorkspace: need N*N)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,

     $                           work( iu ), ldwrku, czero, a, lda )

*

*                    Copy left singular vectors of A from A to U

*

                     CALL clacpy( 'F', m, n, a, lda, u, ldu )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + n

*

*                    Compute A=Q*R, copying result to U

*                    (CWorkspace: need 2*N, prefer N+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cgeqrf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, n, a, lda, u, ldu )

*

*                    Generate Q in U

*                    (CWorkspace: need N+M, prefer N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungqr( m, m, n, u, ldu, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy R from A to VT, zeroing out below it

*

                     CALL clacpy( 'U', n, n, a, lda, vt, ldvt )

                     IF( n.GT.1 )

     $                  CALL claset( 'L', n-1, n-1, czero, czero,

     $                               vt( 2, 1 ), ldvt )

                     ie = 1

                     itauq = itau

                     itaup = itauq + n

                     iwork = itaup + n

*

*                    Bidiagonalize R in VT

*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)

*                    (RWorkspace: need N)

*

                     CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply Q in U by left bidiagonalizing vectors

*                    in VT

*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,

     $                            work( itauq ), u, ldu, work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in VT

*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + n

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               END IF

*

            END IF

*

         ELSE

*

*           M .LT. MNTHR

*

*           Path 10 (M at least N, but not much larger)

*           Reduce to bidiagonal form without QR decomposition

*

            ie = 1

            itauq = 1

            itaup = itauq + n

            iwork = itaup + n

*

*           Bidiagonalize A

*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)

*           (RWorkspace: need N)

*

            CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),

     $                   work( itaup ), work( iwork ), lwork-iwork+1,

     $                   ierr )

            IF( wntuas ) THEN

*

*              If left singular vectors desired in U, copy result to U

*              and generate left bidiagonalizing vectors in U

*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)

*              (RWorkspace: 0)

*

               CALL clacpy( 'L', m, n, a, lda, u, ldu )

               IF( wntus )

     $            ncu = n

               IF( wntua )

     $            ncu = m

               CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntvas ) THEN

*

*              If right singular vectors desired in VT, copy result to

*              VT and generate right bidiagonalizing vectors in VT

*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*              (RWorkspace: 0)

*

               CALL clacpy( 'U', n, n, a, lda, vt, ldvt )

               CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntuo ) THEN

*

*              If left singular vectors desired in A, generate left

*              bidiagonalizing vectors in A

*              (CWorkspace: need 3*N, prefer 2*N+N*NB)

*              (RWorkspace: 0)

*

               CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntvo ) THEN

*

*              If right singular vectors desired in A, generate right

*              bidiagonalizing vectors in A

*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)

*              (RWorkspace: 0)

*

               CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            irwork = ie + n

            IF( wntuas .OR. wntuo )

     $         nru = m

            IF( wntun )

     $         nru = 0

            IF( wntvas .OR. wntvo )

     $         ncvt = n

            IF( wntvn )

     $         ncvt = 0

            IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in U and computing right singular

*              vectors in VT

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,

     $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),

     $                      info )

            ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in U and computing right singular

*              vectors in A

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,

     $                      lda, u, ldu, cdum, 1, rwork( irwork ),

     $                      info )

            ELSE

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in A and computing right singular

*              vectors in VT

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,

     $                      ldvt, a, lda, cdum, 1, rwork( irwork ),

     $                      info )

            END IF

*

         END IF

*

      ELSE

*

*        A has more columns than rows. If A has sufficiently more

*        columns than rows, first reduce using the LQ decomposition (if

*        sufficient workspace available)

*

         IF( n.GE.mnthr ) THEN

*

            IF( wntvn ) THEN

*

*              Path 1t(N much larger than M, JOBVT='N')

*              No right singular vectors to be computed

*

               itau = 1

               iwork = itau + m

*

*              Compute A=L*Q

*              (CWorkspace: need 2*M, prefer M+M*NB)

*              (RWorkspace: 0)

*

               CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),

     $                      lwork-iwork+1, ierr )

*

*              Zero out above L

*

               CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),

     $                      lda )

               ie = 1

               itauq = 1

               itaup = itauq + m

               iwork = itaup + m

*

*              Bidiagonalize L in A

*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*              (RWorkspace: need M)

*

               CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),

     $                      work( itaup ), work( iwork ), lwork-iwork+1,

     $                      ierr )

               IF( wntuo .OR. wntuas ) THEN

*

*                 If left singular vectors desired, generate Q

*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),

     $                         work( iwork ), lwork-iwork+1, ierr )

               END IF

               irwork = ie + m

               nru = 0

               IF( wntuo .OR. wntuas )

     $            nru = m

*

*              Perform bidiagonal QR iteration, computing left singular

*              vectors of A in A if desired

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,

     $                      a, lda, cdum, 1, rwork( irwork ), info )

*

*              If left singular vectors desired in U, copy them there

*

               IF( wntuas )

     $            CALL clacpy( 'F', m, m, a, lda, u, ldu )

*

            ELSE IF( wntvo .AND. wntun ) THEN

*

*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')

*              M right singular vectors to be overwritten on A and

*              no left singular vectors to be computed

*

               IF( lwork.GE.m*m+3*m ) THEN

*

*                 Sufficient workspace for a fast algorithm

*

                  ir = 1

                  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M

*

                     ldwrku = lda

                     chunk = n

                     ldwrkr = lda

                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is M by M

*

                     ldwrku = lda

                     chunk = n

                     ldwrkr = m

                  ELSE

*

*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M

*

                     ldwrku = m

                     chunk = ( lwork-m*m ) / m

                     ldwrkr = m

                  END IF

                  itau = ir + ldwrkr*m

                  iwork = itau + m

*

*                 Compute A=L*Q

*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cgelqf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy L to WORK(IR) and zero out above it

*

                  CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )

                  CALL claset( 'U', m-1, m-1, czero, czero,

     $                         work( ir+ldwrkr ), ldwrkr )

*

*                 Generate Q in A

*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cunglq( m, n, m, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + m

                  iwork = itaup + m

*

*                 Bidiagonalize L in WORK(IR)

*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                 (RWorkspace: need M)

*

                  CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Generate right vectors bidiagonalizing L

*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,

     $                         work( itaup ), work( iwork ),

     $                         lwork-iwork+1, ierr )

                  irwork = ie + m

*

*                 Perform bidiagonal QR iteration, computing right

*                 singular vectors of L in WORK(IR)

*                 (CWorkspace: need M*M)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),

     $                         work( ir ), ldwrkr, cdum, 1, cdum, 1,

     $                         rwork( irwork ), info )

                  iu = itauq

*

*                 Multiply right singular vectors of L in WORK(IR) by Q

*                 in A, storing result in WORK(IU) and copying to A

*                 (CWorkspace: need M*M+M, prefer M*M+M*N)

*                 (RWorkspace: 0)

*

                  DO 30 i = 1, n, chunk

                     blk = min( n-i+1, chunk )

                     CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),

     $                           ldwrkr, a( 1, i ), lda, czero,

     $                           work( iu ), ldwrku )

                     CALL clacpy( 'F', m, blk, work( iu ), ldwrku,

     $                            a( 1, i ), lda )

   30             continue

*

               ELSE

*

*                 Insufficient workspace for a fast algorithm

*

                  ie = 1

                  itauq = 1

                  itaup = itauq + m

                  iwork = itaup + m

*

*                 Bidiagonalize A

*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)

*                 (RWorkspace: need M)

*

                  CALL cgebrd( m, n, a, lda, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Generate right vectors bidiagonalizing A

*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + m

*

*                 Perform bidiagonal QR iteration, computing right

*                 singular vectors of A in A

*                 (CWorkspace: 0)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,

     $                         cdum, 1, cdum, 1, rwork( irwork ), info )

*

               END IF

*

            ELSE IF( wntvo .AND. wntuas ) THEN

*

*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')

*              M right singular vectors to be overwritten on A and

*              M left singular vectors to be computed in U

*

               IF( lwork.GE.m*m+3*m ) THEN

*

*                 Sufficient workspace for a fast algorithm

*

                  ir = 1

                  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M

*

                     ldwrku = lda

                     chunk = n

                     ldwrkr = lda

                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN

*

*                    WORK(IU) is LDA by N and WORK(IR) is M by M

*

                     ldwrku = lda

                     chunk = n

                     ldwrkr = m

                  ELSE

*

*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M

*

                     ldwrku = m

                     chunk = ( lwork-m*m ) / m

                     ldwrkr = m

                  END IF

                  itau = ir + ldwrkr*m

                  iwork = itau + m

*

*                 Compute A=L*Q

*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cgelqf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy L to U, zeroing out above it

*

                  CALL clacpy( 'L', m, m, a, lda, u, ldu )

                  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),

     $                         ldu )

*

*                 Generate Q in A

*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cunglq( m, n, m, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + m

                  iwork = itaup + m

*

*                 Bidiagonalize L in U, copying result to WORK(IR)

*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                 (RWorkspace: need M)

*

                  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )

*

*                 Generate right vectors bidiagonalizing L in WORK(IR)

*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,

     $                         work( itaup ), work( iwork ),

     $                         lwork-iwork+1, ierr )

*

*                 Generate left vectors bidiagonalizing L in U

*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + m

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of L in U, and computing right

*                 singular vectors of L in WORK(IR)

*                 (CWorkspace: need M*M)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),

     $                         work( ir ), ldwrkr, u, ldu, cdum, 1,

     $                         rwork( irwork ), info )

                  iu = itauq

*

*                 Multiply right singular vectors of L in WORK(IR) by Q

*                 in A, storing result in WORK(IU) and copying to A

*                 (CWorkspace: need M*M+M, prefer M*M+M*N)

*                 (RWorkspace: 0)

*

                  DO 40 i = 1, n, chunk

                     blk = min( n-i+1, chunk )

                     CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),

     $                           ldwrkr, a( 1, i ), lda, czero,

     $                           work( iu ), ldwrku )

                     CALL clacpy( 'F', m, blk, work( iu ), ldwrku,

     $                            a( 1, i ), lda )

   40             continue

*

               ELSE

*

*                 Insufficient workspace for a fast algorithm

*

                  itau = 1

                  iwork = itau + m

*

*                 Compute A=L*Q

*                 (CWorkspace: need 2*M, prefer M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cgelqf( m, n, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Copy L to U, zeroing out above it

*

                  CALL clacpy( 'L', m, m, a, lda, u, ldu )

                  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),

     $                         ldu )

*

*                 Generate Q in A

*                 (CWorkspace: need 2*M, prefer M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cunglq( m, n, m, a, lda, work( itau ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  ie = 1

                  itauq = itau

                  itaup = itauq + m

                  iwork = itaup + m

*

*                 Bidiagonalize L in U

*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                 (RWorkspace: need M)

*

                  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),

     $                         work( itauq ), work( itaup ),

     $                         work( iwork ), lwork-iwork+1, ierr )

*

*                 Multiply right vectors bidiagonalizing L by Q in A

*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                 (RWorkspace: 0)

*

                  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,

     $                         work( itaup ), a, lda, work( iwork ),

     $                         lwork-iwork+1, ierr )

*

*                 Generate left vectors bidiagonalizing L in U

*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                 (RWorkspace: 0)

*

                  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                         work( iwork ), lwork-iwork+1, ierr )

                  irwork = ie + m

*

*                 Perform bidiagonal QR iteration, computing left

*                 singular vectors of A in U and computing right

*                 singular vectors of A in A

*                 (CWorkspace: 0)

*                 (RWorkspace: need BDSPAC)

*

                  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,

     $                         u, ldu, cdum, 1, rwork( irwork ), info )

*

               END IF

*

            ELSE IF( wntvs ) THEN

*

               IF( wntun ) THEN

*

*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')

*                 M right singular vectors to be computed in VT and

*                 no left singular vectors to be computed

*

                  IF( lwork.GE.m*m+3*m ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     ir = 1

                     IF( lwork.GE.wrkbl+lda*m ) THEN

*

*                       WORK(IR) is LDA by M

*

                        ldwrkr = lda

                     ELSE

*

*                       WORK(IR) is M by M

*

                        ldwrkr = m

                     END IF

                     itau = ir + ldwrkr*m

                     iwork = itau + m

*

*                    Compute A=L*Q

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to WORK(IR), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( ir ),

     $                            ldwrkr )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( ir+ldwrkr ), ldwrkr )

*

*                    Generate Q in A

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IR)

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, work( ir ), ldwrkr, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right vectors bidiagonalizing L in

*                    WORK(IR)

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing right

*                    singular vectors of L in WORK(IR)

*                    (CWorkspace: need M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),

     $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply right singular vectors of L in WORK(IR) by

*                    Q in A, storing result in VT

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),

     $                           ldwrkr, a, lda, czero, vt, ldvt )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy result to VT

*

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Zero out above L in A

*

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            a( 1, 2 ), lda )

*

*                    Bidiagonalize L in A

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right vectors bidiagonalizing L by Q in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,

     $                            ldvt, cdum, 1, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               ELSE IF( wntuo ) THEN

*

*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')

*                 M right singular vectors to be computed in VT and

*                 M left singular vectors to be overwritten on A

*

                  IF( lwork.GE.2*m*m+3*m ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+2*lda*m ) THEN

*

*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M

*

                        ldwrku = lda

                        ir = iu + ldwrku*m

                        ldwrkr = lda

                     ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN

*

*                       WORK(IU) is LDA by M and WORK(IR) is M by M

*

                        ldwrku = lda

                        ir = iu + ldwrku*m

                        ldwrkr = m

                     ELSE

*

*                       WORK(IU) is M by M and WORK(IR) is M by M

*

                        ldwrku = m

                        ir = iu + ldwrku*m

                        ldwrkr = m

                     END IF

                     itau = ir + ldwrkr*m

                     iwork = itau + m

*

*                    Compute A=L*Q

*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to WORK(IU), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( iu+ldwrku ), ldwrku )

*

*                    Generate Q in A

*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IU), copying result to

*                    WORK(IR)

*                    (CWorkspace: need   2*M*M+3*M,

*                                 prefer 2*M*M+2*M+2*M*NB)

*                    (RWorkspace: need   M)

*

                     CALL cgebrd( m, m, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, m, work( iu ), ldwrku,

     $                            work( ir ), ldwrkr )

*

*                    Generate right bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need   2*M*M+3*M-1,

*                                 prefer 2*M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of L in WORK(IR) and computing

*                    right singular vectors of L in WORK(IU)

*                    (CWorkspace: need 2*M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),

     $                            work( iu ), ldwrku, work( ir ),

     $                            ldwrkr, cdum, 1, rwork( irwork ),

     $                            info )

*

*                    Multiply right singular vectors of L in WORK(IU) by

*                    Q in A, storing result in VT

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),

     $                           ldwrku, a, lda, czero, vt, ldvt )

*

*                    Copy left singular vectors of L to A

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,

     $                            lda )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Zero out above L in A

*

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            a( 1, 2 ), lda )

*

*                    Bidiagonalize L in A

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right vectors bidiagonalizing L by Q in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors of L in A

*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in A and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, a, lda, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               ELSE IF( wntuas ) THEN

*

*                 Path 6t(N much larger than M, JOBU='S' or 'A',

*                         JOBVT='S')

*                 M right singular vectors to be computed in VT and

*                 M left singular vectors to be computed in U

*

                  IF( lwork.GE.m*m+3*m ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+lda*m ) THEN

*

*                       WORK(IU) is LDA by M

*

                        ldwrku = lda

                     ELSE

*

*                       WORK(IU) is M by M

*

                        ldwrku = m

                     END IF

                     itau = iu + ldwrku*m

                     iwork = itau + m

*

*                    Compute A=L*Q

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to WORK(IU), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( iu+ldwrku ), ldwrku )

*

*                    Generate Q in A

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IU), copying result to U

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,

     $                            ldu )

*

*                    Generate right bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need   M*M+3*M-1,

*                                 prefer M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in U

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of L in U and computing right

*                    singular vectors of L in WORK(IU)

*                    (CWorkspace: need M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),

     $                            work( iu ), ldwrku, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply right singular vectors of L in WORK(IU) by

*                    Q in A, storing result in VT

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),

     $                           ldwrku, a, lda, czero, vt, ldvt )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to U, zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, u, ldu )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            u( 1, 2 ), ldu )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in U

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, u, ldu, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right bidiagonalizing vectors in U by Q

*                    in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in U

*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               END IF

*

            ELSE IF( wntva ) THEN

*

               IF( wntun ) THEN

*

*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')

*                 N right singular vectors to be computed in VT and

*                 no left singular vectors to be computed

*

                  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     ir = 1

                     IF( lwork.GE.wrkbl+lda*m ) THEN

*

*                       WORK(IR) is LDA by M

*

                        ldwrkr = lda

                     ELSE

*

*                       WORK(IR) is M by M

*

                        ldwrkr = m

                     END IF

                     itau = ir + ldwrkr*m

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Copy L to WORK(IR), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( ir ),

     $                            ldwrkr )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( ir+ldwrkr ), ldwrkr )

*

*                    Generate Q in VT

*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IR)

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, work( ir ), ldwrkr, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate right bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need   M*M+3*M-1,

*                                 prefer M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing right

*                    singular vectors of L in WORK(IR)

*                    (CWorkspace: need M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),

     $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply right singular vectors of L in WORK(IR) by

*                    Q in VT, storing result in A

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),

     $                           ldwrkr, vt, ldvt, czero, a, lda )

*

*                    Copy right singular vectors of A from A to VT

*

                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need M+N, prefer M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Zero out above L in A

*

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            a( 1, 2 ), lda )

*

*                    Bidiagonalize L in A

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right bidiagonalizing vectors in A by Q

*                    in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,

     $                            ldvt, cdum, 1, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               ELSE IF( wntuo ) THEN

*

*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')

*                 N right singular vectors to be computed in VT and

*                 M left singular vectors to be overwritten on A

*

                  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+2*lda*m ) THEN

*

*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M

*

                        ldwrku = lda

                        ir = iu + ldwrku*m

                        ldwrkr = lda

                     ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN

*

*                       WORK(IU) is LDA by M and WORK(IR) is M by M

*

                        ldwrku = lda

                        ir = iu + ldwrku*m

                        ldwrkr = m

                     ELSE

*

*                       WORK(IU) is M by M and WORK(IR) is M by M

*

                        ldwrku = m

                        ir = iu + ldwrku*m

                        ldwrkr = m

                     END IF

                     itau = ir + ldwrkr*m

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to WORK(IU), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( iu+ldwrku ), ldwrku )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IU), copying result to

*                    WORK(IR)

*                    (CWorkspace: need   2*M*M+3*M,

*                                 prefer 2*M*M+2*M+2*M*NB)

*                    (RWorkspace: need   M)

*

                     CALL cgebrd( m, m, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, m, work( iu ), ldwrku,

     $                            work( ir ), ldwrkr )

*

*                    Generate right bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need   2*M*M+3*M-1,

*                                 prefer 2*M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in WORK(IR)

*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,

     $                            work( itauq ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of L in WORK(IR) and computing

*                    right singular vectors of L in WORK(IU)

*                    (CWorkspace: need 2*M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),

     $                            work( iu ), ldwrku, work( ir ),

     $                            ldwrkr, cdum, 1, rwork( irwork ),

     $                            info )

*

*                    Multiply right singular vectors of L in WORK(IU) by

*                    Q in VT, storing result in A

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),

     $                           ldwrku, vt, ldvt, czero, a, lda )

*

*                    Copy right singular vectors of A from A to VT

*

                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )

*

*                    Copy left singular vectors of A from WORK(IR) to A

*

                     CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,

     $                            lda )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need M+N, prefer M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Zero out above L in A

*

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            a( 1, 2 ), lda )

*

*                    Bidiagonalize L in A

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right bidiagonalizing vectors in A by Q

*                    in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in A

*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in A and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, a, lda, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               ELSE IF( wntuas ) THEN

*

*                 Path 9t(N much larger than M, JOBU='S' or 'A',

*                         JOBVT='A')

*                 N right singular vectors to be computed in VT and

*                 M left singular vectors to be computed in U

*

                  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN

*

*                    Sufficient workspace for a fast algorithm

*

                     iu = 1

                     IF( lwork.GE.wrkbl+lda*m ) THEN

*

*                       WORK(IU) is LDA by M

*

                        ldwrku = lda

                     ELSE

*

*                       WORK(IU) is M by M

*

                        ldwrku = m

                     END IF

                     itau = iu + ldwrku*m

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to WORK(IU), zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, work( iu ),

     $                            ldwrku )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            work( iu+ldwrku ), ldwrku )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in WORK(IU), copying result to U

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, work( iu ), ldwrku, s,

     $                            rwork( ie ), work( itauq ),

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

                     CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,

     $                            ldu )

*

*                    Generate right bidiagonalizing vectors in WORK(IU)

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,

     $                            work( itaup ), work( iwork ),

     $                            lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in U

*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of L in U and computing right

*                    singular vectors of L in WORK(IU)

*                    (CWorkspace: need M*M)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),

     $                            work( iu ), ldwrku, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

*                    Multiply right singular vectors of L in WORK(IU) by

*                    Q in VT, storing result in A

*                    (CWorkspace: need M*M)

*                    (RWorkspace: 0)

*

                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),

     $                           ldwrku, vt, ldvt, czero, a, lda )

*

*                    Copy right singular vectors of A from A to VT

*

                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )

*

                  ELSE

*

*                    Insufficient workspace for a fast algorithm

*

                     itau = 1

                     iwork = itau + m

*

*                    Compute A=L*Q, copying result to VT

*                    (CWorkspace: need 2*M, prefer M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cgelqf( m, n, a, lda, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

*

*                    Generate Q in VT

*                    (CWorkspace: need M+N, prefer M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Copy L to U, zeroing out above it

*

                     CALL clacpy( 'L', m, m, a, lda, u, ldu )

                     CALL claset( 'U', m-1, m-1, czero, czero,

     $                            u( 1, 2 ), ldu )

                     ie = 1

                     itauq = itau

                     itaup = itauq + m

                     iwork = itaup + m

*

*                    Bidiagonalize L in U

*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)

*                    (RWorkspace: need M)

*

                     CALL cgebrd( m, m, u, ldu, s, rwork( ie ),

     $                            work( itauq ), work( itaup ),

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Multiply right bidiagonalizing vectors in U by Q

*                    in VT

*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)

*                    (RWorkspace: 0)

*

                     CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,

     $                            work( itaup ), vt, ldvt,

     $                            work( iwork ), lwork-iwork+1, ierr )

*

*                    Generate left bidiagonalizing vectors in U

*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)

*                    (RWorkspace: 0)

*

                     CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),

     $                            work( iwork ), lwork-iwork+1, ierr )

                     irwork = ie + m

*

*                    Perform bidiagonal QR iteration, computing left

*                    singular vectors of A in U and computing right

*                    singular vectors of A in VT

*                    (CWorkspace: 0)

*                    (RWorkspace: need BDSPAC)

*

                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,

     $                            ldvt, u, ldu, cdum, 1,

     $                            rwork( irwork ), info )

*

                  END IF

*

               END IF

*

            END IF

*

         ELSE

*

*           N .LT. MNTHR

*

*           Path 10t(N greater than M, but not much larger)

*           Reduce to bidiagonal form without LQ decomposition

*

            ie = 1

            itauq = 1

            itaup = itauq + m

            iwork = itaup + m

*

*           Bidiagonalize A

*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)

*           (RWorkspace: M)

*

            CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),

     $                   work( itaup ), work( iwork ), lwork-iwork+1,

     $                   ierr )

            IF( wntuas ) THEN

*

*              If left singular vectors desired in U, copy result to U

*              and generate left bidiagonalizing vectors in U

*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)

*              (RWorkspace: 0)

*

               CALL clacpy( 'L', m, m, a, lda, u, ldu )

               CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntvas ) THEN

*

*              If right singular vectors desired in VT, copy result to

*              VT and generate right bidiagonalizing vectors in VT

*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)

*              (RWorkspace: 0)

*

               CALL clacpy( 'U', m, n, a, lda, vt, ldvt )

               IF( wntva )

     $            nrvt = n

               IF( wntvs )

     $            nrvt = m

               CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntuo ) THEN

*

*              If left singular vectors desired in A, generate left

*              bidiagonalizing vectors in A

*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)

*              (RWorkspace: 0)

*

               CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            IF( wntvo ) THEN

*

*              If right singular vectors desired in A, generate right

*              bidiagonalizing vectors in A

*              (CWorkspace: need 3*M, prefer 2*M+M*NB)

*              (RWorkspace: 0)

*

               CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),

     $                      work( iwork ), lwork-iwork+1, ierr )

            END IF

            irwork = ie + m

            IF( wntuas .OR. wntuo )

     $         nru = m

            IF( wntun )

     $         nru = 0

            IF( wntvas .OR. wntvo )

     $         ncvt = n

            IF( wntvn )

     $         ncvt = 0

            IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in U and computing right singular

*              vectors in VT

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,

     $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),

     $                      info )

            ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in U and computing right singular

*              vectors in A

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,

     $                      lda, u, ldu, cdum, 1, rwork( irwork ),

     $                      info )

            ELSE

*

*              Perform bidiagonal QR iteration, if desired, computing

*              left singular vectors in A and computing right singular

*              vectors in VT

*              (CWorkspace: 0)

*              (RWorkspace: need BDSPAC)

*

               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,

     $                      ldvt, a, lda, cdum, 1, rwork( irwork ),

     $                      info )

            END IF

*

         END IF

*

      END IF

*

*     Undo scaling if necessary

*

      IF( iscl.EQ.1 ) THEN

         IF( anrm.GT.bignum )

     $      CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,

     $                   ierr )

         IF( info.NE.0 .AND. anrm.GT.bignum )

     $      CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,

     $                   rwork( ie ), minmn, ierr )

         IF( anrm.LT.smlnum )

     $      CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,

     $                   ierr )

         IF( info.NE.0 .AND. anrm.LT.smlnum )

     $      CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,

     $                   rwork( ie ), minmn, ierr )

      END IF

*

*     Return optimal workspace in WORK(1)

*

      work( 1 ) = maxwrk

*

      return

*

*     End of CGESVD

*

      END