LAPACK  3.6.0
LAPACK: Linear Algebra PACKage
Collaboration diagram for complex16:

Functions

subroutine zgejsv (JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, M, N, A, LDA, SVA, U, LDU, V, LDV, CWORK, LWORK, RWORK, LRWORK, IWORK, INFO)
 ZGEJSV More...
 
subroutine zgesdd (JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, IWORK, INFO)
 ZGESDD More...
 
subroutine zgesvd (JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
  ZGESVD computes the singular value decomposition (SVD) for GE matrices More...
 
subroutine zgesvdx (JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, IL, IU, NS, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, IWORK, INFO)
  ZGESVDX computes the singular value decomposition (SVD) for GE matrices More...
 

Detailed Description

This is the group of complex16 singular value driver functions for GE matrices

Function Documentation

subroutine zgejsv ( character*1  JOBA,
character*1  JOBU,
character*1  JOBV,
character*1  JOBR,
character*1  JOBT,
character*1  JOBP,
integer  M,
integer  N,
double complex, dimension( lda, * )  A,
integer  LDA,
double precision, dimension( n )  SVA,
double complex, dimension( ldu, * )  U,
integer  LDU,
double complex, dimension( ldv, * )  V,
integer  LDV,
double complex, dimension( lwork )  CWORK,
integer  LWORK,
double precision, dimension( * )  RWORK,
integer  LRWORK,
integer, dimension( * )  IWORK,
integer  INFO 
)

ZGEJSV

Download ZGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 \param[in] JOBA
@verbatim 
          JOBA is CHARACTER*1
         Specifies the level of accuracy:
       = 'C': This option works well (high relative accuracy) if A = B * D,
              with well-conditioned B and arbitrary diagonal matrix D.
              The accuracy cannot be spoiled by COLUMN scaling. The
              accuracy of the computed output depends on the condition of
              B, and the procedure aims at the best theoretical accuracy.
              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
              bounded by f(M,N)*epsilon* cond(B), independent of D.
              The input matrix is preprocessed with the QRF with column
              pivoting. This initial preprocessing and preconditioning by
              a rank revealing QR factorization is common for all values of
              JOBA. Additional actions are specified as follows:
       = 'E': Computation as with 'C' with an additional estimate of the
              condition number of B. It provides a realistic error bound.
       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
              D1, D2, and well-conditioned matrix C, this option gives
              higher accuracy than the 'C' option. If the structure of the
              input matrix is not known, and relative accuracy is
              desirable, then this option is advisable. The input matrix A
              is preprocessed with QR factorization with FULL (row and
              column) pivoting.
       = 'G'  Computation as with 'F' with an additional estimate of the
              condition number of B, where A=D*B. If A has heavily weighted
              rows, then using this condition number gives too pessimistic
              error bound.
       = 'A': Small singular values are the noise and the matrix is treated
              as numerically rank defficient. The error in the computed
              singular values is bounded by f(m,n)*epsilon*||A||.
              The computed SVD A = U * S * V^* restores A up to
              f(m,n)*epsilon*||A||.
              This gives the procedure the licence to discard (set to zero)
              all singular values below N*epsilon*||A||.
       = 'R': Similar as in 'A'. Rank revealing property of the initial
              QR factorization is used do reveal (using triangular factor)
              a gap sigma_{r+1} < epsilon * sigma_r in which case the
              numerical RANK is declared to be r. The SVD is computed with
              absolute error bounds, but more accurately than with 'A'.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
         Specifies whether to compute the columns of U:
       = 'U': N columns of U are returned in the array U.
       = 'F': full set of M left sing. vectors is returned in the array U.
       = 'W': U may be used as workspace of length M*N. See the description
              of U.
       = 'N': U is not computed.
[in]JOBV
          JOBV is CHARACTER*1
         Specifies whether to compute the matrix V:
       = 'V': N columns of V are returned in the array V; Jacobi rotations
              are not explicitly accumulated.
       = 'J': N columns of V are returned in the array V, but they are
              computed as the product of Jacobi rotations. This option is
              allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
       = 'W': V may be used as workspace of length N*N. See the description
              of V.
       = 'N': V is not computed.
[in]JOBR
          JOBR is CHARACTER*1
         Specifies the RANGE for the singular values. Issues the licence to
         set to zero small positive singular values if they are outside
         specified range. If A .NE. 0 is scaled so that the largest singular
         value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
         the licence to kill columns of A whose norm in c*A is less than
         SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
         where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
       = 'N': Do not kill small columns of c*A. This option assumes that
              BLAS and QR factorizations and triangular solvers are
              implemented to work in that range. If the condition of A
              is greater than BIG, use ZGESVJ.
       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
              (roughly, as described above). This option is recommended.
                                             ===========================
         For computing the singular values in the FULL range [SFMIN,BIG]
         use ZGESVJ.
[in]JOBT
          JOBT is CHARACTER*1
         If the matrix is square then the procedure may determine to use
         transposed A if A^* seems to be better with respect to convergence.
         If the matrix is not square, JOBT is ignored. This is subject to
         changes in the future.
         The decision is based on two values of entropy over the adjoint
         orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
       = 'T': transpose if entropy test indicates possibly faster
         convergence of Jacobi process if A^* is taken as input. If A is
         replaced with A^*, then the row pivoting is included automatically.
       = 'N': do not speculate.
         This option can be used to compute only the singular values, or the
         full SVD (U, SIGMA and V). For only one set of singular vectors
         (U or V), the caller should provide both U and V, as one of the
         matrices is used as workspace if the matrix A is transposed.
         The implementer can easily remove this constraint and make the
         code more complicated. See the descriptions of U and V.
[in]JOBP
          JOBP is CHARACTER*1
         Issues the licence to introduce structured perturbations to drown
         denormalized numbers. This licence should be active if the
         denormals are poorly implemented, causing slow computation,
         especially in cases of fast convergence (!). For details see [1,2].
         For the sake of simplicity, this perturbations are included only
         when the full SVD or only the singular values are requested. The
         implementer/user can easily add the perturbation for the cases of
         computing one set of singular vectors.
       = 'P': introduce perturbation
       = 'N': do not perturb
[in]M
          M is INTEGER
         The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
         The number of columns of the input matrix A. M >= N >= 0.
[in,out]A
          A is DOUBLE COMPLEX array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]SVA
          SVA is DOUBLE PRECISION array, dimension (N)
          On exit,
          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
            computation SVA contains Euclidean column norms of the
            iterated matrices in the array A.
          - For WORK(1) .NE. WORK(2): The singular values of A are
            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
            sigma_max(A) overflows or if small singular values have been
            saved from underflow by scaling the input matrix A.
          - If JOBR='R' then some of the singular values may be returned
            as exact zeros obtained by "set to zero" because they are
            below the numerical rank threshold or are denormalized numbers.
[out]U
          U is DOUBLE COMPLEX array, dimension ( LDU, N )
          If JOBU = 'U', then U contains on exit the M-by-N matrix of
                         the left singular vectors.
          If JOBU = 'F', then U contains on exit the M-by-M matrix of
                         the left singular vectors, including an ONB
                         of the orthogonal complement of the Range(A).
          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
                         then U is used as workspace if the procedure
                         replaces A with A^*. In that case, [V] is computed
                         in U as left singular vectors of A^* and then
                         copied back to the V array. This 'W' option is just
                         a reminder to the caller that in this case U is
                         reserved as workspace of length N*N.
          If JOBU = 'N'  U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U,  LDU >= 1.
          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
[out]V
          V is DOUBLE COMPLEX array, dimension ( LDV, N )
          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
                         the right singular vectors;
          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
                         then V is used as workspace if the pprocedure
                         replaces A with A^*. In that case, [U] is computed
                         in V as right singular vectors of A^* and then
                         copied back to the U array. This 'W' option is just
                         a reminder to the caller that in this case V is
                         reserved as workspace of length N*N.
          If JOBV = 'N'  V is not referenced.
[in]LDV
          LDV is INTEGER
          The leading dimension of the array V,  LDV >= 1.
          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
[out]CWORK
 CWORK (workspace)
          CWORK is DOUBLE COMPLEX array, dimension at least LWORK.     
[in]LWORK
          LWORK is INTEGER
          Length of CWORK to confirm proper allocation of workspace.
          LWORK depends on the job:

          1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
            1.1 .. no scaled condition estimate required (JOBE.EQ.'N'):
               LWORK >= 2*N+1. This is the minimal requirement.
               ->> For optimal performance (blocked code) the optimal value
               is LWORK >= N + (N+1)*NB. Here NB is the optimal
               block size for ZGEQP3 and ZGEQRF.
               In general, optimal LWORK is computed as 
               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF)).        
            1.2. .. an estimate of the scaled condition number of A is
               required (JOBA='E', or 'G'). In this case, LWORK the minimal
               requirement is LWORK >= N*N + 3*N.
               ->> For optimal performance (blocked code) the optimal value 
               is LWORK >= max(N+(N+1)*NB, N*N+3*N).
               In general, the optimal length LWORK is computed as
               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), 
                                                     N+N*N+LWORK(CPOCON)).

          2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
             (JOBU.EQ.'N')
            -> the minimal requirement is LWORK >= 3*N.
            -> For optimal performance, LWORK >= max(N+(N+1)*NB, 3*N,2*N+N*NB),
               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ,
               CUNMLQ. In general, the optimal length LWORK is computed as
               LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(CPOCON), N+LWORK(ZGESVJ),
                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(CUNMLQ)).

          3. If SIGMA and the left singular vectors are needed
            -> the minimal requirement is LWORK >= 3*N.
            -> For optimal performance:
               if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB),
               where NB is the optimal block size for ZGEQP3, ZGEQRF, CUNMQR.
               In general, the optimal length LWORK is computed as
               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(CPOCON),
                        2*N+LWORK(ZGEQRF), N+LWORK(CUNMQR)). 
               
          4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and 
            4.1. if JOBV.EQ.'V'  
               the minimal requirement is LWORK >= 5*N+2*N*N. 
            4.2. if JOBV.EQ.'J' the minimal requirement is 
               LWORK >= 4*N+N*N.
            In both cases, the allocated CWORK can accomodate blocked runs
            of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, CUNMLQ.
[out]RWORK
          RWORK is DOUBLE PRECISION array, dimension at least LRWORK.
          On exit,
          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
                    such that SCALE*SVA(1:N) are the computed singular values
                    of A. (See the description of SVA().)
          RWORK(2) = See the description of RWORK(1).
          RWORK(3) = SCONDA is an estimate for the condition number of
                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
                    It is computed using SPOCON. It holds
                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
                    where R is the triangular factor from the QRF of A.
                    However, if R is truncated and the numerical rank is
                    determined to be strictly smaller than N, SCONDA is
                    returned as -1, thus indicating that the smallest
                    singular values might be lost.

          If full SVD is needed, the following two condition numbers are
          useful for the analysis of the algorithm. They are provied for
          a developer/implementer who is familiar with the details of
          the method.

          RWORK(4) = an estimate of the scaled condition number of the
                    triangular factor in the first QR factorization.
          RWORK(5) = an estimate of the scaled condition number of the
                    triangular factor in the second QR factorization.
          The following two parameters are computed if JOBT .EQ. 'T'.
          They are provided for a developer/implementer who is familiar
          with the details of the method.
          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
                    probability simplex.
          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
[in]LRWORK
          LRWORK is INTEGER
          Length of RWORK to confirm proper allocation of workspace.
          LRWORK depends on the job:

       1. If only singular values are requested i.e. if 
          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') 
          then:
          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
          then LRWORK = max( 7, N + 2 * M ). 
          1.2. Otherwise, LRWORK  = max( 7, 2 * N ).
       2. If singular values with the right singular vectors are requested
          i.e. if 
          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. 
          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
          then:
          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
          then LRWORK = max( 7, N + 2 * M ). 
          2.2. Otherwise, LRWORK  = max( 7, 2 * N ).      
       3. If singular values with the left singular vectors are requested, i.e. if    
          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
          then:
          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
          then LRWORK = max( 7, N + 2 * M ). 
          3.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
       4. If singular values with both the left and the right singular vectors 
          are requested, i.e. if     
          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
          then:
          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
          then LRWORK = max( 7, N + 2 * M ). 
          4.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
[out]IWORK
          IWORK is INTEGER array, of dimension:
                If LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), then 
                the dimension of IWORK is max( 3, 2 * N + M ).
                Otherwise, the dimension of IWORK is 
                -> max( 3, 2*N ) for full SVD
                -> max( 3, N ) for singular values only or singular
                   values with one set of singular vectors (left or right)
          On exit,
          IWORK(1) = the numerical rank determined after the initial
                     QR factorization with pivoting. See the descriptions
                     of JOBA and JOBR.
          IWORK(2) = the number of the computed nonzero singular values
          IWORK(3) = if nonzero, a warning message:
                     If IWORK(3).EQ.1 then some of the column norms of A
                     were denormalized floats. The requested high accuracy
                     is not warranted by the data.
[out]INFO
          INFO is INTEGER
           < 0  : if INFO = -i, then the i-th argument had an illegal value.
           = 0 :  successfull exit;
           > 0 :  ZGEJSV  did not converge in the maximal allowed number
                  of sweeps. The computed values may be inaccurate.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2015
Further Details:
  ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3,
  ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an
  additional row pivoting can be used as a preprocessor, which in some
  cases results in much higher accuracy. An example is matrix A with the
  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
  diagonal matrices and C is well-conditioned matrix. In that case, complete
  pivoting in the first QR factorizations provides accuracy dependent on the
  condition number of C, and independent of D1, D2. Such higher accuracy is
  not completely understood theoretically, but it works well in practice.
  Further, if A can be written as A = B*D, with well-conditioned B and some
  diagonal D, then the high accuracy is guaranteed, both theoretically and
  in software, independent of D. For more details see [1], [2].
     The computational range for the singular values can be the full range
  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
  & LAPACK routines called by ZGEJSV are implemented to work in that range.
  If that is not the case, then the restriction for safe computation with
  the singular values in the range of normalized IEEE numbers is that the
  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
  overflow. This code (ZGEJSV) is best used in this restricted range,
  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
  returned as zeros. See JOBR for details on this.
     Further, this implementation is somewhat slower than the one described
  in [1,2] due to replacement of some non-LAPACK components, and because
  the choice of some tuning parameters in the iterative part (ZGESVJ) is
  left to the implementer on a particular machine.
     The rank revealing QR factorization (in this code: ZGEQP3) should be
  implemented as in [3]. We have a new version of ZGEQP3 under development
  that is more robust than the current one in LAPACK, with a cleaner cut in
  rank defficient cases. It will be available in the SIGMA library [4].
  If M is much larger than N, it is obvious that the inital QRF with
  column pivoting can be preprocessed by the QRF without pivoting. That
  well known trick is not used in ZGEJSV because in some cases heavy row
  weighting can be treated with complete pivoting. The overhead in cases
  M much larger than N is then only due to pivoting, but the benefits in
  terms of accuracy have prevailed. The implementer/user can incorporate
  this extra QRF step easily. The implementer can also improve data movement
  (matrix transpose, matrix copy, matrix transposed copy) - this
  implementation of ZGEJSV uses only the simplest, naive data movement.  \par Contributors: 
  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)  \par References: 
@verbatim  
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 519 of file zgejsv.f.

519 *
520 * -- LAPACK computational routine (version 3.6.0) --
521 * -- LAPACK is a software package provided by Univ. of Tennessee, --
522 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
523 * November 2015
524 *
525 * .. Scalar Arguments ..
526  IMPLICIT NONE
527  INTEGER info, lda, ldu, ldv, lwork, lrwork, m, n
528 * ..
529 * .. Array Arguments ..
530  DOUBLE COMPLEX a( lda, * ), u( ldu, * ), v( ldv, * ),
531  $ cwork( lwork )
532  DOUBLE PRECISION sva( n ), rwork( * )
533  INTEGER iwork( * )
534  CHARACTER*1 joba, jobp, jobr, jobt, jobu, jobv
535 * ..
536 *
537 * ===========================================================================
538 *
539 * .. Local Parameters ..
540  DOUBLE PRECISION zero, one
541  parameter( zero = 0.0d0, one = 1.0d0 )
542  DOUBLE COMPLEX czero, cone
543  parameter( czero = ( 0.0d0, 0.0d0 ), cone = ( 1.0d0, 0.0d0 ) )
544 * ..
545 * .. Local Scalars ..
546  DOUBLE COMPLEX ctemp
547  DOUBLE PRECISION aapp, aaqq, aatmax, aatmin, big, big1,
548  $ cond_ok, condr1, condr2, entra, entrat, epsln,
549  $ maxprj, scalem, sconda, sfmin, small, temp1,
550  $ uscal1, uscal2, xsc
551  INTEGER ierr, n1, nr, numrank, p, q, warning
552  LOGICAL almort, defr, errest, goscal, jracc, kill, lsvec,
553  $ l2aber, l2kill, l2pert, l2rank, l2tran,
554  $ noscal, rowpiv, rsvec, transp
555 * ..
556 * .. Intrinsic Functions ..
557  INTRINSIC abs, dcmplx, dconjg, dlog, dmax1, dmin1, dfloat,
558  $ max0, min0, nint, dsqrt
559 * ..
560 * .. External Functions ..
561  DOUBLE PRECISION dlamch, dznrm2
562  INTEGER idamax
563  LOGICAL lsame
564  EXTERNAL idamax, lsame, dlamch, dznrm2
565 * ..
566 * .. External Subroutines ..
567  EXTERNAL zcopy, zgelqf, zgeqp3, zgeqrf, zlacpy, zlascl,
570 *
571  EXTERNAL zgesvj
572 * ..
573 *
574 * Test the input arguments
575 *
576 
577  lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
578  jracc = lsame( jobv, 'J' )
579  rsvec = lsame( jobv, 'V' ) .OR. jracc
580  rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
581  l2rank = lsame( joba, 'R' )
582  l2aber = lsame( joba, 'A' )
583  errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
584  l2tran = lsame( jobt, 'T' )
585  l2kill = lsame( jobr, 'R' )
586  defr = lsame( jobr, 'N' )
587  l2pert = lsame( jobp, 'P' )
588 *
589  IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
590  $ errest .OR. lsame( joba, 'C' ) )) THEN
591  info = - 1
592  ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
593  $ lsame( jobu, 'W' )) ) THEN
594  info = - 2
595  ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
596  $ lsame( jobv, 'W' )) .OR. ( jracc .AND. (.NOT.lsvec) ) ) THEN
597  info = - 3
598  ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
599  info = - 4
600  ELSE IF ( .NOT. ( l2tran .OR. lsame( jobt, 'N' ) ) ) THEN
601  info = - 5
602  ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
603  info = - 6
604  ELSE IF ( m .LT. 0 ) THEN
605  info = - 7
606  ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
607  info = - 8
608  ELSE IF ( lda .LT. m ) THEN
609  info = - 10
610  ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
611  info = - 13
612  ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
613  info = - 15
614  ELSE IF ( (.NOT.(lsvec .OR. rsvec .OR. errest).AND.
615  $ (lwork .LT. 2*n+1)) .OR.
616  $ (.NOT.(lsvec .OR. rsvec) .AND. errest .AND.
617  $ (lwork .LT. n*n+3*n)) .OR.
618  $ (lsvec .AND. (.NOT.rsvec) .AND. (lwork .LT. 3*n))
619  $ .OR.
620  $ (rsvec .AND. (.NOT.lsvec) .AND. (lwork .LT. 3*n))
621  $ .OR.
622  $ (lsvec .AND. rsvec .AND. (.NOT.jracc) .AND.
623  $ (lwork.LT.5*n+2*n*n))
624  $ .OR. (lsvec .AND. rsvec .AND. jracc .AND.
625  $ lwork.LT.4*n+n*n))
626  $ THEN
627  info = - 17
628  ELSE IF ( lrwork.LT. max0(n+2*m,7)) THEN
629  info = -19
630  ELSE
631 * #:)
632  info = 0
633  END IF
634 *
635  IF ( info .NE. 0 ) THEN
636 * #:(
637  CALL xerbla( 'ZGEJSV', - info )
638  RETURN
639  END IF
640 *
641 * Quick return for void matrix (Y3K safe)
642 * #:)
643  IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) RETURN
644 *
645 * Determine whether the matrix U should be M x N or M x M
646 *
647  IF ( lsvec ) THEN
648  n1 = n
649  IF ( lsame( jobu, 'F' ) ) n1 = m
650  END IF
651 *
652 * Set numerical parameters
653 *
654 *! NOTE: Make sure DLAMCH() does not fail on the target architecture.
655 *
656  epsln = dlamch('Epsilon')
657  sfmin = dlamch('SafeMinimum')
658  small = sfmin / epsln
659  big = dlamch('O')
660 * BIG = ONE / SFMIN
661 *
662 * Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
663 *
664 *(!) If necessary, scale SVA() to protect the largest norm from
665 * overflow. It is possible that this scaling pushes the smallest
666 * column norm left from the underflow threshold (extreme case).
667 *
668  scalem = one / dsqrt(dfloat(m)*dfloat(n))
669  noscal = .true.
670  goscal = .true.
671  DO 1874 p = 1, n
672  aapp = zero
673  aaqq = one
674  CALL zlassq( m, a(1,p), 1, aapp, aaqq )
675  IF ( aapp .GT. big ) THEN
676  info = - 9
677  CALL xerbla( 'ZGEJSV', -info )
678  RETURN
679  END IF
680  aaqq = dsqrt(aaqq)
681  IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
682  sva(p) = aapp * aaqq
683  ELSE
684  noscal = .false.
685  sva(p) = aapp * ( aaqq * scalem )
686  IF ( goscal ) THEN
687  goscal = .false.
688  CALL dscal( p-1, scalem, sva, 1 )
689  END IF
690  END IF
691  1874 CONTINUE
692 *
693  IF ( noscal ) scalem = one
694 *
695  aapp = zero
696  aaqq = big
697  DO 4781 p = 1, n
698  aapp = dmax1( aapp, sva(p) )
699  IF ( sva(p) .NE. zero ) aaqq = dmin1( aaqq, sva(p) )
700  4781 CONTINUE
701 *
702 * Quick return for zero M x N matrix
703 * #:)
704  IF ( aapp .EQ. zero ) THEN
705  IF ( lsvec ) CALL zlaset( 'G', m, n1, czero, cone, u, ldu )
706  IF ( rsvec ) CALL zlaset( 'G', n, n, czero, cone, v, ldv )
707  rwork(1) = one
708  rwork(2) = one
709  IF ( errest ) rwork(3) = one
710  IF ( lsvec .AND. rsvec ) THEN
711  rwork(4) = one
712  rwork(5) = one
713  END IF
714  IF ( l2tran ) THEN
715  rwork(6) = zero
716  rwork(7) = zero
717  END IF
718  iwork(1) = 0
719  iwork(2) = 0
720  iwork(3) = 0
721  RETURN
722  END IF
723 *
724 * Issue warning if denormalized column norms detected. Override the
725 * high relative accuracy request. Issue licence to kill columns
726 * (set them to zero) whose norm is less than sigma_max / BIG (roughly).
727 * #:(
728  warning = 0
729  IF ( aaqq .LE. sfmin ) THEN
730  l2rank = .true.
731  l2kill = .true.
732  warning = 1
733  END IF
734 *
735 * Quick return for one-column matrix
736 * #:)
737  IF ( n .EQ. 1 ) THEN
738 *
739  IF ( lsvec ) THEN
740  CALL zlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
741  CALL zlacpy( 'A', m, 1, a, lda, u, ldu )
742 * computing all M left singular vectors of the M x 1 matrix
743  IF ( n1 .NE. n ) THEN
744  CALL zgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr )
745  CALL zungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr )
746  CALL zcopy( m, a(1,1), 1, u(1,1), 1 )
747  END IF
748  END IF
749  IF ( rsvec ) THEN
750  v(1,1) = cone
751  END IF
752  IF ( sva(1) .LT. (big*scalem) ) THEN
753  sva(1) = sva(1) / scalem
754  scalem = one
755  END IF
756  rwork(1) = one / scalem
757  rwork(2) = one
758  IF ( sva(1) .NE. zero ) THEN
759  iwork(1) = 1
760  IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
761  iwork(2) = 1
762  ELSE
763  iwork(2) = 0
764  END IF
765  ELSE
766  iwork(1) = 0
767  iwork(2) = 0
768  END IF
769  iwork(3) = 0
770  IF ( errest ) rwork(3) = one
771  IF ( lsvec .AND. rsvec ) THEN
772  rwork(4) = one
773  rwork(5) = one
774  END IF
775  IF ( l2tran ) THEN
776  rwork(6) = zero
777  rwork(7) = zero
778  END IF
779  RETURN
780 *
781  END IF
782 *
783  transp = .false.
784  l2tran = l2tran .AND. ( m .EQ. n )
785 *
786  aatmax = -one
787  aatmin = big
788  IF ( rowpiv .OR. l2tran ) THEN
789 *
790 * Compute the row norms, needed to determine row pivoting sequence
791 * (in the case of heavily row weighted A, row pivoting is strongly
792 * advised) and to collect information needed to compare the
793 * structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
794 *
795  IF ( l2tran ) THEN
796  DO 1950 p = 1, m
797  xsc = zero
798  temp1 = one
799  CALL zlassq( n, a(p,1), lda, xsc, temp1 )
800 * ZLASSQ gets both the ell_2 and the ell_infinity norm
801 * in one pass through the vector
802  rwork(m+n+p) = xsc * scalem
803  rwork(n+p) = xsc * (scalem*dsqrt(temp1))
804  aatmax = dmax1( aatmax, rwork(n+p) )
805  IF (rwork(n+p) .NE. zero)
806  $ aatmin = dmin1(aatmin,rwork(n+p))
807  1950 CONTINUE
808  ELSE
809  DO 1904 p = 1, m
810  rwork(m+n+p) = scalem*abs( a(p,idamax(n,a(p,1),lda)) )
811  aatmax = dmax1( aatmax, rwork(m+n+p) )
812  aatmin = dmin1( aatmin, rwork(m+n+p) )
813  1904 CONTINUE
814  END IF
815 *
816  END IF
817 *
818 * For square matrix A try to determine whether A^* would be better
819 * input for the preconditioned Jacobi SVD, with faster convergence.
820 * The decision is based on an O(N) function of the vector of column
821 * and row norms of A, based on the Shannon entropy. This should give
822 * the right choice in most cases when the difference actually matters.
823 * It may fail and pick the slower converging side.
824 *
825  entra = zero
826  entrat = zero
827  IF ( l2tran ) THEN
828 *
829  xsc = zero
830  temp1 = one
831  CALL zlassq( n, sva, 1, xsc, temp1 )
832  temp1 = one / temp1
833 *
834  entra = zero
835  DO 1113 p = 1, n
836  big1 = ( ( sva(p) / xsc )**2 ) * temp1
837  IF ( big1 .NE. zero ) entra = entra + big1 * dlog(big1)
838  1113 CONTINUE
839  entra = - entra / dlog(dfloat(n))
840 *
841 * Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
842 * It is derived from the diagonal of A^* * A. Do the same with the
843 * diagonal of A * A^*, compute the entropy of the corresponding
844 * probability distribution. Note that A * A^* and A^* * A have the
845 * same trace.
846 *
847  entrat = zero
848  DO 1114 p = n+1, n+m
849  big1 = ( ( rwork(p) / xsc )**2 ) * temp1
850  IF ( big1 .NE. zero ) entrat = entrat + big1 * dlog(big1)
851  1114 CONTINUE
852  entrat = - entrat / dlog(dfloat(m))
853 *
854 * Analyze the entropies and decide A or A^*. Smaller entropy
855 * usually means better input for the algorithm.
856 *
857  transp = ( entrat .LT. entra )
858  transp = .true.
859 *
860 * If A^* is better than A, take the adjoint of A.
861 *
862  IF ( transp ) THEN
863 * In an optimal implementation, this trivial transpose
864 * should be replaced with faster transpose.
865  DO 1115 p = 1, n - 1
866  a(p,p) = dconjg(a(p,p))
867  DO 1116 q = p + 1, n
868  ctemp = dconjg(a(q,p))
869  a(q,p) = dconjg(a(p,q))
870  a(p,q) = ctemp
871  1116 CONTINUE
872  1115 CONTINUE
873  a(n,n) = dconjg(a(n,n))
874  DO 1117 p = 1, n
875  rwork(m+n+p) = sva(p)
876  sva(p) = rwork(n+p)
877 * previously computed row 2-norms are now column 2-norms
878 * of the transposed matrix
879  1117 CONTINUE
880  temp1 = aapp
881  aapp = aatmax
882  aatmax = temp1
883  temp1 = aaqq
884  aaqq = aatmin
885  aatmin = temp1
886  kill = lsvec
887  lsvec = rsvec
888  rsvec = kill
889  IF ( lsvec ) n1 = n
890 *
891  rowpiv = .true.
892  END IF
893 *
894  END IF
895 * END IF L2TRAN
896 *
897 * Scale the matrix so that its maximal singular value remains less
898 * than SQRT(BIG) -- the matrix is scaled so that its maximal column
899 * has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
900 * SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
901 * BLAS routines that, in some implementations, are not capable of
902 * working in the full interval [SFMIN,BIG] and that they may provoke
903 * overflows in the intermediate results. If the singular values spread
904 * from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
905 * one should use ZGESVJ instead of ZGEJSV.
906 *
907  big1 = dsqrt( big )
908  temp1 = dsqrt( big / dfloat(n) )
909 *
910  CALL zlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
911  IF ( aaqq .GT. (aapp * sfmin) ) THEN
912  aaqq = ( aaqq / aapp ) * temp1
913  ELSE
914  aaqq = ( aaqq * temp1 ) / aapp
915  END IF
916  temp1 = temp1 * scalem
917  CALL zlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
918 *
919 * To undo scaling at the end of this procedure, multiply the
920 * computed singular values with USCAL2 / USCAL1.
921 *
922  uscal1 = temp1
923  uscal2 = aapp
924 *
925  IF ( l2kill ) THEN
926 * L2KILL enforces computation of nonzero singular values in
927 * the restricted range of condition number of the initial A,
928 * sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
929  xsc = dsqrt( sfmin )
930  ELSE
931  xsc = small
932 *
933 * Now, if the condition number of A is too big,
934 * sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
935 * as a precaution measure, the full SVD is computed using ZGESVJ
936 * with accumulated Jacobi rotations. This provides numerically
937 * more robust computation, at the cost of slightly increased run
938 * time. Depending on the concrete implementation of BLAS and LAPACK
939 * (i.e. how they behave in presence of extreme ill-conditioning) the
940 * implementor may decide to remove this switch.
941  IF ( ( aaqq.LT.dsqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
942  jracc = .true.
943  END IF
944 *
945  END IF
946  IF ( aaqq .LT. xsc ) THEN
947  DO 700 p = 1, n
948  IF ( sva(p) .LT. xsc ) THEN
949  CALL zlaset( 'A', m, 1, czero, czero, a(1,p), lda )
950  sva(p) = zero
951  END IF
952  700 CONTINUE
953  END IF
954 *
955 * Preconditioning using QR factorization with pivoting
956 *
957  IF ( rowpiv ) THEN
958 * Optional row permutation (Bjoerck row pivoting):
959 * A result by Cox and Higham shows that the Bjoerck's
960 * row pivoting combined with standard column pivoting
961 * has similar effect as Powell-Reid complete pivoting.
962 * The ell-infinity norms of A are made nonincreasing.
963  DO 1952 p = 1, m - 1
964  q = idamax( m-p+1, rwork(m+n+p), 1 ) + p - 1
965  iwork(2*n+p) = q
966  IF ( p .NE. q ) THEN
967  temp1 = rwork(m+n+p)
968  rwork(m+n+p) = rwork(m+n+q)
969  rwork(m+n+q) = temp1
970  END IF
971  1952 CONTINUE
972  CALL zlaswp( n, a, lda, 1, m-1, iwork(2*n+1), 1 )
973  END IF
974 
975 *
976 * End of the preparation phase (scaling, optional sorting and
977 * transposing, optional flushing of small columns).
978 *
979 * Preconditioning
980 *
981 * If the full SVD is needed, the right singular vectors are computed
982 * from a matrix equation, and for that we need theoretical analysis
983 * of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
984 * In all other cases the first RR QRF can be chosen by other criteria
985 * (eg speed by replacing global with restricted window pivoting, such
986 * as in xGEQPX from TOMS # 782). Good results will be obtained using
987 * xGEQPX with properly (!) chosen numerical parameters.
988 * Any improvement of ZGEQP3 improves overal performance of ZGEJSV.
989 *
990 * A * P1 = Q1 * [ R1^* 0]^*:
991  DO 1963 p = 1, n
992 * .. all columns are free columns
993  iwork(p) = 0
994  1963 CONTINUE
995  CALL zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
996  $ rwork, ierr )
997 *
998 * The upper triangular matrix R1 from the first QRF is inspected for
999 * rank deficiency and possibilities for deflation, or possible
1000 * ill-conditioning. Depending on the user specified flag L2RANK,
1001 * the procedure explores possibilities to reduce the numerical
1002 * rank by inspecting the computed upper triangular factor. If
1003 * L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
1004 * A + dA, where ||dA|| <= f(M,N)*EPSLN.
1005 *
1006  nr = 1
1007  IF ( l2aber ) THEN
1008 * Standard absolute error bound suffices. All sigma_i with
1009 * sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1010 * agressive enforcement of lower numerical rank by introducing a
1011 * backward error of the order of N*EPSLN*||A||.
1012  temp1 = dsqrt(dfloat(n))*epsln
1013  DO 3001 p = 2, n
1014  IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1015  nr = nr + 1
1016  ELSE
1017  GO TO 3002
1018  END IF
1019  3001 CONTINUE
1020  3002 CONTINUE
1021  ELSE IF ( l2rank ) THEN
1022 * .. similarly as above, only slightly more gentle (less agressive).
1023 * Sudden drop on the diagonal of R1 is used as the criterion for
1024 * close-to-rank-defficient.
1025  temp1 = dsqrt(sfmin)
1026  DO 3401 p = 2, n
1027  IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1028  $ ( abs(a(p,p)) .LT. small ) .OR.
1029  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1030  nr = nr + 1
1031  3401 CONTINUE
1032  3402 CONTINUE
1033 *
1034  ELSE
1035 * The goal is high relative accuracy. However, if the matrix
1036 * has high scaled condition number the relative accuracy is in
1037 * general not feasible. Later on, a condition number estimator
1038 * will be deployed to estimate the scaled condition number.
1039 * Here we just remove the underflowed part of the triangular
1040 * factor. This prevents the situation in which the code is
1041 * working hard to get the accuracy not warranted by the data.
1042  temp1 = dsqrt(sfmin)
1043  DO 3301 p = 2, n
1044  IF ( ( abs(a(p,p)) .LT. small ) .OR.
1045  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1046  nr = nr + 1
1047  3301 CONTINUE
1048  3302 CONTINUE
1049 *
1050  END IF
1051 *
1052  almort = .false.
1053  IF ( nr .EQ. n ) THEN
1054  maxprj = one
1055  DO 3051 p = 2, n
1056  temp1 = abs(a(p,p)) / sva(iwork(p))
1057  maxprj = dmin1( maxprj, temp1 )
1058  3051 CONTINUE
1059  IF ( maxprj**2 .GE. one - dfloat(n)*epsln ) almort = .true.
1060  END IF
1061 *
1062 *
1063  sconda = - one
1064  condr1 = - one
1065  condr2 = - one
1066 *
1067  IF ( errest ) THEN
1068  IF ( n .EQ. nr ) THEN
1069  IF ( rsvec ) THEN
1070 * .. V is available as workspace
1071  CALL zlacpy( 'U', n, n, a, lda, v, ldv )
1072  DO 3053 p = 1, n
1073  temp1 = sva(iwork(p))
1074  CALL zdscal( p, one/temp1, v(1,p), 1 )
1075  3053 CONTINUE
1076  CALL zpocon( 'U', n, v, ldv, one, temp1,
1077  $ cwork(n+1), rwork, ierr )
1078 *
1079  ELSE IF ( lsvec ) THEN
1080 * .. U is available as workspace
1081  CALL zlacpy( 'U', n, n, a, lda, u, ldu )
1082  DO 3054 p = 1, n
1083  temp1 = sva(iwork(p))
1084  CALL zdscal( p, one/temp1, u(1,p), 1 )
1085  3054 CONTINUE
1086  CALL zpocon( 'U', n, u, ldu, one, temp1,
1087  $ cwork(n+1), rwork, ierr )
1088  ELSE
1089  CALL zlacpy( 'U', n, n, a, lda, cwork(n+1), n )
1090  DO 3052 p = 1, n
1091  temp1 = sva(iwork(p))
1092  CALL zdscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 )
1093  3052 CONTINUE
1094 * .. the columns of R are scaled to have unit Euclidean lengths.
1095  CALL zpocon( 'U', n, cwork(n+1), n, one, temp1,
1096  $ cwork(n+n*n+1), rwork, ierr )
1097 *
1098  END IF
1099  sconda = one / dsqrt(temp1)
1100 * SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1101 * N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1102  ELSE
1103  sconda = - one
1104  END IF
1105  END IF
1106 *
1107  l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. dsqrt(big1) )
1108 * If there is no violent scaling, artificial perturbation is not needed.
1109 *
1110 * Phase 3:
1111 *
1112  IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1113 *
1114 * Singular Values only
1115 *
1116 * .. transpose A(1:NR,1:N)
1117  DO 1946 p = 1, min0( n-1, nr )
1118  CALL zcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1119  CALL zlacgv( n-p+1, a(p,p), 1 )
1120  1946 CONTINUE
1121  IF ( nr .EQ. n ) a(n,n) = dconjg(a(n,n))
1122 *
1123 * The following two DO-loops introduce small relative perturbation
1124 * into the strict upper triangle of the lower triangular matrix.
1125 * Small entries below the main diagonal are also changed.
1126 * This modification is useful if the computing environment does not
1127 * provide/allow FLUSH TO ZERO underflow, for it prevents many
1128 * annoying denormalized numbers in case of strongly scaled matrices.
1129 * The perturbation is structured so that it does not introduce any
1130 * new perturbation of the singular values, and it does not destroy
1131 * the job done by the preconditioner.
1132 * The licence for this perturbation is in the variable L2PERT, which
1133 * should be .FALSE. if FLUSH TO ZERO underflow is active.
1134 *
1135  IF ( .NOT. almort ) THEN
1136 *
1137  IF ( l2pert ) THEN
1138 * XSC = SQRT(SMALL)
1139  xsc = epsln / dfloat(n)
1140  DO 4947 q = 1, nr
1141  ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1142  DO 4949 p = 1, n
1143  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1144  $ .OR. ( p .LT. q ) )
1145 * $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1146  $ a(p,q) = ctemp
1147  4949 CONTINUE
1148  4947 CONTINUE
1149  ELSE
1150  CALL zlaset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1151  END IF
1152 *
1153 * .. second preconditioning using the QR factorization
1154 *
1155  CALL zgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr )
1156 *
1157 * .. and transpose upper to lower triangular
1158  DO 1948 p = 1, nr - 1
1159  CALL zcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1160  CALL zlacgv( nr-p+1, a(p,p), 1 )
1161  1948 CONTINUE
1162 *
1163  END IF
1164 *
1165 * Row-cyclic Jacobi SVD algorithm with column pivoting
1166 *
1167 * .. again some perturbation (a "background noise") is added
1168 * to drown denormals
1169  IF ( l2pert ) THEN
1170 * XSC = SQRT(SMALL)
1171  xsc = epsln / dfloat(n)
1172  DO 1947 q = 1, nr
1173  ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1174  DO 1949 p = 1, nr
1175  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1176  $ .OR. ( p .LT. q ) )
1177 * $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1178  $ a(p,q) = ctemp
1179  1949 CONTINUE
1180  1947 CONTINUE
1181  ELSE
1182  CALL zlaset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda )
1183  END IF
1184 *
1185 * .. and one-sided Jacobi rotations are started on a lower
1186 * triangular matrix (plus perturbation which is ignored in
1187 * the part which destroys triangular form (confusing?!))
1188 *
1189  CALL zgesvj( 'L', 'NoU', 'NoV', nr, nr, a, lda, sva,
1190  $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1191 *
1192  scalem = rwork(1)
1193  numrank = nint(rwork(2))
1194 *
1195 *
1196  ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1197 *
1198 * -> Singular Values and Right Singular Vectors <-
1199 *
1200  IF ( almort ) THEN
1201 *
1202 * .. in this case NR equals N
1203  DO 1998 p = 1, nr
1204  CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1205  CALL zlacgv( n-p+1, v(p,p), 1 )
1206  1998 CONTINUE
1207  CALL zlaset( 'Upper', nr-1,nr-1, czero, czero, v(1,2), ldv )
1208 *
1209  CALL zgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,
1210  $ cwork, lwork, rwork, lrwork, info )
1211  scalem = rwork(1)
1212  numrank = nint(rwork(2))
1213 
1214  ELSE
1215 *
1216 * .. two more QR factorizations ( one QRF is not enough, two require
1217 * accumulated product of Jacobi rotations, three are perfect )
1218 *
1219  CALL zlaset( 'Lower', nr-1,nr-1, czero, czero, a(2,1), lda )
1220  CALL zgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr)
1221  CALL zlacpy( 'Lower', nr, nr, a, lda, v, ldv )
1222  CALL zlaset( 'Upper', nr-1,nr-1, czero, czero, v(1,2), ldv )
1223  CALL zgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1224  $ lwork-2*n, ierr )
1225  DO 8998 p = 1, nr
1226  CALL zcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1227  CALL zlacgv( nr-p+1, v(p,p), 1 )
1228  8998 CONTINUE
1229  CALL zlaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1230 *
1231  CALL zgesvj( 'Lower', 'U','N', nr, nr, v,ldv, sva, nr, u,
1232  $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1233  scalem = rwork(1)
1234  numrank = nint(rwork(2))
1235  IF ( nr .LT. n ) THEN
1236  CALL zlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv )
1237  CALL zlaset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv )
1238  CALL zlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv )
1239  END IF
1240 *
1241  CALL zunmlq( 'Left', 'C', n, n, nr, a, lda, cwork,
1242  $ v, ldv, cwork(n+1), lwork-n, ierr )
1243 *
1244  END IF
1245 *
1246  DO 8991 p = 1, n
1247  CALL zcopy( n, v(p,1), ldv, a(iwork(p),1), lda )
1248  8991 CONTINUE
1249  CALL zlacpy( 'All', n, n, a, lda, v, ldv )
1250 *
1251  IF ( transp ) THEN
1252  CALL zlacpy( 'All', n, n, v, ldv, u, ldu )
1253  END IF
1254 *
1255  ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1256 *
1257 * .. Singular Values and Left Singular Vectors ..
1258 *
1259 * .. second preconditioning step to avoid need to accumulate
1260 * Jacobi rotations in the Jacobi iterations.
1261  DO 1965 p = 1, nr
1262  CALL zcopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1263  CALL zlacgv( n-p+1, u(p,p), 1 )
1264  1965 CONTINUE
1265  CALL zlaset( 'Upper', nr-1, nr-1, czero, czero, u(1,2), ldu )
1266 *
1267  CALL zgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1268  $ lwork-2*n, ierr )
1269 *
1270  DO 1967 p = 1, nr - 1
1271  CALL zcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1272  CALL zlacgv( n-p+1, u(p,p), 1 )
1273  1967 CONTINUE
1274  CALL zlaset( 'Upper', nr-1, nr-1, czero, czero, u(1,2), ldu )
1275 *
1276  CALL zgesvj( 'Lower', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1277  $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1278  scalem = rwork(1)
1279  numrank = nint(rwork(2))
1280 *
1281  IF ( nr .LT. m ) THEN
1282  CALL zlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu )
1283  IF ( nr .LT. n1 ) THEN
1284  CALL zlaset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu )
1285  CALL zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu )
1286  END IF
1287  END IF
1288 *
1289  CALL zunmqr( 'Left', 'No Tr', m, n1, n, a, lda, cwork, u,
1290  $ ldu, cwork(n+1), lwork-n, ierr )
1291 *
1292  IF ( rowpiv )
1293  $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1294 *
1295  DO 1974 p = 1, n1
1296  xsc = one / dznrm2( m, u(1,p), 1 )
1297  CALL zdscal( m, xsc, u(1,p), 1 )
1298  1974 CONTINUE
1299 *
1300  IF ( transp ) THEN
1301  CALL zlacpy( 'All', n, n, u, ldu, v, ldv )
1302  END IF
1303 *
1304  ELSE
1305 *
1306 * .. Full SVD ..
1307 *
1308  IF ( .NOT. jracc ) THEN
1309 *
1310  IF ( .NOT. almort ) THEN
1311 *
1312 * Second Preconditioning Step (QRF [with pivoting])
1313 * Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1314 * equivalent to an LQF CALL. Since in many libraries the QRF
1315 * seems to be better optimized than the LQF, we do explicit
1316 * transpose and use the QRF. This is subject to changes in an
1317 * optimized implementation of ZGEJSV.
1318 *
1319  DO 1968 p = 1, nr
1320  CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1321  CALL zlacgv( n-p+1, v(p,p), 1 )
1322  1968 CONTINUE
1323 *
1324 * .. the following two loops perturb small entries to avoid
1325 * denormals in the second QR factorization, where they are
1326 * as good as zeros. This is done to avoid painfully slow
1327 * computation with denormals. The relative size of the perturbation
1328 * is a parameter that can be changed by the implementer.
1329 * This perturbation device will be obsolete on machines with
1330 * properly implemented arithmetic.
1331 * To switch it off, set L2PERT=.FALSE. To remove it from the
1332 * code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1333 * The following two loops should be blocked and fused with the
1334 * transposed copy above.
1335 *
1336  IF ( l2pert ) THEN
1337  xsc = dsqrt(small)
1338  DO 2969 q = 1, nr
1339  ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
1340  DO 2968 p = 1, n
1341  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1342  $ .OR. ( p .LT. q ) )
1343 * $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1344  $ v(p,q) = ctemp
1345  IF ( p .LT. q ) v(p,q) = - v(p,q)
1346  2968 CONTINUE
1347  2969 CONTINUE
1348  ELSE
1349  CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
1350  END IF
1351 *
1352 * Estimate the row scaled condition number of R1
1353 * (If R1 is rectangular, N > NR, then the condition number
1354 * of the leading NR x NR submatrix is estimated.)
1355 *
1356  CALL zlacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1357  DO 3950 p = 1, nr
1358  temp1 = dznrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1359  CALL zdscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1360  3950 CONTINUE
1361  CALL zpocon('Lower',nr,cwork(2*n+1),nr,one,temp1,
1362  $ cwork(2*n+nr*nr+1),rwork,ierr)
1363  condr1 = one / dsqrt(temp1)
1364 * .. here need a second oppinion on the condition number
1365 * .. then assume worst case scenario
1366 * R1 is OK for inverse <=> CONDR1 .LT. DFLOAT(N)
1367 * more conservative <=> CONDR1 .LT. SQRT(DFLOAT(N))
1368 *
1369  cond_ok = dsqrt(dsqrt(dfloat(nr)))
1370 *[TP] COND_OK is a tuning parameter.
1371 *
1372  IF ( condr1 .LT. cond_ok ) THEN
1373 * .. the second QRF without pivoting. Note: in an optimized
1374 * implementation, this QRF should be implemented as the QRF
1375 * of a lower triangular matrix.
1376 * R1^* = Q2 * R2
1377  CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1378  $ lwork-2*n, ierr )
1379 *
1380  IF ( l2pert ) THEN
1381  xsc = dsqrt(small)/epsln
1382  DO 3959 p = 2, nr
1383  DO 3958 q = 1, p - 1
1384  ctemp=dcmplx(xsc*dmin1(abs(v(p,p)),abs(v(q,q))),
1385  $ zero)
1386  IF ( abs(v(q,p)) .LE. temp1 )
1387 * $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1388  $ v(q,p) = ctemp
1389  3958 CONTINUE
1390  3959 CONTINUE
1391  END IF
1392 *
1393  IF ( nr .NE. n )
1394  $ CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1395 * .. save ...
1396 *
1397 * .. this transposed copy should be better than naive
1398  DO 1969 p = 1, nr - 1
1399  CALL zcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1400  CALL zlacgv(nr-p+1, v(p,p), 1 )
1401  1969 CONTINUE
1402  v(nr,nr)=dconjg(v(nr,nr))
1403 *
1404  condr2 = condr1
1405 *
1406  ELSE
1407 *
1408 * .. ill-conditioned case: second QRF with pivoting
1409 * Note that windowed pivoting would be equaly good
1410 * numerically, and more run-time efficient. So, in
1411 * an optimal implementation, the next call to ZGEQP3
1412 * should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
1413 * with properly (carefully) chosen parameters.
1414 *
1415 * R1^* * P2 = Q2 * R2
1416  DO 3003 p = 1, nr
1417  iwork(n+p) = 0
1418  3003 CONTINUE
1419  CALL zgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1420  $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1421 ** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1422 ** $ LWORK-2*N, IERR )
1423  IF ( l2pert ) THEN
1424  xsc = dsqrt(small)
1425  DO 3969 p = 2, nr
1426  DO 3968 q = 1, p - 1
1427  ctemp=dcmplx(xsc*dmin1(abs(v(p,p)),abs(v(q,q))),
1428  $ zero)
1429  IF ( abs(v(q,p)) .LE. temp1 )
1430 * $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1431  $ v(q,p) = ctemp
1432  3968 CONTINUE
1433  3969 CONTINUE
1434  END IF
1435 *
1436  CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1437 *
1438  IF ( l2pert ) THEN
1439  xsc = dsqrt(small)
1440  DO 8970 p = 2, nr
1441  DO 8971 q = 1, p - 1
1442  ctemp=dcmplx(xsc*dmin1(abs(v(p,p)),abs(v(q,q))),
1443  $ zero)
1444 * V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1445  v(p,q) = - ctemp
1446  8971 CONTINUE
1447  8970 CONTINUE
1448  ELSE
1449  CALL zlaset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1450  END IF
1451 * Now, compute R2 = L3 * Q3, the LQ factorization.
1452  CALL zgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1453  $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1454 * .. and estimate the condition number
1455  CALL zlacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1456  DO 4950 p = 1, nr
1457  temp1 = dznrm2( p, cwork(2*n+n*nr+nr+p), nr )
1458  CALL zdscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr )
1459  4950 CONTINUE
1460  CALL zpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1461  $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1462  condr2 = one / dsqrt(temp1)
1463 *
1464 *
1465  IF ( condr2 .GE. cond_ok ) THEN
1466 * .. save the Householder vectors used for Q3
1467 * (this overwrittes the copy of R2, as it will not be
1468 * needed in this branch, but it does not overwritte the
1469 * Huseholder vectors of Q2.).
1470  CALL zlacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1471 * .. and the rest of the information on Q3 is in
1472 * WORK(2*N+N*NR+1:2*N+N*NR+N)
1473  END IF
1474 *
1475  END IF
1476 *
1477  IF ( l2pert ) THEN
1478  xsc = dsqrt(small)
1479  DO 4968 q = 2, nr
1480  ctemp = xsc * v(q,q)
1481  DO 4969 p = 1, q - 1
1482 * V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1483  v(p,q) = - ctemp
1484  4969 CONTINUE
1485  4968 CONTINUE
1486  ELSE
1487  CALL zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1488  END IF
1489 *
1490 * Second preconditioning finished; continue with Jacobi SVD
1491 * The input matrix is lower trinagular.
1492 *
1493 * Recover the right singular vectors as solution of a well
1494 * conditioned triangular matrix equation.
1495 *
1496  IF ( condr1 .LT. cond_ok ) THEN
1497 *
1498  CALL zgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1499  $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1500  $ lrwork, info )
1501  scalem = rwork(1)
1502  numrank = nint(rwork(2))
1503  DO 3970 p = 1, nr
1504  CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1505  CALL zdscal( nr, sva(p), v(1,p), 1 )
1506  3970 CONTINUE
1507 
1508 * .. pick the right matrix equation and solve it
1509 *
1510  IF ( nr .EQ. n ) THEN
1511 * :)) .. best case, R1 is inverted. The solution of this matrix
1512 * equation is Q2*V2 = the product of the Jacobi rotations
1513 * used in ZGESVJ, premultiplied with the orthogonal matrix
1514 * from the second QR factorization.
1515  CALL ztrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv)
1516  ELSE
1517 * .. R1 is well conditioned, but non-square. Adjoint of R2
1518 * is inverted to get the product of the Jacobi rotations
1519 * used in ZGESVJ. The Q-factor from the second QR
1520 * factorization is then built in explicitly.
1521  CALL ztrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1522  $ n,v,ldv)
1523  IF ( nr .LT. n ) THEN
1524  CALL zlaset('A',n-nr,nr,zero,czero,v(nr+1,1),ldv)
1525  CALL zlaset('A',nr,n-nr,zero,czero,v(1,nr+1),ldv)
1526  CALL zlaset('A',n-nr,n-nr,zero,cone,v(nr+1,nr+1),ldv)
1527  END IF
1528  CALL zunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1529  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1530  END IF
1531 *
1532  ELSE IF ( condr2 .LT. cond_ok ) THEN
1533 *
1534 * The matrix R2 is inverted. The solution of the matrix equation
1535 * is Q3^* * V3 = the product of the Jacobi rotations (appplied to
1536 * the lower triangular L3 from the LQ factorization of
1537 * R2=L3*Q3), pre-multiplied with the transposed Q3.
1538  CALL zgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1539  $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1540  $ rwork, lrwork, info )
1541  scalem = rwork(1)
1542  numrank = nint(rwork(2))
1543  DO 3870 p = 1, nr
1544  CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1545  CALL zdscal( nr, sva(p), u(1,p), 1 )
1546  3870 CONTINUE
1547  CALL ztrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1548  $ u,ldu)
1549 * .. apply the permutation from the second QR factorization
1550  DO 873 q = 1, nr
1551  DO 872 p = 1, nr
1552  cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1553  872 CONTINUE
1554  DO 874 p = 1, nr
1555  u(p,q) = cwork(2*n+n*nr+nr+p)
1556  874 CONTINUE
1557  873 CONTINUE
1558  IF ( nr .LT. n ) THEN
1559  CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1560  CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1561  CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1562  END IF
1563  CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1564  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1565  ELSE
1566 * Last line of defense.
1567 * #:( This is a rather pathological case: no scaled condition
1568 * improvement after two pivoted QR factorizations. Other
1569 * possibility is that the rank revealing QR factorization
1570 * or the condition estimator has failed, or the COND_OK
1571 * is set very close to ONE (which is unnecessary). Normally,
1572 * this branch should never be executed, but in rare cases of
1573 * failure of the RRQR or condition estimator, the last line of
1574 * defense ensures that ZGEJSV completes the task.
1575 * Compute the full SVD of L3 using ZGESVJ with explicit
1576 * accumulation of Jacobi rotations.
1577  CALL zgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1578  $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1579  $ rwork, lrwork, info )
1580  scalem = rwork(1)
1581  numrank = nint(rwork(2))
1582  IF ( nr .LT. n ) THEN
1583  CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1584  CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1585  CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1586  END IF
1587  CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1588  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1589 *
1590  CALL zunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1591  $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1592  $ lwork-2*n-n*nr-nr, ierr )
1593  DO 773 q = 1, nr
1594  DO 772 p = 1, nr
1595  cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1596  772 CONTINUE
1597  DO 774 p = 1, nr
1598  u(p,q) = cwork(2*n+n*nr+nr+p)
1599  774 CONTINUE
1600  773 CONTINUE
1601 *
1602  END IF
1603 *
1604 * Permute the rows of V using the (column) permutation from the
1605 * first QRF. Also, scale the columns to make them unit in
1606 * Euclidean norm. This applies to all cases.
1607 *
1608  temp1 = dsqrt(dfloat(n)) * epsln
1609  DO 1972 q = 1, n
1610  DO 972 p = 1, n
1611  cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1612  972 CONTINUE
1613  DO 973 p = 1, n
1614  v(p,q) = cwork(2*n+n*nr+nr+p)
1615  973 CONTINUE
1616  xsc = one / dznrm2( n, v(1,q), 1 )
1617  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1618  $ CALL zdscal( n, xsc, v(1,q), 1 )
1619  1972 CONTINUE
1620 * At this moment, V contains the right singular vectors of A.
1621 * Next, assemble the left singular vector matrix U (M x N).
1622  IF ( nr .LT. m ) THEN
1623  CALL zlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1624  IF ( nr .LT. n1 ) THEN
1625  CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1626  CALL zlaset('A',m-nr,n1-nr,czero,cone,
1627  $ u(nr+1,nr+1),ldu)
1628  END IF
1629  END IF
1630 *
1631 * The Q matrix from the first QRF is built into the left singular
1632 * matrix U. This applies to all cases.
1633 *
1634  CALL zunmqr( 'Left', 'No_Tr', m, n1, n, a, lda, cwork, u,
1635  $ ldu, cwork(n+1), lwork-n, ierr )
1636 
1637 * The columns of U are normalized. The cost is O(M*N) flops.
1638  temp1 = dsqrt(dfloat(m)) * epsln
1639  DO 1973 p = 1, nr
1640  xsc = one / dznrm2( m, u(1,p), 1 )
1641  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1642  $ CALL zdscal( m, xsc, u(1,p), 1 )
1643  1973 CONTINUE
1644 *
1645 * If the initial QRF is computed with row pivoting, the left
1646 * singular vectors must be adjusted.
1647 *
1648  IF ( rowpiv )
1649  $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1650 *
1651  ELSE
1652 *
1653 * .. the initial matrix A has almost orthogonal columns and
1654 * the second QRF is not needed
1655 *
1656  CALL zlacpy( 'Upper', n, n, a, lda, cwork(n+1), n )
1657  IF ( l2pert ) THEN
1658  xsc = dsqrt(small)
1659  DO 5970 p = 2, n
1660  ctemp = xsc * cwork( n + (p-1)*n + p )
1661  DO 5971 q = 1, p - 1
1662 * CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
1663 * $ ABS(CWORK(N+(p-1)*N+q)) )
1664  cwork(n+(q-1)*n+p)=-ctemp
1665  5971 CONTINUE
1666  5970 CONTINUE
1667  ELSE
1668  CALL zlaset( 'Lower',n-1,n-1,czero,czero,cwork(n+2),n )
1669  END IF
1670 *
1671  CALL zgesvj( 'Upper', 'U', 'N', n, n, cwork(n+1), n, sva,
1672  $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
1673  $ info )
1674 *
1675  scalem = rwork(1)
1676  numrank = nint(rwork(2))
1677  DO 6970 p = 1, n
1678  CALL zcopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
1679  CALL zdscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
1680  6970 CONTINUE
1681 *
1682  CALL ztrsm( 'Left', 'Upper', 'NoTrans', 'No UD', n, n,
1683  $ cone, a, lda, cwork(n+1), n )
1684  DO 6972 p = 1, n
1685  CALL zcopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
1686  6972 CONTINUE
1687  temp1 = dsqrt(dfloat(n))*epsln
1688  DO 6971 p = 1, n
1689  xsc = one / dznrm2( n, v(1,p), 1 )
1690  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1691  $ CALL zdscal( n, xsc, v(1,p), 1 )
1692  6971 CONTINUE
1693 *
1694 * Assemble the left singular vector matrix U (M x N).
1695 *
1696  IF ( n .LT. m ) THEN
1697  CALL zlaset( 'A', m-n, n, czero, czero, u(n+1,1), ldu )
1698  IF ( n .LT. n1 ) THEN
1699  CALL zlaset('A',n, n1-n, czero, czero, u(1,n+1),ldu)
1700  CALL zlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu)
1701  END IF
1702  END IF
1703  CALL zunmqr( 'Left', 'No Tr', m, n1, n, a, lda, cwork, u,
1704  $ ldu, cwork(n+1), lwork-n, ierr )
1705  temp1 = dsqrt(dfloat(m))*epsln
1706  DO 6973 p = 1, n1
1707  xsc = one / dznrm2( m, u(1,p), 1 )
1708  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1709  $ CALL zdscal( m, xsc, u(1,p), 1 )
1710  6973 CONTINUE
1711 *
1712  IF ( rowpiv )
1713  $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1714 *
1715  END IF
1716 *
1717 * end of the >> almost orthogonal case << in the full SVD
1718 *
1719  ELSE
1720 *
1721 * This branch deploys a preconditioned Jacobi SVD with explicitly
1722 * accumulated rotations. It is included as optional, mainly for
1723 * experimental purposes. It does perfom well, and can also be used.
1724 * In this implementation, this branch will be automatically activated
1725 * if the condition number sigma_max(A) / sigma_min(A) is predicted
1726 * to be greater than the overflow threshold. This is because the
1727 * a posteriori computation of the singular vectors assumes robust
1728 * implementation of BLAS and some LAPACK procedures, capable of working
1729 * in presence of extreme values. Since that is not always the case, ...
1730 *
1731  DO 7968 p = 1, nr
1732  CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1733  CALL zlacgv( n-p+1, v(p,p), 1 )
1734  7968 CONTINUE
1735 *
1736  IF ( l2pert ) THEN
1737  xsc = dsqrt(small/epsln)
1738  DO 5969 q = 1, nr
1739  ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
1740  DO 5968 p = 1, n
1741  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1742  $ .OR. ( p .LT. q ) )
1743 * $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1744  $ v(p,q) = ctemp
1745  IF ( p .LT. q ) v(p,q) = - v(p,q)
1746  5968 CONTINUE
1747  5969 CONTINUE
1748  ELSE
1749  CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
1750  END IF
1751 
1752  CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1753  $ lwork-2*n, ierr )
1754  CALL zlacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
1755 *
1756  DO 7969 p = 1, nr
1757  CALL zcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
1758  CALL zlacgv( nr-p+1, u(p,p), 1 )
1759  7969 CONTINUE
1760 
1761  IF ( l2pert ) THEN
1762  xsc = dsqrt(small/epsln)
1763  DO 9970 q = 2, nr
1764  DO 9971 p = 1, q - 1
1765  ctemp = dcmplx(xsc * dmin1(abs(u(p,p)),abs(u(q,q))),
1766  $ zero)
1767 * U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
1768  u(p,q) = - ctemp
1769  9971 CONTINUE
1770  9970 CONTINUE
1771  ELSE
1772  CALL zlaset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1773  END IF
1774 
1775  CALL zgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
1776  $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
1777  $ rwork, lrwork, info )
1778  scalem = rwork(1)
1779  numrank = nint(rwork(2))
1780 
1781  IF ( nr .LT. n ) THEN
1782  CALL zlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1783  CALL zlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1784  CALL zlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1785  END IF
1786 
1787  CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1788  $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1789 *
1790 * Permute the rows of V using the (column) permutation from the
1791 * first QRF. Also, scale the columns to make them unit in
1792 * Euclidean norm. This applies to all cases.
1793 *
1794  temp1 = dsqrt(dfloat(n)) * epsln
1795  DO 7972 q = 1, n
1796  DO 8972 p = 1, n
1797  cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1798  8972 CONTINUE
1799  DO 8973 p = 1, n
1800  v(p,q) = cwork(2*n+n*nr+nr+p)
1801  8973 CONTINUE
1802  xsc = one / dznrm2( n, v(1,q), 1 )
1803  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1804  $ CALL zdscal( n, xsc, v(1,q), 1 )
1805  7972 CONTINUE
1806 *
1807 * At this moment, V contains the right singular vectors of A.
1808 * Next, assemble the left singular vector matrix U (M x N).
1809 *
1810  IF ( nr .LT. m ) THEN
1811  CALL zlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu )
1812  IF ( nr .LT. n1 ) THEN
1813  CALL zlaset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu)
1814  CALL zlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu)
1815  END IF
1816  END IF
1817 *
1818  CALL zunmqr( 'Left', 'No Tr', m, n1, n, a, lda, cwork, u,
1819  $ ldu, cwork(n+1), lwork-n, ierr )
1820 *
1821  IF ( rowpiv )
1822  $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1823 *
1824 *
1825  END IF
1826  IF ( transp ) THEN
1827 * .. swap U and V because the procedure worked on A^*
1828  DO 6974 p = 1, n
1829  CALL zswap( n, u(1,p), 1, v(1,p), 1 )
1830  6974 CONTINUE
1831  END IF
1832 *
1833  END IF
1834 * end of the full SVD
1835 *
1836 * Undo scaling, if necessary (and possible)
1837 *
1838  IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
1839  CALL zlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
1840  uscal1 = one
1841  uscal2 = one
1842  END IF
1843 *
1844  IF ( nr .LT. n ) THEN
1845  DO 3004 p = nr+1, n
1846  sva(p) = zero
1847  3004 CONTINUE
1848  END IF
1849 *
1850  rwork(1) = uscal2 * scalem
1851  rwork(2) = uscal1
1852  IF ( errest ) rwork(3) = sconda
1853  IF ( lsvec .AND. rsvec ) THEN
1854  rwork(4) = condr1
1855  rwork(5) = condr2
1856  END IF
1857  IF ( l2tran ) THEN
1858  rwork(6) = entra
1859  rwork(7) = entrat
1860  END IF
1861 *
1862  iwork(1) = nr
1863  iwork(2) = numrank
1864  iwork(3) = warning
1865 *
1866  RETURN
1867 * ..
1868 * .. END OF ZGEJSV
1869 * ..
subroutine zlassq(N, X, INCX, SCALE, SUMSQ)
ZLASSQ updates a sum of squares represented in scaled form.
Definition: zlassq.f:108
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:105
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:52
subroutine zunmlq(SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMLQ
Definition: zunmlq.f:169
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine zgeqp3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, INFO)
ZGEQP3
Definition: zgeqp3.f:161
subroutine zgesvj(JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO)
ZGESVJ
Definition: zgesvj.f:344
subroutine zungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGQR
Definition: zungqr.f:130
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:141
subroutine zlacgv(N, X, INCX)
ZLACGV conjugates a complex vector.
Definition: zlacgv.f:76
double precision function dlamch(CMACH)
DLAMCH
Definition: dlamch.f:65
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:137
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108
double precision function dznrm2(N, X, INCX)
DZNRM2
Definition: dznrm2.f:56
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine ztrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRSM
Definition: ztrsm.f:182
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151
subroutine zlaswp(N, A, LDA, K1, K2, IPIV, INCX)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: zlaswp.f:116
integer function idamax(N, DX, INCX)
IDAMAX
Definition: idamax.f:53
subroutine zunmqr(SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMQR
Definition: zunmqr.f:169
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:52
subroutine zpocon(UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO)
ZPOCON
Definition: zpocon.f:123
subroutine dscal(N, DA, DX, INCX)
DSCAL
Definition: dscal.f:55
subroutine zdscal(N, DA, ZX, INCX)
ZDSCAL
Definition: zdscal.f:54

Here is the call graph for this function:

Here is the caller graph for this function:

subroutine zgesdd ( character  JOBZ,
integer  M,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
double precision, dimension( * )  S,
complex*16, dimension( ldu, * )  U,
integer  LDU,
complex*16, dimension( ldvt, * )  VT,
integer  LDVT,
complex*16, dimension( * )  WORK,
integer  LWORK,
double precision, dimension( * )  RWORK,
integer, dimension( * )  IWORK,
integer  INFO 
)

ZGESDD

Download ZGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZGESDD computes the singular value decomposition (SVD) of a complex
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors, by using divide-and-conquer method. The SVD is written

      A = U * SIGMA * conjugate-transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
 V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns VT = V**H, not V.

 The divide and conquer algorithm makes very mild assumptions about
 floating point arithmetic. It will work on machines with a guard
 digit in add/subtract, or on those binary machines without guard
 digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
 Cray-2. It could conceivably fail on hexadecimal or decimal machines
 without guard digits, but we know of none.
Parameters
[in]JOBZ
          JOBZ is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U and all N rows of V**H are
                  returned in the arrays U and VT;
          = 'S':  the first min(M,N) columns of U and the first
                  min(M,N) rows of V**H are returned in the arrays U
                  and VT;
          = 'O':  If M >= N, the first N columns of U are overwritten
                  in the array A and all rows of V**H are returned in
                  the array VT;
                  otherwise, all columns of U are returned in the
                  array U and the first M rows of V**H are overwritten
                  in the array A;
          = 'N':  no columns of U or rows of V**H are computed.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBZ = 'O',  A is overwritten with the first N columns
                          of U (the left singular vectors, stored
                          columnwise) if M >= N;
                          A is overwritten with the first M rows
                          of V**H (the right singular vectors, stored
                          rowwise) otherwise.
          if JOBZ .ne. 'O', the contents of A are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is DOUBLE PRECISION array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is COMPLEX*16 array, dimension (LDU,UCOL)
          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
          UCOL = min(M,N) if JOBZ = 'S'.
          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
          unitary matrix U;
          if JOBZ = 'S', U contains the first min(M,N) columns of U
          (the left singular vectors, stored columnwise);
          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
[out]VT
          VT is COMPLEX*16 array, dimension (LDVT,N)
          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
          N-by-N unitary matrix V**H;
          if JOBZ = 'S', VT contains the first min(M,N) rows of
          V**H (the right singular vectors, stored rowwise);
          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
          if JOBZ = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK >= 1.
          if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N).
          if JOBZ = 'O',
                LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
          if JOBZ = 'S' or 'A',
                LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
          For good performance, LWORK should generally be larger.

          If LWORK = -1, a workspace query is assumed.  The optimal
          size for the WORK array is calculated and stored in WORK(1),
          and no other work except argument checking is performed.
[out]RWORK
          RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
          If JOBZ = 'N', LRWORK >= 7*min(M,N).
          Otherwise,
          LRWORK >= min(M,N)*max(5*min(M,N)+7,2*max(M,N)+2*min(M,N)+1)
[out]IWORK
          IWORK is INTEGER array, dimension (8*min(M,N))
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  The updating process of DBDSDC did not converge.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2015
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 224 of file zgesdd.f.

224 *
225 * -- LAPACK driver routine (version 3.6.0) --
226 * -- LAPACK is a software package provided by Univ. of Tennessee, --
227 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
228 * November 2015
229 *
230 * .. Scalar Arguments ..
231  CHARACTER jobz
232  INTEGER info, lda, ldu, ldvt, lwork, m, n
233 * ..
234 * .. Array Arguments ..
235  INTEGER iwork( * )
236  DOUBLE PRECISION rwork( * ), s( * )
237  COMPLEX*16 a( lda, * ), u( ldu, * ), vt( ldvt, * ),
238  $ work( * )
239 * ..
240 *
241 * =====================================================================
242 *
243 * .. Parameters ..
244  INTEGER lquerv
245  parameter( lquerv = -1 )
246  COMPLEX*16 czero, cone
247  parameter( czero = ( 0.0d+0, 0.0d+0 ),
248  $ cone = ( 1.0d+0, 0.0d+0 ) )
249  DOUBLE PRECISION zero, one
250  parameter( zero = 0.0d+0, one = 1.0d+0 )
251 * ..
252 * .. Local Scalars ..
253  LOGICAL wntqa, wntqas, wntqn, wntqo, wntqs
254  INTEGER blk, chunk, i, ie, ierr, il, ir, iru, irvt,
255  $ iscl, itau, itaup, itauq, iu, ivt, ldwkvt,
256  $ ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,
257  $ mnthr1, mnthr2, nrwork, nwork, wrkbl
258  DOUBLE PRECISION anrm, bignum, eps, smlnum
259 * ..
260 * .. Local Arrays ..
261  INTEGER idum( 1 )
262  DOUBLE PRECISION dum( 1 )
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL dbdsdc, dlascl, xerbla, zgebrd, zgelqf, zgemm,
268 * ..
269 * .. External Functions ..
270  LOGICAL lsame
271  INTEGER ilaenv
272  DOUBLE PRECISION dlamch, zlange
273  EXTERNAL lsame, ilaenv, dlamch, zlange
274 * ..
275 * .. Intrinsic Functions ..
276  INTRINSIC int, max, min, sqrt
277 * ..
278 * .. Executable Statements ..
279 *
280 * Test the input arguments
281 *
282  info = 0
283  minmn = min( m, n )
284  mnthr1 = int( minmn*17.0d0 / 9.0d0 )
285  mnthr2 = int( minmn*5.0d0 / 3.0d0 )
286  wntqa = lsame( jobz, 'A' )
287  wntqs = lsame( jobz, 'S' )
288  wntqas = wntqa .OR. wntqs
289  wntqo = lsame( jobz, 'O' )
290  wntqn = lsame( jobz, 'N' )
291  minwrk = 1
292  maxwrk = 1
293 *
294  IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
295  info = -1
296  ELSE IF( m.LT.0 ) THEN
297  info = -2
298  ELSE IF( n.LT.0 ) THEN
299  info = -3
300  ELSE IF( lda.LT.max( 1, m ) ) THEN
301  info = -5
302  ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
303  $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
304  info = -8
305  ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
306  $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
307  $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
308  info = -10
309  END IF
310 *
311 * Compute workspace
312 * (Note: Comments in the code beginning "Workspace:" describe the
313 * minimal amount of workspace needed at that point in the code,
314 * as well as the preferred amount for good performance.
315 * CWorkspace refers to complex workspace, and RWorkspace to
316 * real workspace. NB refers to the optimal block size for the
317 * immediately following subroutine, as returned by ILAENV.)
318 *
319  IF( info.EQ.0 .AND. m.GT.0 .AND. n.GT.0 ) THEN
320  IF( m.GE.n ) THEN
321 *
322 * There is no complex work space needed for bidiagonal SVD
323 * The real work space needed for bidiagonal SVD is BDSPAC
324 * for computing singular values and singular vectors; BDSPAN
325 * for computing singular values only.
326 * BDSPAC = 5*N*N + 7*N
327 * BDSPAN = MAX(7*N+4, 3*N+2+SMLSIZ*(SMLSIZ+8))
328 *
329  IF( m.GE.mnthr1 ) THEN
330  IF( wntqn ) THEN
331 *
332 * Path 1 (M much larger than N, JOBZ='N')
333 *
334  maxwrk = n + n*ilaenv( 1, 'ZGEQRF', ' ', m, n, -1,
335  $ -1 )
336  maxwrk = max( maxwrk, 2*n+2*n*
337  $ ilaenv( 1, 'ZGEBRD', ' ', n, n, -1, -1 ) )
338  minwrk = 3*n
339  ELSE IF( wntqo ) THEN
340 *
341 * Path 2 (M much larger than N, JOBZ='O')
342 *
343  wrkbl = n + n*ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 )
344  wrkbl = max( wrkbl, n+n*ilaenv( 1, 'ZUNGQR', ' ', m,
345  $ n, n, -1 ) )
346  wrkbl = max( wrkbl, 2*n+2*n*
347  $ ilaenv( 1, 'ZGEBRD', ' ', n, n, -1, -1 ) )
348  wrkbl = max( wrkbl, 2*n+n*
349  $ ilaenv( 1, 'ZUNMBR', 'QLN', n, n, n, -1 ) )
350  wrkbl = max( wrkbl, 2*n+n*
351  $ ilaenv( 1, 'ZUNMBR', 'PRC', n, n, n, -1 ) )
352  maxwrk = m*n + n*n + wrkbl
353  minwrk = 2*n*n + 3*n
354  ELSE IF( wntqs ) THEN
355 *
356 * Path 3 (M much larger than N, JOBZ='S')
357 *
358  wrkbl = n + n*ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 )
359  wrkbl = max( wrkbl, n+n*ilaenv( 1, 'ZUNGQR', ' ', m,
360  $ n, n, -1 ) )
361  wrkbl = max( wrkbl, 2*n+2*n*
362  $ ilaenv( 1, 'ZGEBRD', ' ', n, n, -1, -1 ) )
363  wrkbl = max( wrkbl, 2*n+n*
364  $ ilaenv( 1, 'ZUNMBR', 'QLN', n, n, n, -1 ) )
365  wrkbl = max( wrkbl, 2*n+n*
366  $ ilaenv( 1, 'ZUNMBR', 'PRC', n, n, n, -1 ) )
367  maxwrk = n*n + wrkbl
368  minwrk = n*n + 3*n
369  ELSE IF( wntqa ) THEN
370 *
371 * Path 4 (M much larger than N, JOBZ='A')
372 *
373  wrkbl = n + n*ilaenv( 1, 'ZGEQRF', ' ', m, n, -1, -1 )
374  wrkbl = max( wrkbl, n+m*ilaenv( 1, 'ZUNGQR', ' ', m,
375  $ m, n, -1 ) )
376  wrkbl = max( wrkbl, 2*n+2*n*
377  $ ilaenv( 1, 'ZGEBRD', ' ', n, n, -1, -1 ) )
378  wrkbl = max( wrkbl, 2*n+n*
379  $ ilaenv( 1, 'ZUNMBR', 'QLN', n, n, n, -1 ) )
380  wrkbl = max( wrkbl, 2*n+n*
381  $ ilaenv( 1, 'ZUNMBR', 'PRC', n, n, n, -1 ) )
382  maxwrk = n*n + wrkbl
383  minwrk = n*n + 2*n + m
384  END IF
385  ELSE IF( m.GE.mnthr2 ) THEN
386 *
387 * Path 5 (M much larger than N, but not as much as MNTHR1)
388 *
389  maxwrk = 2*n + ( m+n )*ilaenv( 1, 'ZGEBRD', ' ', m, n,
390  $ -1, -1 )
391  minwrk = 2*n + m
392  IF( wntqo ) THEN
393  maxwrk = max( maxwrk, 2*n+n*
394  $ ilaenv( 1, 'ZUNGBR', 'P', n, n, n, -1 ) )
395  maxwrk = max( maxwrk, 2*n+n*
396  $ ilaenv( 1, 'ZUNGBR', 'Q', m, n, n, -1 ) )
397  maxwrk = maxwrk + m*n
398  minwrk = minwrk + n*n
399  ELSE IF( wntqs ) THEN
400  maxwrk = max( maxwrk, 2*n+n*
401  $ ilaenv( 1, 'ZUNGBR', 'P', n, n, n, -1 ) )
402  maxwrk = max( maxwrk, 2*n+n*
403  $ ilaenv( 1, 'ZUNGBR', 'Q', m, n, n, -1 ) )
404  ELSE IF( wntqa ) THEN
405  maxwrk = max( maxwrk, 2*n+n*
406  $ ilaenv( 1, 'ZUNGBR', 'P', n, n, n, -1 ) )
407  maxwrk = max( maxwrk, 2*n+m*
408  $ ilaenv( 1, 'ZUNGBR', 'Q', m, m, n, -1 ) )
409  END IF
410  ELSE
411 *
412 * Path 6 (M at least N, but not much larger)
413 *
414  maxwrk = 2*n + ( m+n )*ilaenv( 1, 'ZGEBRD', ' ', m, n,
415  $ -1, -1 )
416  minwrk = 2*n + m
417  IF( wntqo ) THEN
418  maxwrk = max( maxwrk, 2*n+n*
419  $ ilaenv( 1, 'ZUNMBR', 'PRC', n, n, n, -1 ) )
420  maxwrk = max( maxwrk, 2*n+n*
421  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, n, n, -1 ) )
422  maxwrk = maxwrk + m*n
423  minwrk = minwrk + n*n
424  ELSE IF( wntqs ) THEN
425  maxwrk = max( maxwrk, 2*n+n*
426  $ ilaenv( 1, 'ZUNMBR', 'PRC', n, n, n, -1 ) )
427  maxwrk = max( maxwrk, 2*n+n*
428  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, n, n, -1 ) )
429  ELSE IF( wntqa ) THEN
430  maxwrk = max( maxwrk, 2*n+n*
431  $ ilaenv( 1, 'ZUNGBR', 'PRC', n, n, n, -1 ) )
432  maxwrk = max( maxwrk, 2*n+m*
433  $ ilaenv( 1, 'ZUNGBR', 'QLN', m, m, n, -1 ) )
434  END IF
435  END IF
436  ELSE
437 *
438 * There is no complex work space needed for bidiagonal SVD
439 * The real work space needed for bidiagonal SVD is BDSPAC
440 * for computing singular values and singular vectors; BDSPAN
441 * for computing singular values only.
442 * BDSPAC = 5*M*M + 7*M
443 * BDSPAN = MAX(7*M+4, 3*M+2+SMLSIZ*(SMLSIZ+8))
444 *
445  IF( n.GE.mnthr1 ) THEN
446  IF( wntqn ) THEN
447 *
448 * Path 1t (N much larger than M, JOBZ='N')
449 *
450  maxwrk = m + m*ilaenv( 1, 'ZGELQF', ' ', m, n, -1,
451  $ -1 )
452  maxwrk = max( maxwrk, 2*m+2*m*
453  $ ilaenv( 1, 'ZGEBRD', ' ', m, m, -1, -1 ) )
454  minwrk = 3*m
455  ELSE IF( wntqo ) THEN
456 *
457 * Path 2t (N much larger than M, JOBZ='O')
458 *
459  wrkbl = m + m*ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 )
460  wrkbl = max( wrkbl, m+m*ilaenv( 1, 'ZUNGLQ', ' ', m,
461  $ n, m, -1 ) )
462  wrkbl = max( wrkbl, 2*m+2*m*
463  $ ilaenv( 1, 'ZGEBRD', ' ', m, m, -1, -1 ) )
464  wrkbl = max( wrkbl, 2*m+m*
465  $ ilaenv( 1, 'ZUNMBR', 'PRC', m, m, m, -1 ) )
466  wrkbl = max( wrkbl, 2*m+m*
467  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, m, m, -1 ) )
468  maxwrk = m*n + m*m + wrkbl
469  minwrk = 2*m*m + 3*m
470  ELSE IF( wntqs ) THEN
471 *
472 * Path 3t (N much larger than M, JOBZ='S')
473 *
474  wrkbl = m + m*ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 )
475  wrkbl = max( wrkbl, m+m*ilaenv( 1, 'ZUNGLQ', ' ', m,
476  $ n, m, -1 ) )
477  wrkbl = max( wrkbl, 2*m+2*m*
478  $ ilaenv( 1, 'ZGEBRD', ' ', m, m, -1, -1 ) )
479  wrkbl = max( wrkbl, 2*m+m*
480  $ ilaenv( 1, 'ZUNMBR', 'PRC', m, m, m, -1 ) )
481  wrkbl = max( wrkbl, 2*m+m*
482  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, m, m, -1 ) )
483  maxwrk = m*m + wrkbl
484  minwrk = m*m + 3*m
485  ELSE IF( wntqa ) THEN
486 *
487 * Path 4t (N much larger than M, JOBZ='A')
488 *
489  wrkbl = m + m*ilaenv( 1, 'ZGELQF', ' ', m, n, -1, -1 )
490  wrkbl = max( wrkbl, m+n*ilaenv( 1, 'ZUNGLQ', ' ', n,
491  $ n, m, -1 ) )
492  wrkbl = max( wrkbl, 2*m+2*m*
493  $ ilaenv( 1, 'ZGEBRD', ' ', m, m, -1, -1 ) )
494  wrkbl = max( wrkbl, 2*m+m*
495  $ ilaenv( 1, 'ZUNMBR', 'PRC', m, m, m, -1 ) )
496  wrkbl = max( wrkbl, 2*m+m*
497  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, m, m, -1 ) )
498  maxwrk = m*m + wrkbl
499  minwrk = m*m + 2*m + n
500  END IF
501  ELSE IF( n.GE.mnthr2 ) THEN
502 *
503 * Path 5t (N much larger than M, but not as much as MNTHR1)
504 *
505  maxwrk = 2*m + ( m+n )*ilaenv( 1, 'ZGEBRD', ' ', m, n,
506  $ -1, -1 )
507  minwrk = 2*m + n
508  IF( wntqo ) THEN
509  maxwrk = max( maxwrk, 2*m+m*
510  $ ilaenv( 1, 'ZUNGBR', 'P', m, n, m, -1 ) )
511  maxwrk = max( maxwrk, 2*m+m*
512  $ ilaenv( 1, 'ZUNGBR', 'Q', m, m, n, -1 ) )
513  maxwrk = maxwrk + m*n
514  minwrk = minwrk + m*m
515  ELSE IF( wntqs ) THEN
516  maxwrk = max( maxwrk, 2*m+m*
517  $ ilaenv( 1, 'ZUNGBR', 'P', m, n, m, -1 ) )
518  maxwrk = max( maxwrk, 2*m+m*
519  $ ilaenv( 1, 'ZUNGBR', 'Q', m, m, n, -1 ) )
520  ELSE IF( wntqa ) THEN
521  maxwrk = max( maxwrk, 2*m+n*
522  $ ilaenv( 1, 'ZUNGBR', 'P', n, n, m, -1 ) )
523  maxwrk = max( maxwrk, 2*m+m*
524  $ ilaenv( 1, 'ZUNGBR', 'Q', m, m, n, -1 ) )
525  END IF
526  ELSE
527 *
528 * Path 6t (N greater than M, but not much larger)
529 *
530  maxwrk = 2*m + ( m+n )*ilaenv( 1, 'ZGEBRD', ' ', m, n,
531  $ -1, -1 )
532  minwrk = 2*m + n
533  IF( wntqo ) THEN
534  maxwrk = max( maxwrk, 2*m+m*
535  $ ilaenv( 1, 'ZUNMBR', 'PRC', m, n, m, -1 ) )
536  maxwrk = max( maxwrk, 2*m+m*
537  $ ilaenv( 1, 'ZUNMBR', 'QLN', m, m, n, -1 ) )
538  maxwrk = maxwrk + m*n
539  minwrk = minwrk + m*m
540  ELSE IF( wntqs ) THEN
541  maxwrk = max( maxwrk, 2*m+m*
542  $ ilaenv( 1, 'ZUNGBR', 'PRC', m, n, m, -1 ) )
543  maxwrk = max( maxwrk, 2*m+m*
544  $ ilaenv( 1, 'ZUNGBR', 'QLN', m, m, n, -1 ) )
545  ELSE IF( wntqa ) THEN
546  maxwrk = max( maxwrk, 2*m+n*
547  $ ilaenv( 1, 'ZUNGBR', 'PRC', n, n, m, -1 ) )
548  maxwrk = max( maxwrk, 2*m+m*
549  $ ilaenv( 1, 'ZUNGBR', 'QLN', m, m, n, -1 ) )
550  END IF
551  END IF
552  END IF
553  maxwrk = max( maxwrk, minwrk )
554  END IF
555  IF( info.EQ.0 ) THEN
556  work( 1 ) = maxwrk
557  IF( lwork.LT.minwrk .AND. lwork.NE.lquerv )
558  $ info = -13
559  END IF
560 *
561 * Quick returns
562 *
563  IF( info.NE.0 ) THEN
564  CALL xerbla( 'ZGESDD', -info )
565  RETURN
566  END IF
567  IF( lwork.EQ.lquerv )
568  $ RETURN
569  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
570  RETURN
571  END IF
572 *
573 * Get machine constants
574 *
575  eps = dlamch( 'P' )
576  smlnum = sqrt( dlamch( 'S' ) ) / eps
577  bignum = one / smlnum
578 *
579 * Scale A if max element outside range [SMLNUM,BIGNUM]
580 *
581  anrm = zlange( 'M', m, n, a, lda, dum )
582  iscl = 0
583  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
584  iscl = 1
585  CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
586  ELSE IF( anrm.GT.bignum ) THEN
587  iscl = 1
588  CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
589  END IF
590 *
591  IF( m.GE.n ) THEN
592 *
593 * A has at least as many rows as columns. If A has sufficiently
594 * more rows than columns, first reduce using the QR
595 * decomposition (if sufficient workspace available)
596 *
597  IF( m.GE.mnthr1 ) THEN
598 *
599  IF( wntqn ) THEN
600 *
601 * Path 1 (M much larger than N, JOBZ='N')
602 * No singular vectors to be computed
603 *
604  itau = 1
605  nwork = itau + n
606 *
607 * Compute A=Q*R
608 * (CWorkspace: need 2*N, prefer N+N*NB)
609 * (RWorkspace: need 0)
610 *
611  CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
612  $ lwork-nwork+1, ierr )
613 *
614 * Zero out below R
615 *
616  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
617  $ lda )
618  ie = 1
619  itauq = 1
620  itaup = itauq + n
621  nwork = itaup + n
622 *
623 * Bidiagonalize R in A
624 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
625 * (RWorkspace: need N)
626 *
627  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
628  $ work( itaup ), work( nwork ), lwork-nwork+1,
629  $ ierr )
630  nrwork = ie + n
631 *
632 * Perform bidiagonal SVD, compute singular values only
633 * (CWorkspace: 0)
634 * (RWorkspace: need BDSPAN)
635 *
636  CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1, dum, 1,
637  $ dum, idum, rwork( nrwork ), iwork, info )
638 *
639  ELSE IF( wntqo ) THEN
640 *
641 * Path 2 (M much larger than N, JOBZ='O')
642 * N left singular vectors to be overwritten on A and
643 * N right singular vectors to be computed in VT
644 *
645  iu = 1
646 *
647 * WORK(IU) is N by N
648 *
649  ldwrku = n
650  ir = iu + ldwrku*n
651  IF( lwork.GE.m*n+n*n+3*n ) THEN
652 *
653 * WORK(IR) is M by N
654 *
655  ldwrkr = m
656  ELSE
657  ldwrkr = ( lwork-n*n-3*n ) / n
658  END IF
659  itau = ir + ldwrkr*n
660  nwork = itau + n
661 *
662 * Compute A=Q*R
663 * (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB)
664 * (RWorkspace: 0)
665 *
666  CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
667  $ lwork-nwork+1, ierr )
668 *
669 * Copy R to WORK( IR ), zeroing out below it
670 *
671  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
672  CALL zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
673  $ ldwrkr )
674 *
675 * Generate Q in A
676 * (CWorkspace: need 2*N, prefer N+N*NB)
677 * (RWorkspace: 0)
678 *
679  CALL zungqr( m, n, n, a, lda, work( itau ),
680  $ work( nwork ), lwork-nwork+1, ierr )
681  ie = 1
682  itauq = itau
683  itaup = itauq + n
684  nwork = itaup + n
685 *
686 * Bidiagonalize R in WORK(IR)
687 * (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB)
688 * (RWorkspace: need N)
689 *
690  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
691  $ work( itauq ), work( itaup ), work( nwork ),
692  $ lwork-nwork+1, ierr )
693 *
694 * Perform bidiagonal SVD, computing left singular vectors
695 * of R in WORK(IRU) and computing right singular vectors
696 * of R in WORK(IRVT)
697 * (CWorkspace: need 0)
698 * (RWorkspace: need BDSPAC)
699 *
700  iru = ie + n
701  irvt = iru + n*n
702  nrwork = irvt + n*n
703  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
704  $ n, rwork( irvt ), n, dum, idum,
705  $ rwork( nrwork ), iwork, info )
706 *
707 * Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
708 * Overwrite WORK(IU) by the left singular vectors of R
709 * (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB)
710 * (RWorkspace: 0)
711 *
712  CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
713  $ ldwrku )
714  CALL zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
715  $ work( itauq ), work( iu ), ldwrku,
716  $ work( nwork ), lwork-nwork+1, ierr )
717 *
718 * Copy real matrix RWORK(IRVT) to complex matrix VT
719 * Overwrite VT by the right singular vectors of R
720 * (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
721 * (RWorkspace: 0)
722 *
723  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
724  CALL zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
725  $ work( itaup ), vt, ldvt, work( nwork ),
726  $ lwork-nwork+1, ierr )
727 *
728 * Multiply Q in A by left singular vectors of R in
729 * WORK(IU), storing result in WORK(IR) and copying to A
730 * (CWorkspace: need 2*N*N, prefer N*N+M*N)
731 * (RWorkspace: 0)
732 *
733  DO 10 i = 1, m, ldwrkr
734  chunk = min( m-i+1, ldwrkr )
735  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
736  $ lda, work( iu ), ldwrku, czero,
737  $ work( ir ), ldwrkr )
738  CALL zlacpy( 'F', chunk, n, work( ir ), ldwrkr,
739  $ a( i, 1 ), lda )
740  10 CONTINUE
741 *
742  ELSE IF( wntqs ) THEN
743 *
744 * Path 3 (M much larger than N, JOBZ='S')
745 * N left singular vectors to be computed in U and
746 * N right singular vectors to be computed in VT
747 *
748  ir = 1
749 *
750 * WORK(IR) is N by N
751 *
752  ldwrkr = n
753  itau = ir + ldwrkr*n
754  nwork = itau + n
755 *
756 * Compute A=Q*R
757 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758 * (RWorkspace: 0)
759 *
760  CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
761  $ lwork-nwork+1, ierr )
762 *
763 * Copy R to WORK(IR), zeroing out below it
764 *
765  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
766  CALL zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
767  $ ldwrkr )
768 *
769 * Generate Q in A
770 * (CWorkspace: need 2*N, prefer N+N*NB)
771 * (RWorkspace: 0)
772 *
773  CALL zungqr( m, n, n, a, lda, work( itau ),
774  $ work( nwork ), lwork-nwork+1, ierr )
775  ie = 1
776  itauq = itau
777  itaup = itauq + n
778  nwork = itaup + n
779 *
780 * Bidiagonalize R in WORK(IR)
781 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782 * (RWorkspace: need N)
783 *
784  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
785  $ work( itauq ), work( itaup ), work( nwork ),
786  $ lwork-nwork+1, ierr )
787 *
788 * Perform bidiagonal SVD, computing left singular vectors
789 * of bidiagonal matrix in RWORK(IRU) and computing right
790 * singular vectors of bidiagonal matrix in RWORK(IRVT)
791 * (CWorkspace: need 0)
792 * (RWorkspace: need BDSPAC)
793 *
794  iru = ie + n
795  irvt = iru + n*n
796  nrwork = irvt + n*n
797  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
798  $ n, rwork( irvt ), n, dum, idum,
799  $ rwork( nrwork ), iwork, info )
800 *
801 * Copy real matrix RWORK(IRU) to complex matrix U
802 * Overwrite U by left singular vectors of R
803 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
804 * (RWorkspace: 0)
805 *
806  CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
807  CALL zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
808  $ work( itauq ), u, ldu, work( nwork ),
809  $ lwork-nwork+1, ierr )
810 *
811 * Copy real matrix RWORK(IRVT) to complex matrix VT
812 * Overwrite VT by right singular vectors of R
813 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
814 * (RWorkspace: 0)
815 *
816  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
817  CALL zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
818  $ work( itaup ), vt, ldvt, work( nwork ),
819  $ lwork-nwork+1, ierr )
820 *
821 * Multiply Q in A by left singular vectors of R in
822 * WORK(IR), storing result in U
823 * (CWorkspace: need N*N)
824 * (RWorkspace: 0)
825 *
826  CALL zlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
827  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),
828  $ ldwrkr, czero, u, ldu )
829 *
830  ELSE IF( wntqa ) THEN
831 *
832 * Path 4 (M much larger than N, JOBZ='A')
833 * M left singular vectors to be computed in U and
834 * N right singular vectors to be computed in VT
835 *
836  iu = 1
837 *
838 * WORK(IU) is N by N
839 *
840  ldwrku = n
841  itau = iu + ldwrku*n
842  nwork = itau + n
843 *
844 * Compute A=Q*R, copying result to U
845 * (CWorkspace: need 2*N, prefer N+N*NB)
846 * (RWorkspace: 0)
847 *
848  CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
849  $ lwork-nwork+1, ierr )
850  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
851 *
852 * Generate Q in U
853 * (CWorkspace: need N+M, prefer N+M*NB)
854 * (RWorkspace: 0)
855 *
856  CALL zungqr( m, m, n, u, ldu, work( itau ),
857  $ work( nwork ), lwork-nwork+1, ierr )
858 *
859 * Produce R in A, zeroing out below it
860 *
861  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
862  $ lda )
863  ie = 1
864  itauq = itau
865  itaup = itauq + n
866  nwork = itaup + n
867 *
868 * Bidiagonalize R in A
869 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
870 * (RWorkspace: need N)
871 *
872  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
873  $ work( itaup ), work( nwork ), lwork-nwork+1,
874  $ ierr )
875  iru = ie + n
876  irvt = iru + n*n
877  nrwork = irvt + n*n
878 *
879 * Perform bidiagonal SVD, computing left singular vectors
880 * of bidiagonal matrix in RWORK(IRU) and computing right
881 * singular vectors of bidiagonal matrix in RWORK(IRVT)
882 * (CWorkspace: need 0)
883 * (RWorkspace: need BDSPAC)
884 *
885  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
886  $ n, rwork( irvt ), n, dum, idum,
887  $ rwork( nrwork ), iwork, info )
888 *
889 * Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
890 * Overwrite WORK(IU) by left singular vectors of R
891 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
892 * (RWorkspace: 0)
893 *
894  CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
895  $ ldwrku )
896  CALL zunmbr( 'Q', 'L', 'N', n, n, n, a, lda,
897  $ work( itauq ), work( iu ), ldwrku,
898  $ work( nwork ), lwork-nwork+1, ierr )
899 *
900 * Copy real matrix RWORK(IRVT) to complex matrix VT
901 * Overwrite VT by right singular vectors of R
902 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
903 * (RWorkspace: 0)
904 *
905  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
906  CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
907  $ work( itaup ), vt, ldvt, work( nwork ),
908  $ lwork-nwork+1, ierr )
909 *
910 * Multiply Q in U by left singular vectors of R in
911 * WORK(IU), storing result in A
912 * (CWorkspace: need N*N)
913 * (RWorkspace: 0)
914 *
915  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),
916  $ ldwrku, czero, a, lda )
917 *
918 * Copy left singular vectors of A from A to U
919 *
920  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
921 *
922  END IF
923 *
924  ELSE IF( m.GE.mnthr2 ) THEN
925 *
926 * MNTHR2 <= M < MNTHR1
927 *
928 * Path 5 (M much larger than N, but not as much as MNTHR1)
929 * Reduce to bidiagonal form without QR decomposition, use
930 * ZUNGBR and matrix multiplication to compute singular vectors
931 *
932  ie = 1
933  nrwork = ie + n
934  itauq = 1
935  itaup = itauq + n
936  nwork = itaup + n
937 *
938 * Bidiagonalize A
939 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
940 * (RWorkspace: need N)
941 *
942  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
943  $ work( itaup ), work( nwork ), lwork-nwork+1,
944  $ ierr )
945  IF( wntqn ) THEN
946 *
947 * Compute singular values only
948 * (Cworkspace: 0)
949 * (Rworkspace: need BDSPAN)
950 *
951  CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1, dum, 1,
952  $ dum, idum, rwork( nrwork ), iwork, info )
953  ELSE IF( wntqo ) THEN
954  iu = nwork
955  iru = nrwork
956  irvt = iru + n*n
957  nrwork = irvt + n*n
958 *
959 * Copy A to VT, generate P**H
960 * (Cworkspace: need 2*N, prefer N+N*NB)
961 * (Rworkspace: 0)
962 *
963  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
964  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
965  $ work( nwork ), lwork-nwork+1, ierr )
966 *
967 * Generate Q in A
968 * (CWorkspace: need 2*N, prefer N+N*NB)
969 * (RWorkspace: 0)
970 *
971  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
972  $ work( nwork ), lwork-nwork+1, ierr )
973 *
974  IF( lwork.GE.m*n+3*n ) THEN
975 *
976 * WORK( IU ) is M by N
977 *
978  ldwrku = m
979  ELSE
980 *
981 * WORK(IU) is LDWRKU by N
982 *
983  ldwrku = ( lwork-3*n ) / n
984  END IF
985  nwork = iu + ldwrku*n
986 *
987 * Perform bidiagonal SVD, computing left singular vectors
988 * of bidiagonal matrix in RWORK(IRU) and computing right
989 * singular vectors of bidiagonal matrix in RWORK(IRVT)
990 * (CWorkspace: need 0)
991 * (RWorkspace: need BDSPAC)
992 *
993  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
994  $ n, rwork( irvt ), n, dum, idum,
995  $ rwork( nrwork ), iwork, info )
996 *
997 * Multiply real matrix RWORK(IRVT) by P**H in VT,
998 * storing the result in WORK(IU), copying to VT
999 * (Cworkspace: need 0)
1000 * (Rworkspace: need 3*N*N)
1001 *
1002  CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt,
1003  $ work( iu ), ldwrku, rwork( nrwork ) )
1004  CALL zlacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt )
1005 *
1006 * Multiply Q in A by real matrix RWORK(IRU), storing the
1007 * result in WORK(IU), copying to A
1008 * (CWorkspace: need N*N, prefer M*N)
1009 * (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
1010 *
1011  nrwork = irvt
1012  DO 20 i = 1, m, ldwrku
1013  chunk = min( m-i+1, ldwrku )
1014  CALL zlacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),
1015  $ n, work( iu ), ldwrku, rwork( nrwork ) )
1016  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
1017  $ a( i, 1 ), lda )
1018  20 CONTINUE
1019 *
1020  ELSE IF( wntqs ) THEN
1021 *
1022 * Copy A to VT, generate P**H
1023 * (Cworkspace: need 2*N, prefer N+N*NB)
1024 * (Rworkspace: 0)
1025 *
1026  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1027  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1028  $ work( nwork ), lwork-nwork+1, ierr )
1029 *
1030 * Copy A to U, generate Q
1031 * (Cworkspace: need 2*N, prefer N+N*NB)
1032 * (Rworkspace: 0)
1033 *
1034  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1035  CALL zungbr( 'Q', m, n, n, u, ldu, work( itauq ),
1036  $ work( nwork ), lwork-nwork+1, ierr )
1037 *
1038 * Perform bidiagonal SVD, computing left singular vectors
1039 * of bidiagonal matrix in RWORK(IRU) and computing right
1040 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1041 * (CWorkspace: need 0)
1042 * (RWorkspace: need BDSPAC)
1043 *
1044  iru = nrwork
1045  irvt = iru + n*n
1046  nrwork = irvt + n*n
1047  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1048  $ n, rwork( irvt ), n, dum, idum,
1049  $ rwork( nrwork ), iwork, info )
1050 *
1051 * Multiply real matrix RWORK(IRVT) by P**H in VT,
1052 * storing the result in A, copying to VT
1053 * (Cworkspace: need 0)
1054 * (Rworkspace: need 3*N*N)
1055 *
1056  CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1057  $ rwork( nrwork ) )
1058  CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
1059 *
1060 * Multiply Q in U by real matrix RWORK(IRU), storing the
1061 * result in A, copying to U
1062 * (CWorkspace: need 0)
1063 * (Rworkspace: need N*N+2*M*N)
1064 *
1065  nrwork = irvt
1066  CALL zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1067  $ rwork( nrwork ) )
1068  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1069  ELSE
1070 *
1071 * Copy A to VT, generate P**H
1072 * (Cworkspace: need 2*N, prefer N+N*NB)
1073 * (Rworkspace: 0)
1074 *
1075  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1076  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1077  $ work( nwork ), lwork-nwork+1, ierr )
1078 *
1079 * Copy A to U, generate Q
1080 * (Cworkspace: need 2*N, prefer N+N*NB)
1081 * (Rworkspace: 0)
1082 *
1083  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1084  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1085  $ work( nwork ), lwork-nwork+1, ierr )
1086 *
1087 * Perform bidiagonal SVD, computing left singular vectors
1088 * of bidiagonal matrix in RWORK(IRU) and computing right
1089 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1090 * (CWorkspace: need 0)
1091 * (RWorkspace: need BDSPAC)
1092 *
1093  iru = nrwork
1094  irvt = iru + n*n
1095  nrwork = irvt + n*n
1096  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1097  $ n, rwork( irvt ), n, dum, idum,
1098  $ rwork( nrwork ), iwork, info )
1099 *
1100 * Multiply real matrix RWORK(IRVT) by P**H in VT,
1101 * storing the result in A, copying to VT
1102 * (Cworkspace: need 0)
1103 * (Rworkspace: need 3*N*N)
1104 *
1105  CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1106  $ rwork( nrwork ) )
1107  CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
1108 *
1109 * Multiply Q in U by real matrix RWORK(IRU), storing the
1110 * result in A, copying to U
1111 * (CWorkspace: 0)
1112 * (Rworkspace: need 3*N*N)
1113 *
1114  nrwork = irvt
1115  CALL zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1116  $ rwork( nrwork ) )
1117  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1118  END IF
1119 *
1120  ELSE
1121 *
1122 * M .LT. MNTHR2
1123 *
1124 * Path 6 (M at least N, but not much larger)
1125 * Reduce to bidiagonal form without QR decomposition
1126 * Use ZUNMBR to compute singular vectors
1127 *
1128  ie = 1
1129  nrwork = ie + n
1130  itauq = 1
1131  itaup = itauq + n
1132  nwork = itaup + n
1133 *
1134 * Bidiagonalize A
1135 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
1136 * (RWorkspace: need N)
1137 *
1138  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1139  $ work( itaup ), work( nwork ), lwork-nwork+1,
1140  $ ierr )
1141  IF( wntqn ) THEN
1142 *
1143 * Compute singular values only
1144 * (Cworkspace: 0)
1145 * (Rworkspace: need BDSPAN)
1146 *
1147  CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1, dum, 1,
1148  $ dum, idum, rwork( nrwork ), iwork, info )
1149  ELSE IF( wntqo ) THEN
1150  iu = nwork
1151  iru = nrwork
1152  irvt = iru + n*n
1153  nrwork = irvt + n*n
1154  IF( lwork.GE.m*n+3*n ) THEN
1155 *
1156 * WORK( IU ) is M by N
1157 *
1158  ldwrku = m
1159  ELSE
1160 *
1161 * WORK( IU ) is LDWRKU by N
1162 *
1163  ldwrku = ( lwork-3*n ) / n
1164  END IF
1165  nwork = iu + ldwrku*n
1166 *
1167 * Perform bidiagonal SVD, computing left singular vectors
1168 * of bidiagonal matrix in RWORK(IRU) and computing right
1169 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1170 * (CWorkspace: need 0)
1171 * (RWorkspace: need BDSPAC)
1172 *
1173  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1174  $ n, rwork( irvt ), n, dum, idum,
1175  $ rwork( nrwork ), iwork, info )
1176 *
1177 * Copy real matrix RWORK(IRVT) to complex matrix VT
1178 * Overwrite VT by right singular vectors of A
1179 * (Cworkspace: need 2*N, prefer N+N*NB)
1180 * (Rworkspace: need 0)
1181 *
1182  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1183  CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1184  $ work( itaup ), vt, ldvt, work( nwork ),
1185  $ lwork-nwork+1, ierr )
1186 *
1187  IF( lwork.GE.m*n+3*n ) THEN
1188 *
1189 * Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1190 * Overwrite WORK(IU) by left singular vectors of A, copying
1191 * to A
1192 * (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB)
1193 * (Rworkspace: need 0)
1194 *
1195  CALL zlaset( 'F', m, n, czero, czero, work( iu ),
1196  $ ldwrku )
1197  CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
1198  $ ldwrku )
1199  CALL zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1200  $ work( itauq ), work( iu ), ldwrku,
1201  $ work( nwork ), lwork-nwork+1, ierr )
1202  CALL zlacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
1203  ELSE
1204 *
1205 * Generate Q in A
1206 * (Cworkspace: need 2*N, prefer N+N*NB)
1207 * (Rworkspace: need 0)
1208 *
1209  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
1210  $ work( nwork ), lwork-nwork+1, ierr )
1211 *
1212 * Multiply Q in A by real matrix RWORK(IRU), storing the
1213 * result in WORK(IU), copying to A
1214 * (CWorkspace: need N*N, prefer M*N)
1215 * (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
1216 *
1217  nrwork = irvt
1218  DO 30 i = 1, m, ldwrku
1219  chunk = min( m-i+1, ldwrku )
1220  CALL zlacrm( chunk, n, a( i, 1 ), lda,
1221  $ rwork( iru ), n, work( iu ), ldwrku,
1222  $ rwork( nrwork ) )
1223  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
1224  $ a( i, 1 ), lda )
1225  30 CONTINUE
1226  END IF
1227 *
1228  ELSE IF( wntqs ) THEN
1229 *
1230 * Perform bidiagonal SVD, computing left singular vectors
1231 * of bidiagonal matrix in RWORK(IRU) and computing right
1232 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1233 * (CWorkspace: need 0)
1234 * (RWorkspace: need BDSPAC)
1235 *
1236  iru = nrwork
1237  irvt = iru + n*n
1238  nrwork = irvt + n*n
1239  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1240  $ n, rwork( irvt ), n, dum, idum,
1241  $ rwork( nrwork ), iwork, info )
1242 *
1243 * Copy real matrix RWORK(IRU) to complex matrix U
1244 * Overwrite U by left singular vectors of A
1245 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
1246 * (RWorkspace: 0)
1247 *
1248  CALL zlaset( 'F', m, n, czero, czero, u, ldu )
1249  CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1250  CALL zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1251  $ work( itauq ), u, ldu, work( nwork ),
1252  $ lwork-nwork+1, ierr )
1253 *
1254 * Copy real matrix RWORK(IRVT) to complex matrix VT
1255 * Overwrite VT by right singular vectors of A
1256 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
1257 * (RWorkspace: 0)
1258 *
1259  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1260  CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1261  $ work( itaup ), vt, ldvt, work( nwork ),
1262  $ lwork-nwork+1, ierr )
1263  ELSE
1264 *
1265 * Perform bidiagonal SVD, computing left singular vectors
1266 * of bidiagonal matrix in RWORK(IRU) and computing right
1267 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1268 * (CWorkspace: need 0)
1269 * (RWorkspace: need BDSPAC)
1270 *
1271  iru = nrwork
1272  irvt = iru + n*n
1273  nrwork = irvt + n*n
1274  CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1275  $ n, rwork( irvt ), n, dum, idum,
1276  $ rwork( nrwork ), iwork, info )
1277 *
1278 * Set the right corner of U to identity matrix
1279 *
1280  CALL zlaset( 'F', m, m, czero, czero, u, ldu )
1281  IF( m.GT.n ) THEN
1282  CALL zlaset( 'F', m-n, m-n, czero, cone,
1283  $ u( n+1, n+1 ), ldu )
1284  END IF
1285 *
1286 * Copy real matrix RWORK(IRU) to complex matrix U
1287 * Overwrite U by left singular vectors of A
1288 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1289 * (RWorkspace: 0)
1290 *
1291  CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1292  CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
1293  $ work( itauq ), u, ldu, work( nwork ),
1294  $ lwork-nwork+1, ierr )
1295 *
1296 * Copy real matrix RWORK(IRVT) to complex matrix VT
1297 * Overwrite VT by right singular vectors of A
1298 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
1299 * (RWorkspace: 0)
1300 *
1301  CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1302  CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1303  $ work( itaup ), vt, ldvt, work( nwork ),
1304  $ lwork-nwork+1, ierr )
1305  END IF
1306 *
1307  END IF
1308 *
1309  ELSE
1310 *
1311 * A has more columns than rows. If A has sufficiently more
1312 * columns than rows, first reduce using the LQ decomposition (if
1313 * sufficient workspace available)
1314 *
1315  IF( n.GE.mnthr1 ) THEN
1316 *
1317  IF( wntqn ) THEN
1318 *
1319 * Path 1t (N much larger than M, JOBZ='N')
1320 * No singular vectors to be computed
1321 *
1322  itau = 1
1323  nwork = itau + m
1324 *
1325 * Compute A=L*Q
1326 * (CWorkspace: need 2*M, prefer M+M*NB)
1327 * (RWorkspace: 0)
1328 *
1329  CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1330  $ lwork-nwork+1, ierr )
1331 *
1332 * Zero out above L
1333 *
1334  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1335  $ lda )
1336  ie = 1
1337  itauq = 1
1338  itaup = itauq + m
1339  nwork = itaup + m
1340 *
1341 * Bidiagonalize L in A
1342 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
1343 * (RWorkspace: need M)
1344 *
1345  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1346  $ work( itaup ), work( nwork ), lwork-nwork+1,
1347  $ ierr )
1348  nrwork = ie + m
1349 *
1350 * Perform bidiagonal SVD, compute singular values only
1351 * (CWorkspace: 0)
1352 * (RWorkspace: need BDSPAN)
1353 *
1354  CALL dbdsdc( 'U', 'N', m, s, rwork( ie ), dum, 1, dum, 1,
1355  $ dum, idum, rwork( nrwork ), iwork, info )
1356 *
1357  ELSE IF( wntqo ) THEN
1358 *
1359 * Path 2t (N much larger than M, JOBZ='O')
1360 * M right singular vectors to be overwritten on A and
1361 * M left singular vectors to be computed in U
1362 *
1363  ivt = 1
1364  ldwkvt = m
1365 *
1366 * WORK(IVT) is M by M
1367 *
1368  il = ivt + ldwkvt*m
1369  IF( lwork.GE.m*n+m*m+3*m ) THEN
1370 *
1371 * WORK(IL) M by N
1372 *
1373  ldwrkl = m
1374  chunk = n
1375  ELSE
1376 *
1377 * WORK(IL) is M by CHUNK
1378 *
1379  ldwrkl = m
1380  chunk = ( lwork-m*m-3*m ) / m
1381  END IF
1382  itau = il + ldwrkl*chunk
1383  nwork = itau + m
1384 *
1385 * Compute A=L*Q
1386 * (CWorkspace: need 2*M, prefer M+M*NB)
1387 * (RWorkspace: 0)
1388 *
1389  CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1390  $ lwork-nwork+1, ierr )
1391 *
1392 * Copy L to WORK(IL), zeroing about above it
1393 *
1394  CALL zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1395  CALL zlaset( 'U', m-1, m-1, czero, czero,
1396  $ work( il+ldwrkl ), ldwrkl )
1397 *
1398 * Generate Q in A
1399 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
1400 * (RWorkspace: 0)
1401 *
1402  CALL zunglq( m, n, m, a, lda, work( itau ),
1403  $ work( nwork ), lwork-nwork+1, ierr )
1404  ie = 1
1405  itauq = itau
1406  itaup = itauq + m
1407  nwork = itaup + m
1408 *
1409 * Bidiagonalize L in WORK(IL)
1410 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
1411 * (RWorkspace: need M)
1412 *
1413  CALL zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1414  $ work( itauq ), work( itaup ), work( nwork ),
1415  $ lwork-nwork+1, ierr )
1416 *
1417 * Perform bidiagonal SVD, computing left singular vectors
1418 * of bidiagonal matrix in RWORK(IRU) and computing right
1419 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1420 * (CWorkspace: need 0)
1421 * (RWorkspace: need BDSPAC)
1422 *
1423  iru = ie + m
1424  irvt = iru + m*m
1425  nrwork = irvt + m*m
1426  CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1427  $ m, rwork( irvt ), m, dum, idum,
1428  $ rwork( nrwork ), iwork, info )
1429 *
1430 * Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1431 * Overwrite WORK(IU) by the left singular vectors of L
1432 * (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
1433 * (RWorkspace: 0)
1434 *
1435  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1436  CALL zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1437  $ work( itauq ), u, ldu, work( nwork ),
1438  $ lwork-nwork+1, ierr )
1439 *
1440 * Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1441 * Overwrite WORK(IVT) by the right singular vectors of L
1442 * (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
1443 * (RWorkspace: 0)
1444 *
1445  CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1446  $ ldwkvt )
1447  CALL zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1448  $ work( itaup ), work( ivt ), ldwkvt,
1449  $ work( nwork ), lwork-nwork+1, ierr )
1450 *
1451 * Multiply right singular vectors of L in WORK(IL) by Q
1452 * in A, storing result in WORK(IL) and copying to A
1453 * (CWorkspace: need 2*M*M, prefer M*M+M*N))
1454 * (RWorkspace: 0)
1455 *
1456  DO 40 i = 1, n, chunk
1457  blk = min( n-i+1, chunk )
1458  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,
1459  $ a( 1, i ), lda, czero, work( il ),
1460  $ ldwrkl )
1461  CALL zlacpy( 'F', m, blk, work( il ), ldwrkl,
1462  $ a( 1, i ), lda )
1463  40 CONTINUE
1464 *
1465  ELSE IF( wntqs ) THEN
1466 *
1467 * Path 3t (N much larger than M, JOBZ='S')
1468 * M right singular vectors to be computed in VT and
1469 * M left singular vectors to be computed in U
1470 *
1471  il = 1
1472 *
1473 * WORK(IL) is M by M
1474 *
1475  ldwrkl = m
1476  itau = il + ldwrkl*m
1477  nwork = itau + m
1478 *
1479 * Compute A=L*Q
1480 * (CWorkspace: need 2*M, prefer M+M*NB)
1481 * (RWorkspace: 0)
1482 *
1483  CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1484  $ lwork-nwork+1, ierr )
1485 *
1486 * Copy L to WORK(IL), zeroing out above it
1487 *
1488  CALL zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1489  CALL zlaset( 'U', m-1, m-1, czero, czero,
1490  $ work( il+ldwrkl ), ldwrkl )
1491 *
1492 * Generate Q in A
1493 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
1494 * (RWorkspace: 0)
1495 *
1496  CALL zunglq( m, n, m, a, lda, work( itau ),
1497  $ work( nwork ), lwork-nwork+1, ierr )
1498  ie = 1
1499  itauq = itau
1500  itaup = itauq + m
1501  nwork = itaup + m
1502 *
1503 * Bidiagonalize L in WORK(IL)
1504 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
1505 * (RWorkspace: need M)
1506 *
1507  CALL zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1508  $ work( itauq ), work( itaup ), work( nwork ),
1509  $ lwork-nwork+1, ierr )
1510 *
1511 * Perform bidiagonal SVD, computing left singular vectors
1512 * of bidiagonal matrix in RWORK(IRU) and computing right
1513 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1514 * (CWorkspace: need 0)
1515 * (RWorkspace: need BDSPAC)
1516 *
1517  iru = ie + m
1518  irvt = iru + m*m
1519  nrwork = irvt + m*m
1520  CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1521  $ m, rwork( irvt ), m, dum, idum,
1522  $ rwork( nrwork ), iwork, info )
1523 *
1524 * Copy real matrix RWORK(IRU) to complex matrix U
1525 * Overwrite U by left singular vectors of L
1526 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
1527 * (RWorkspace: 0)
1528 *
1529  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1530  CALL zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1531  $ work( itauq ), u, ldu, work( nwork ),
1532  $ lwork-nwork+1, ierr )
1533 *
1534 * Copy real matrix RWORK(IRVT) to complex matrix VT
1535 * Overwrite VT by left singular vectors of L
1536 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
1537 * (RWorkspace: 0)
1538 *
1539  CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
1540  CALL zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1541  $ work( itaup ), vt, ldvt, work( nwork ),
1542  $ lwork-nwork+1, ierr )
1543 *
1544 * Copy VT to WORK(IL), multiply right singular vectors of L
1545 * in WORK(IL) by Q in A, storing result in VT
1546 * (CWorkspace: need M*M)
1547 * (RWorkspace: 0)
1548 *
1549  CALL zlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1550  CALL zgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,
1551  $ a, lda, czero, vt, ldvt )
1552 *
1553  ELSE IF( wntqa ) THEN
1554 *
1555 * Path 9t (N much larger than M, JOBZ='A')
1556 * N right singular vectors to be computed in VT and
1557 * M left singular vectors to be computed in U
1558 *
1559  ivt = 1
1560 *
1561 * WORK(IVT) is M by M
1562 *
1563  ldwkvt = m
1564  itau = ivt + ldwkvt*m
1565  nwork = itau + m
1566 *
1567 * Compute A=L*Q, copying result to VT
1568 * (CWorkspace: need 2*M, prefer M+M*NB)
1569 * (RWorkspace: 0)
1570 *
1571  CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1572  $ lwork-nwork+1, ierr )
1573  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1574 *
1575 * Generate Q in VT
1576 * (CWorkspace: need M+N, prefer M+N*NB)
1577 * (RWorkspace: 0)
1578 *
1579  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
1580  $ work( nwork ), lwork-nwork+1, ierr )
1581 *
1582 * Produce L in A, zeroing out above it
1583 *
1584  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1585  $ lda )
1586  ie = 1
1587  itauq = itau
1588  itaup = itauq + m
1589  nwork = itaup + m
1590 *
1591 * Bidiagonalize L in A
1592 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
1593 * (RWorkspace: need M)
1594 *
1595  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1596  $ work( itaup ), work( nwork ), lwork-nwork+1,
1597  $ ierr )
1598 *
1599 * Perform bidiagonal SVD, computing left singular vectors
1600 * of bidiagonal matrix in RWORK(IRU) and computing right
1601 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1602 * (CWorkspace: need 0)
1603 * (RWorkspace: need BDSPAC)
1604 *
1605  iru = ie + m
1606  irvt = iru + m*m
1607  nrwork = irvt + m*m
1608  CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1609  $ m, rwork( irvt ), m, dum, idum,
1610  $ rwork( nrwork ), iwork, info )
1611 *
1612 * Copy real matrix RWORK(IRU) to complex matrix U
1613 * Overwrite U by left singular vectors of L
1614 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
1615 * (RWorkspace: 0)
1616 *
1617  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1618  CALL zunmbr( 'Q', 'L', 'N', m, m, m, a, lda,
1619  $ work( itauq ), u, ldu, work( nwork ),
1620  $ lwork-nwork+1, ierr )
1621 *
1622 * Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1623 * Overwrite WORK(IVT) by right singular vectors of L
1624 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
1625 * (RWorkspace: 0)
1626 *
1627  CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1628  $ ldwkvt )
1629  CALL zunmbr( 'P', 'R', 'C', m, m, m, a, lda,
1630  $ work( itaup ), work( ivt ), ldwkvt,
1631  $ work( nwork ), lwork-nwork+1, ierr )
1632 *
1633 * Multiply right singular vectors of L in WORK(IVT) by
1634 * Q in VT, storing result in A
1635 * (CWorkspace: need M*M)
1636 * (RWorkspace: 0)
1637 *
1638  CALL zgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,
1639  $ vt, ldvt, czero, a, lda )
1640 *
1641 * Copy right singular vectors of A from A to VT
1642 *
1643  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1644 *
1645  END IF
1646 *
1647  ELSE IF( n.GE.mnthr2 ) THEN
1648 *
1649 * MNTHR2 <= N < MNTHR1
1650 *
1651 * Path 5t (N much larger than M, but not as much as MNTHR1)
1652 * Reduce to bidiagonal form without QR decomposition, use
1653 * ZUNGBR and matrix multiplication to compute singular vectors
1654 *
1655 *
1656  ie = 1
1657  nrwork = ie + m
1658  itauq = 1
1659  itaup = itauq + m
1660  nwork = itaup + m
1661 *
1662 * Bidiagonalize A
1663 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
1664 * (RWorkspace: M)
1665 *
1666  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1667  $ work( itaup ), work( nwork ), lwork-nwork+1,
1668  $ ierr )
1669 *
1670  IF( wntqn ) THEN
1671 *
1672 * Compute singular values only
1673 * (Cworkspace: 0)
1674 * (Rworkspace: need BDSPAN)
1675 *
1676  CALL dbdsdc( 'L', 'N', m, s, rwork( ie ), dum, 1, dum, 1,
1677  $ dum, idum, rwork( nrwork ), iwork, info )
1678  ELSE IF( wntqo ) THEN
1679  irvt = nrwork
1680  iru = irvt + m*m
1681  nrwork = iru + m*m
1682  ivt = nwork
1683 *
1684 * Copy A to U, generate Q
1685 * (Cworkspace: need 2*M, prefer M+M*NB)
1686 * (Rworkspace: 0)
1687 *
1688  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1689  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1690  $ work( nwork ), lwork-nwork+1, ierr )
1691 *
1692 * Generate P**H in A
1693 * (Cworkspace: need 2*M, prefer M+M*NB)
1694 * (Rworkspace: 0)
1695 *
1696  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
1697  $ work( nwork ), lwork-nwork+1, ierr )
1698 *
1699  ldwkvt = m
1700  IF( lwork.GE.m*n+3*m ) THEN
1701 *
1702 * WORK( IVT ) is M by N
1703 *
1704  nwork = ivt + ldwkvt*n
1705  chunk = n
1706  ELSE
1707 *
1708 * WORK( IVT ) is M by CHUNK
1709 *
1710  chunk = ( lwork-3*m ) / m
1711  nwork = ivt + ldwkvt*chunk
1712  END IF
1713 *
1714 * Perform bidiagonal SVD, computing left singular vectors
1715 * of bidiagonal matrix in RWORK(IRU) and computing right
1716 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1717 * (CWorkspace: need 0)
1718 * (RWorkspace: need BDSPAC)
1719 *
1720  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1721  $ m, rwork( irvt ), m, dum, idum,
1722  $ rwork( nrwork ), iwork, info )
1723 *
1724 * Multiply Q in U by real matrix RWORK(IRVT)
1725 * storing the result in WORK(IVT), copying to U
1726 * (Cworkspace: need 0)
1727 * (Rworkspace: need 2*M*M)
1728 *
1729  CALL zlacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),
1730  $ ldwkvt, rwork( nrwork ) )
1731  CALL zlacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu )
1732 *
1733 * Multiply RWORK(IRVT) by P**H in A, storing the
1734 * result in WORK(IVT), copying to A
1735 * (CWorkspace: need M*M, prefer M*N)
1736 * (Rworkspace: need 2*M*M, prefer 2*M*N)
1737 *
1738  nrwork = iru
1739  DO 50 i = 1, n, chunk
1740  blk = min( n-i+1, chunk )
1741  CALL zlarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,
1742  $ work( ivt ), ldwkvt, rwork( nrwork ) )
1743  CALL zlacpy( 'F', m, blk, work( ivt ), ldwkvt,
1744  $ a( 1, i ), lda )
1745  50 CONTINUE
1746  ELSE IF( wntqs ) THEN
1747 *
1748 * Copy A to U, generate Q
1749 * (Cworkspace: need 2*M, prefer M+M*NB)
1750 * (Rworkspace: 0)
1751 *
1752  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1753  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1754  $ work( nwork ), lwork-nwork+1, ierr )
1755 *
1756 * Copy A to VT, generate P**H
1757 * (Cworkspace: need 2*M, prefer M+M*NB)
1758 * (Rworkspace: 0)
1759 *
1760  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1761  CALL zungbr( 'P', m, n, m, vt, ldvt, work( itaup ),
1762  $ work( nwork ), lwork-nwork+1, ierr )
1763 *
1764 * Perform bidiagonal SVD, computing left singular vectors
1765 * of bidiagonal matrix in RWORK(IRU) and computing right
1766 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1767 * (CWorkspace: need 0)
1768 * (RWorkspace: need BDSPAC)
1769 *
1770  irvt = nrwork
1771  iru = irvt + m*m
1772  nrwork = iru + m*m
1773  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1774  $ m, rwork( irvt ), m, dum, idum,
1775  $ rwork( nrwork ), iwork, info )
1776 *
1777 * Multiply Q in U by real matrix RWORK(IRU), storing the
1778 * result in A, copying to U
1779 * (CWorkspace: need 0)
1780 * (Rworkspace: need 3*M*M)
1781 *
1782  CALL zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1783  $ rwork( nrwork ) )
1784  CALL zlacpy( 'F', m, m, a, lda, u, ldu )
1785 *
1786 * Multiply real matrix RWORK(IRVT) by P**H in VT,
1787 * storing the result in A, copying to VT
1788 * (Cworkspace: need 0)
1789 * (Rworkspace: need M*M+2*M*N)
1790 *
1791  nrwork = iru
1792  CALL zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1793  $ rwork( nrwork ) )
1794  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1795  ELSE
1796 *
1797 * Copy A to U, generate Q
1798 * (Cworkspace: need 2*M, prefer M+M*NB)
1799 * (Rworkspace: 0)
1800 *
1801  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1802  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1803  $ work( nwork ), lwork-nwork+1, ierr )
1804 *
1805 * Copy A to VT, generate P**H
1806 * (Cworkspace: need 2*M, prefer M+M*NB)
1807 * (Rworkspace: 0)
1808 *
1809  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1810  CALL zungbr( 'P', n, n, m, vt, ldvt, work( itaup ),
1811  $ work( nwork ), lwork-nwork+1, ierr )
1812 *
1813 * Perform bidiagonal SVD, computing left singular vectors
1814 * of bidiagonal matrix in RWORK(IRU) and computing right
1815 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1816 * (CWorkspace: need 0)
1817 * (RWorkspace: need BDSPAC)
1818 *
1819  irvt = nrwork
1820  iru = irvt + m*m
1821  nrwork = iru + m*m
1822  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1823  $ m, rwork( irvt ), m, dum, idum,
1824  $ rwork( nrwork ), iwork, info )
1825 *
1826 * Multiply Q in U by real matrix RWORK(IRU), storing the
1827 * result in A, copying to U
1828 * (CWorkspace: need 0)
1829 * (Rworkspace: need 3*M*M)
1830 *
1831  CALL zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1832  $ rwork( nrwork ) )
1833  CALL zlacpy( 'F', m, m, a, lda, u, ldu )
1834 *
1835 * Multiply real matrix RWORK(IRVT) by P**H in VT,
1836 * storing the result in A, copying to VT
1837 * (Cworkspace: need 0)
1838 * (Rworkspace: need M*M+2*M*N)
1839 *
1840  CALL zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1841  $ rwork( nrwork ) )
1842  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1843  END IF
1844 *
1845  ELSE
1846 *
1847 * N .LT. MNTHR2
1848 *
1849 * Path 6t (N greater than M, but not much larger)
1850 * Reduce to bidiagonal form without LQ decomposition
1851 * Use ZUNMBR to compute singular vectors
1852 *
1853  ie = 1
1854  nrwork = ie + m
1855  itauq = 1
1856  itaup = itauq + m
1857  nwork = itaup + m
1858 *
1859 * Bidiagonalize A
1860 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
1861 * (RWorkspace: M)
1862 *
1863  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1864  $ work( itaup ), work( nwork ), lwork-nwork+1,
1865  $ ierr )
1866  IF( wntqn ) THEN
1867 *
1868 * Compute singular values only
1869 * (Cworkspace: 0)
1870 * (Rworkspace: need BDSPAN)
1871 *
1872  CALL dbdsdc( 'L', 'N', m, s, rwork( ie ), dum, 1, dum, 1,
1873  $ dum, idum, rwork( nrwork ), iwork, info )
1874  ELSE IF( wntqo ) THEN
1875  ldwkvt = m
1876  ivt = nwork
1877  IF( lwork.GE.m*n+3*m ) THEN
1878 *
1879 * WORK( IVT ) is M by N
1880 *
1881  CALL zlaset( 'F', m, n, czero, czero, work( ivt ),
1882  $ ldwkvt )
1883  nwork = ivt + ldwkvt*n
1884  ELSE
1885 *
1886 * WORK( IVT ) is M by CHUNK
1887 *
1888  chunk = ( lwork-3*m ) / m
1889  nwork = ivt + ldwkvt*chunk
1890  END IF
1891 *
1892 * Perform bidiagonal SVD, computing left singular vectors
1893 * of bidiagonal matrix in RWORK(IRU) and computing right
1894 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1895 * (CWorkspace: need 0)
1896 * (RWorkspace: need BDSPAC)
1897 *
1898  irvt = nrwork
1899  iru = irvt + m*m
1900  nrwork = iru + m*m
1901  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1902  $ m, rwork( irvt ), m, dum, idum,
1903  $ rwork( nrwork ), iwork, info )
1904 *
1905 * Copy real matrix RWORK(IRU) to complex matrix U
1906 * Overwrite U by left singular vectors of A
1907 * (Cworkspace: need 2*M, prefer M+M*NB)
1908 * (Rworkspace: need 0)
1909 *
1910  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1911  CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
1912  $ work( itauq ), u, ldu, work( nwork ),
1913  $ lwork-nwork+1, ierr )
1914 *
1915  IF( lwork.GE.m*n+3*m ) THEN
1916 *
1917 * Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1918 * Overwrite WORK(IVT) by right singular vectors of A,
1919 * copying to A
1920 * (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB)
1921 * (Rworkspace: need 0)
1922 *
1923  CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1924  $ ldwkvt )
1925  CALL zunmbr( 'P', 'R', 'C', m, n, m, a, lda,
1926  $ work( itaup ), work( ivt ), ldwkvt,
1927  $ work( nwork ), lwork-nwork+1, ierr )
1928  CALL zlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1929  ELSE
1930 *
1931 * Generate P**H in A
1932 * (Cworkspace: need 2*M, prefer M+M*NB)
1933 * (Rworkspace: need 0)
1934 *
1935  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
1936  $ work( nwork ), lwork-nwork+1, ierr )
1937 *
1938 * Multiply Q in A by real matrix RWORK(IRU), storing the
1939 * result in WORK(IU), copying to A
1940 * (CWorkspace: need M*M, prefer M*N)
1941 * (Rworkspace: need 3*M*M, prefer M*M+2*M*N)
1942 *
1943  nrwork = iru
1944  DO 60 i = 1, n, chunk
1945  blk = min( n-i+1, chunk )
1946  CALL zlarcm( m, blk, rwork( irvt ), m, a( 1, i ),
1947  $ lda, work( ivt ), ldwkvt,
1948  $ rwork( nrwork ) )
1949  CALL zlacpy( 'F', m, blk, work( ivt ), ldwkvt,
1950  $ a( 1, i ), lda )
1951  60 CONTINUE
1952  END IF
1953  ELSE IF( wntqs ) THEN
1954 *
1955 * Perform bidiagonal SVD, computing left singular vectors
1956 * of bidiagonal matrix in RWORK(IRU) and computing right
1957 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1958 * (CWorkspace: need 0)
1959 * (RWorkspace: need BDSPAC)
1960 *
1961  irvt = nrwork
1962  iru = irvt + m*m
1963  nrwork = iru + m*m
1964  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1965  $ m, rwork( irvt ), m, dum, idum,
1966  $ rwork( nrwork ), iwork, info )
1967 *
1968 * Copy real matrix RWORK(IRU) to complex matrix U
1969 * Overwrite U by left singular vectors of A
1970 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
1971 * (RWorkspace: M*M)
1972 *
1973  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1974  CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
1975  $ work( itauq ), u, ldu, work( nwork ),
1976  $ lwork-nwork+1, ierr )
1977 *
1978 * Copy real matrix RWORK(IRVT) to complex matrix VT
1979 * Overwrite VT by right singular vectors of A
1980 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
1981 * (RWorkspace: M*M)
1982 *
1983  CALL zlaset( 'F', m, n, czero, czero, vt, ldvt )
1984  CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
1985  CALL zunmbr( 'P', 'R', 'C', m, n, m, a, lda,
1986  $ work( itaup ), vt, ldvt, work( nwork ),
1987  $ lwork-nwork+1, ierr )
1988  ELSE
1989 *
1990 * Perform bidiagonal SVD, computing left singular vectors
1991 * of bidiagonal matrix in RWORK(IRU) and computing right
1992 * singular vectors of bidiagonal matrix in RWORK(IRVT)
1993 * (CWorkspace: need 0)
1994 * (RWorkspace: need BDSPAC)
1995 *
1996  irvt = nrwork
1997  iru = irvt + m*m
1998  nrwork = iru + m*m
1999 *
2000  CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2001  $ m, rwork( irvt ), m, dum, idum,
2002  $ rwork( nrwork ), iwork, info )
2003 *
2004 * Copy real matrix RWORK(IRU) to complex matrix U
2005 * Overwrite U by left singular vectors of A
2006 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2007 * (RWorkspace: M*M)
2008 *
2009  CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2010  CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2011  $ work( itauq ), u, ldu, work( nwork ),
2012  $ lwork-nwork+1, ierr )
2013 *
2014 * Set all of VT to identity matrix
2015 *
2016  CALL zlaset( 'F', n, n, czero, cone, vt, ldvt )
2017 *
2018 * Copy real matrix RWORK(IRVT) to complex matrix VT
2019 * Overwrite VT by right singular vectors of A
2020 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2021 * (RWorkspace: M*M)
2022 *
2023  CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
2024  CALL zunmbr( 'P', 'R', 'C', n, n, m, a, lda,
2025  $ work( itaup ), vt, ldvt, work( nwork ),
2026  $ lwork-nwork+1, ierr )
2027  END IF
2028 *
2029  END IF
2030 *
2031  END IF
2032 *
2033 * Undo scaling if necessary
2034 *
2035  IF( iscl.EQ.1 ) THEN
2036  IF( anrm.GT.bignum )
2037  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
2038  $ ierr )
2039  IF( info.NE.0 .AND. anrm.GT.bignum )
2040  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
2041  $ rwork( ie ), minmn, ierr )
2042  IF( anrm.LT.smlnum )
2043  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
2044  $ ierr )
2045  IF( info.NE.0 .AND. anrm.LT.smlnum )
2046  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
2047  $ rwork( ie ), minmn, ierr )
2048  END IF
2049 *
2050 * Return optimal workspace in WORK(1)
2051 *
2052  work( 1 ) = maxwrk
2053 *
2054  RETURN
2055 *
2056 * End of ZGESDD
2057 *
subroutine dbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
DBDSDC
Definition: dbdsdc.f:207
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:105
subroutine zlacp2(UPLO, M, N, A, LDA, B, LDB)
ZLACP2 copies all or part of a real two-dimensional array to a complex array.
Definition: zlacp2.f:106
subroutine zgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
ZGEBRD
Definition: zgebrd.f:207
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
subroutine zungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGQR
Definition: zungqr.f:130
subroutine zungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGBR
Definition: zungbr.f:159
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:141
subroutine zlacrm(M, N, A, LDA, B, LDB, C, LDC, RWORK)
ZLACRM multiplies a complex matrix by a square real matrix.
Definition: zlacrm.f:116
double precision function dlamch(CMACH)
DLAMCH
Definition: dlamch.f:65
subroutine zunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMBR
Definition: zunmbr.f:198
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:137
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine zunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGLQ
Definition: zunglq.f:129
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151
double precision function zlange(NORM, M, N, A, LDA, WORK)
ZLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition: zlange.f:117
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:141
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
Definition: tstiee.f:83
subroutine zlarcm(M, N, A, LDA, B, LDB, C, LDC, RWORK)
ZLARCM copies all or part of a real two-dimensional array to a complex array.
Definition: zlarcm.f:116

Here is the call graph for this function:

Here is the caller graph for this function:

subroutine zgesvd ( character  JOBU,
character  JOBVT,
integer  M,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
double precision, dimension( * )  S,
complex*16, dimension( ldu, * )  U,
integer  LDU,
complex*16, dimension( ldvt, * )  VT,
integer  LDVT,
complex*16, dimension( * )  WORK,
integer  LWORK,
double precision, dimension( * )  RWORK,
integer  INFO 
)

ZGESVD computes the singular value decomposition (SVD) for GE matrices

Download ZGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZGESVD computes the singular value decomposition (SVD) of a complex
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * conjugate-transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
 V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**H, not V.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U are returned in array U:
          = 'S':  the first min(m,n) columns of U (the left singular
                  vectors) are returned in the array U;
          = 'O':  the first min(m,n) columns of U (the left singular
                  vectors) are overwritten on the array A;
          = 'N':  no columns of U (no left singular vectors) are
                  computed.
[in]JOBVT
          JOBVT is CHARACTER*1
          Specifies options for computing all or part of the matrix
          V**H:
          = 'A':  all N rows of V**H are returned in the array VT;
          = 'S':  the first min(m,n) rows of V**H (the right singular
                  vectors) are returned in the array VT;
          = 'O':  the first min(m,n) rows of V**H (the right singular
                  vectors) are overwritten on the array A;
          = 'N':  no rows of V**H (no right singular vectors) are
                  computed.

          JOBVT and JOBU cannot both be 'O'.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBU = 'O',  A is overwritten with the first min(m,n)
                          columns of U (the left singular vectors,
                          stored columnwise);
          if JOBVT = 'O', A is overwritten with the first min(m,n)
                          rows of V**H (the right singular vectors,
                          stored rowwise);
          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
                          are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is DOUBLE PRECISION array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is COMPLEX*16 array, dimension (LDU,UCOL)
          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
          If JOBU = 'A', U contains the M-by-M unitary matrix U;
          if JOBU = 'S', U contains the first min(m,n) columns of U
          (the left singular vectors, stored columnwise);
          if JOBU = 'N' or 'O', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBU = 'S' or 'A', LDU >= M.
[out]VT
          VT is COMPLEX*16 array, dimension (LDVT,N)
          If JOBVT = 'A', VT contains the N-by-N unitary matrix
          V**H;
          if JOBVT = 'S', VT contains the first min(m,n) rows of
          V**H (the right singular vectors, stored rowwise);
          if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK.
          LWORK >=  MAX(1,2*MIN(M,N)+MAX(M,N)).
          For good performance, LWORK should generally be larger.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]RWORK
          RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
          unconverged superdiagonal elements of an upper bidiagonal
          matrix B whose diagonal is in S (not necessarily sorted).
          B satisfies A = U * B * VT, so it has the same singular
          values as A, and singular vectors related by U and VT.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if ZBDSQR did not converge, INFO specifies how many
                superdiagonals of an intermediate bidiagonal form B
                did not converge to zero. See the description of RWORK
                above for details.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
April 2012

Definition at line 216 of file zgesvd.f.

216 *
217 * -- LAPACK driver routine (version 3.6.0) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER jobu, jobvt
224  INTEGER info, lda, ldu, ldvt, lwork, m, n
225 * ..
226 * .. Array Arguments ..
227  DOUBLE PRECISION rwork( * ), s( * )
228  COMPLEX*16 a( lda, * ), u( ldu, * ), vt( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX*16 czero, cone
236  parameter( czero = ( 0.0d0, 0.0d0 ),
237  $ cone = ( 1.0d0, 0.0d0 ) )
238  DOUBLE PRECISION zero, one
239  parameter( zero = 0.0d0, one = 1.0d0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER blk, chunk, i, ie, ierr, ir, irwork, iscl,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m,
249  $ lwork_zgebrd, lwork_zungbr_p, lwork_zungbr_q,
250  $ lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m
251  DOUBLE PRECISION anrm, bignum, eps, smlnum
252 * ..
253 * .. Local Arrays ..
254  DOUBLE PRECISION dum( 1 )
255  COMPLEX*16 cdum( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
260  $ zungqr, zunmbr
261 * ..
262 * .. External Functions ..
263  LOGICAL lsame
264  INTEGER ilaenv
265  DOUBLE PRECISION dlamch, zlange
266  EXTERNAL lsame, ilaenv, dlamch, zlange
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for ZGEQRF
324  CALL zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_zgeqrf=cdum(1)
326 * Compute space needed for ZUNGQR
327  CALL zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
328  lwork_zungqr_n=cdum(1)
329  CALL zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
330  lwork_zungqr_m=cdum(1)
331 * Compute space needed for ZGEBRD
332  CALL zgebrd( n, n, a, lda, s, dum(1), cdum(1),
333  $ cdum(1), cdum(1), -1, ierr )
334  lwork_zgebrd=cdum(1)
335 * Compute space needed for ZUNGBR
336  CALL zungbr( 'P', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_zungbr_p=cdum(1)
339  CALL zungbr( 'Q', n, n, n, a, lda, cdum(1),
340  $ cdum(1), -1, ierr )
341  lwork_zungbr_q=cdum(1)
342 *
343  IF( m.GE.mnthr ) THEN
344  IF( wntun ) THEN
345 *
346 * Path 1 (M much larger than N, JOBU='N')
347 *
348  maxwrk = n + lwork_zgeqrf
349  maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
350  IF( wntvo .OR. wntvas )
351  $ maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
352  minwrk = 3*n
353  ELSE IF( wntuo .AND. wntvn ) THEN
354 *
355 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
356 *
357  wrkbl = n + lwork_zgeqrf
358  wrkbl = max( wrkbl, n+lwork_zungqr_n )
359  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
360  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
361  maxwrk = max( n*n+wrkbl, n*n+m*n )
362  minwrk = 2*n + m
363  ELSE IF( wntuo .AND. wntvas ) THEN
364 *
365 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
366 * 'A')
367 *
368  wrkbl = n + lwork_zgeqrf
369  wrkbl = max( wrkbl, n+lwork_zungqr_n )
370  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
371  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
372  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
373  maxwrk = max( n*n+wrkbl, n*n+m*n )
374  minwrk = 2*n + m
375  ELSE IF( wntus .AND. wntvn ) THEN
376 *
377 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
378 *
379  wrkbl = n + lwork_zgeqrf
380  wrkbl = max( wrkbl, n+lwork_zungqr_n )
381  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
382  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
383  maxwrk = n*n + wrkbl
384  minwrk = 2*n + m
385  ELSE IF( wntus .AND. wntvo ) THEN
386 *
387 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
388 *
389  wrkbl = n + lwork_zgeqrf
390  wrkbl = max( wrkbl, n+lwork_zungqr_n )
391  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
392  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
393  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
394  maxwrk = 2*n*n + wrkbl
395  minwrk = 2*n + m
396  ELSE IF( wntus .AND. wntvas ) THEN
397 *
398 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
399 * 'A')
400 *
401  wrkbl = n + lwork_zgeqrf
402  wrkbl = max( wrkbl, n+lwork_zungqr_n )
403  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
404  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
405  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
406  maxwrk = n*n + wrkbl
407  minwrk = 2*n + m
408  ELSE IF( wntua .AND. wntvn ) THEN
409 *
410 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
411 *
412  wrkbl = n + lwork_zgeqrf
413  wrkbl = max( wrkbl, n+lwork_zungqr_m )
414  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
415  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
416  maxwrk = n*n + wrkbl
417  minwrk = 2*n + m
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_zgeqrf
423  wrkbl = max( wrkbl, n+lwork_zungqr_m )
424  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
425  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
426  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
427  maxwrk = 2*n*n + wrkbl
428  minwrk = 2*n + m
429  ELSE IF( wntua .AND. wntvas ) THEN
430 *
431 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
432 * 'A')
433 *
434  wrkbl = n + lwork_zgeqrf
435  wrkbl = max( wrkbl, n+lwork_zungqr_m )
436  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
437  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
438  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
439  maxwrk = n*n + wrkbl
440  minwrk = 2*n + m
441  END IF
442  ELSE
443 *
444 * Path 10 (M at least N, but not much larger)
445 *
446  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
447  $ cdum(1), cdum(1), -1, ierr )
448  lwork_zgebrd=cdum(1)
449  maxwrk = 2*n + lwork_zgebrd
450  IF( wntus .OR. wntuo ) THEN
451  CALL zungbr( 'Q', m, n, n, a, lda, cdum(1),
452  $ cdum(1), -1, ierr )
453  lwork_zungbr_q=cdum(1)
454  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
455  END IF
456  IF( wntua ) THEN
457  CALL zungbr( 'Q', m, m, n, a, lda, cdum(1),
458  $ cdum(1), -1, ierr )
459  lwork_zungbr_q=cdum(1)
460  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
461  END IF
462  IF( .NOT.wntvn ) THEN
463  maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
464  minwrk = 2*n + m
465  END IF
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Space needed for ZBDSQR is BDSPAC = 5*M
470 *
471  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
472 * Compute space needed for ZGELQF
473  CALL zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
474  lwork_zgelqf=cdum(1)
475 * Compute space needed for ZUNGLQ
476  CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
477  $ ierr )
478  lwork_zunglq_n=cdum(1)
479  CALL zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
480  lwork_zunglq_m=cdum(1)
481 * Compute space needed for ZGEBRD
482  CALL zgebrd( m, m, a, lda, s, dum(1), cdum(1),
483  $ cdum(1), cdum(1), -1, ierr )
484  lwork_zgebrd=cdum(1)
485 * Compute space needed for ZUNGBR P
486  CALL zungbr( 'P', m, m, m, a, n, cdum(1),
487  $ cdum(1), -1, ierr )
488  lwork_zungbr_p=cdum(1)
489 * Compute space needed for ZUNGBR Q
490  CALL zungbr( 'Q', m, m, m, a, n, cdum(1),
491  $ cdum(1), -1, ierr )
492  lwork_zungbr_q=cdum(1)
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_zgelqf
499  maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
502  minwrk = 3*m
503  ELSE IF( wntvo .AND. wntun ) THEN
504 *
505 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
506 *
507  wrkbl = m + lwork_zgelqf
508  wrkbl = max( wrkbl, m+lwork_zunglq_m )
509  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
510  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
511  maxwrk = max( m*m+wrkbl, m*m+m*n )
512  minwrk = 2*m + n
513  ELSE IF( wntvo .AND. wntuas ) THEN
514 *
515 * Path 3t(N much larger than M, JOBU='S' or 'A',
516 * JOBVT='O')
517 *
518  wrkbl = m + lwork_zgelqf
519  wrkbl = max( wrkbl, m+lwork_zunglq_m )
520  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
521  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
522  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
523  maxwrk = max( m*m+wrkbl, m*m+m*n )
524  minwrk = 2*m + n
525  ELSE IF( wntvs .AND. wntun ) THEN
526 *
527 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
528 *
529  wrkbl = m + lwork_zgelqf
530  wrkbl = max( wrkbl, m+lwork_zunglq_m )
531  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
532  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
533  maxwrk = m*m + wrkbl
534  minwrk = 2*m + n
535  ELSE IF( wntvs .AND. wntuo ) THEN
536 *
537 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
538 *
539  wrkbl = m + lwork_zgelqf
540  wrkbl = max( wrkbl, m+lwork_zunglq_m )
541  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
542  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
543  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
544  maxwrk = 2*m*m + wrkbl
545  minwrk = 2*m + n
546  ELSE IF( wntvs .AND. wntuas ) THEN
547 *
548 * Path 6t(N much larger than M, JOBU='S' or 'A',
549 * JOBVT='S')
550 *
551  wrkbl = m + lwork_zgelqf
552  wrkbl = max( wrkbl, m+lwork_zunglq_m )
553  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
554  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
555  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
556  maxwrk = m*m + wrkbl
557  minwrk = 2*m + n
558  ELSE IF( wntva .AND. wntun ) THEN
559 *
560 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
561 *
562  wrkbl = m + lwork_zgelqf
563  wrkbl = max( wrkbl, m+lwork_zunglq_n )
564  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
565  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
566  maxwrk = m*m + wrkbl
567  minwrk = 2*m + n
568  ELSE IF( wntva .AND. wntuo ) THEN
569 *
570 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
571 *
572  wrkbl = m + lwork_zgelqf
573  wrkbl = max( wrkbl, m+lwork_zunglq_n )
574  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
575  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
576  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
577  maxwrk = 2*m*m + wrkbl
578  minwrk = 2*m + n
579  ELSE IF( wntva .AND. wntuas ) THEN
580 *
581 * Path 9t(N much larger than M, JOBU='S' or 'A',
582 * JOBVT='A')
583 *
584  wrkbl = m + lwork_zgelqf
585  wrkbl = max( wrkbl, m+lwork_zunglq_n )
586  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
587  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
588  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
589  maxwrk = m*m + wrkbl
590  minwrk = 2*m + n
591  END IF
592  ELSE
593 *
594 * Path 10t(N greater than M, but not much larger)
595 *
596  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
597  $ cdum(1), cdum(1), -1, ierr )
598  lwork_zgebrd=cdum(1)
599  maxwrk = 2*m + lwork_zgebrd
600  IF( wntvs .OR. wntvo ) THEN
601 * Compute space needed for ZUNGBR P
602  CALL zungbr( 'P', m, n, m, a, n, cdum(1),
603  $ cdum(1), -1, ierr )
604  lwork_zungbr_p=cdum(1)
605  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
606  END IF
607  IF( wntva ) THEN
608  CALL zungbr( 'P', n, n, m, a, n, cdum(1),
609  $ cdum(1), -1, ierr )
610  lwork_zungbr_p=cdum(1)
611  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
612  END IF
613  IF( .NOT.wntun ) THEN
614  maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
615  minwrk = 2*m + n
616  END IF
617  END IF
618  END IF
619  maxwrk = max( maxwrk, minwrk )
620  work( 1 ) = maxwrk
621 *
622  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
623  info = -13
624  END IF
625  END IF
626 *
627  IF( info.NE.0 ) THEN
628  CALL xerbla( 'ZGESVD', -info )
629  RETURN
630  ELSE IF( lquery ) THEN
631  RETURN
632  END IF
633 *
634 * Quick return if possible
635 *
636  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
637  RETURN
638  END IF
639 *
640 * Get machine constants
641 *
642  eps = dlamch( 'P' )
643  smlnum = sqrt( dlamch( 'S' ) ) / eps
644  bignum = one / smlnum
645 *
646 * Scale A if max element outside range [SMLNUM,BIGNUM]
647 *
648  anrm = zlange( 'M', m, n, a, lda, dum )
649  iscl = 0
650  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
651  iscl = 1
652  CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
653  ELSE IF( anrm.GT.bignum ) THEN
654  iscl = 1
655  CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
656  END IF
657 *
658  IF( m.GE.n ) THEN
659 *
660 * A has at least as many rows as columns. If A has sufficiently
661 * more rows than columns, first reduce using the QR
662 * decomposition (if sufficient workspace available)
663 *
664  IF( m.GE.mnthr ) THEN
665 *
666  IF( wntun ) THEN
667 *
668 * Path 1 (M much larger than N, JOBU='N')
669 * No left singular vectors to be computed
670 *
671  itau = 1
672  iwork = itau + n
673 *
674 * Compute A=Q*R
675 * (CWorkspace: need 2*N, prefer N+N*NB)
676 * (RWorkspace: need 0)
677 *
678  CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
679  $ lwork-iwork+1, ierr )
680 *
681 * Zero out below R
682 *
683  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
684  $ lda )
685  ie = 1
686  itauq = 1
687  itaup = itauq + n
688  iwork = itaup + n
689 *
690 * Bidiagonalize R in A
691 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
692 * (RWorkspace: need N)
693 *
694  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
695  $ work( itaup ), work( iwork ), lwork-iwork+1,
696  $ ierr )
697  ncvt = 0
698  IF( wntvo .OR. wntvas ) THEN
699 *
700 * If right singular vectors desired, generate P'.
701 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
702 * (RWorkspace: 0)
703 *
704  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
705  $ work( iwork ), lwork-iwork+1, ierr )
706  ncvt = n
707  END IF
708  irwork = ie + n
709 *
710 * Perform bidiagonal QR iteration, computing right
711 * singular vectors of A in A if desired
712 * (CWorkspace: 0)
713 * (RWorkspace: need BDSPAC)
714 *
715  CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
716  $ cdum, 1, cdum, 1, rwork( irwork ), info )
717 *
718 * If right singular vectors desired in VT, copy them there
719 *
720  IF( wntvas )
721  $ CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
722 *
723  ELSE IF( wntuo .AND. wntvn ) THEN
724 *
725 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
726 * N left singular vectors to be overwritten on A and
727 * no right singular vectors to be computed
728 *
729  IF( lwork.GE.n*n+3*n ) THEN
730 *
731 * Sufficient workspace for a fast algorithm
732 *
733  ir = 1
734  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
735 *
736 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
737 *
738  ldwrku = lda
739  ldwrkr = lda
740  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
741 *
742 * WORK(IU) is LDA by N, WORK(IR) is N by N
743 *
744  ldwrku = lda
745  ldwrkr = n
746  ELSE
747 *
748 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
749 *
750  ldwrku = ( lwork-n*n ) / n
751  ldwrkr = n
752  END IF
753  itau = ir + ldwrkr*n
754  iwork = itau + n
755 *
756 * Compute A=Q*R
757 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758 * (RWorkspace: 0)
759 *
760  CALL zgeqrf( m, n, a, lda, work( itau ),
761  $ work( iwork ), lwork-iwork+1, ierr )
762 *
763 * Copy R to WORK(IR) and zero out below it
764 *
765  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
766  CALL zlaset( 'L', n-1, n-1, czero, czero,
767  $ work( ir+1 ), ldwrkr )
768 *
769 * Generate Q in A
770 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
771 * (RWorkspace: 0)
772 *
773  CALL zungqr( m, n, n, a, lda, work( itau ),
774  $ work( iwork ), lwork-iwork+1, ierr )
775  ie = 1
776  itauq = itau
777  itaup = itauq + n
778  iwork = itaup + n
779 *
780 * Bidiagonalize R in WORK(IR)
781 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782 * (RWorkspace: need N)
783 *
784  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
785  $ work( itauq ), work( itaup ),
786  $ work( iwork ), lwork-iwork+1, ierr )
787 *
788 * Generate left vectors bidiagonalizing R
789 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
790 * (RWorkspace: need 0)
791 *
792  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
793  $ work( itauq ), work( iwork ),
794  $ lwork-iwork+1, ierr )
795  irwork = ie + n
796 *
797 * Perform bidiagonal QR iteration, computing left
798 * singular vectors of R in WORK(IR)
799 * (CWorkspace: need N*N)
800 * (RWorkspace: need BDSPAC)
801 *
802  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
803  $ work( ir ), ldwrkr, cdum, 1,
804  $ rwork( irwork ), info )
805  iu = itauq
806 *
807 * Multiply Q in A by left singular vectors of R in
808 * WORK(IR), storing result in WORK(IU) and copying to A
809 * (CWorkspace: need N*N+N, prefer N*N+M*N)
810 * (RWorkspace: 0)
811 *
812  DO 10 i = 1, m, ldwrku
813  chunk = min( m-i+1, ldwrku )
814  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815  $ lda, work( ir ), ldwrkr, czero,
816  $ work( iu ), ldwrku )
817  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
818  $ a( i, 1 ), lda )
819  10 CONTINUE
820 *
821  ELSE
822 *
823 * Insufficient workspace for a fast algorithm
824 *
825  ie = 1
826  itauq = 1
827  itaup = itauq + n
828  iwork = itaup + n
829 *
830 * Bidiagonalize A
831 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
832 * (RWorkspace: N)
833 *
834  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
835  $ work( itauq ), work( itaup ),
836  $ work( iwork ), lwork-iwork+1, ierr )
837 *
838 * Generate left vectors bidiagonalizing A
839 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
840 * (RWorkspace: 0)
841 *
842  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
843  $ work( iwork ), lwork-iwork+1, ierr )
844  irwork = ie + n
845 *
846 * Perform bidiagonal QR iteration, computing left
847 * singular vectors of A in A
848 * (CWorkspace: need 0)
849 * (RWorkspace: need BDSPAC)
850 *
851  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
852  $ a, lda, cdum, 1, rwork( irwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+3*n ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
891 * (RWorkspace: 0)
892 *
893  CALL zgeqrf( m, n, a, lda, work( itau ),
894  $ work( iwork ), lwork-iwork+1, ierr )
895 *
896 * Copy R to VT, zeroing out below it
897 *
898  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
899  IF( n.GT.1 )
900  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
901  $ vt( 2, 1 ), ldvt )
902 *
903 * Generate Q in A
904 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
905 * (RWorkspace: 0)
906 *
907  CALL zungqr( m, n, n, a, lda, work( itau ),
908  $ work( iwork ), lwork-iwork+1, ierr )
909  ie = 1
910  itauq = itau
911  itaup = itauq + n
912  iwork = itaup + n
913 *
914 * Bidiagonalize R in VT, copying result to WORK(IR)
915 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
916 * (RWorkspace: need N)
917 *
918  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
919  $ work( itauq ), work( itaup ),
920  $ work( iwork ), lwork-iwork+1, ierr )
921  CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
922 *
923 * Generate left vectors bidiagonalizing R in WORK(IR)
924 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
925 * (RWorkspace: 0)
926 *
927  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
928  $ work( itauq ), work( iwork ),
929  $ lwork-iwork+1, ierr )
930 *
931 * Generate right vectors bidiagonalizing R in VT
932 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
933 * (RWorkspace: 0)
934 *
935  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
936  $ work( iwork ), lwork-iwork+1, ierr )
937  irwork = ie + n
938 *
939 * Perform bidiagonal QR iteration, computing left
940 * singular vectors of R in WORK(IR) and computing right
941 * singular vectors of R in VT
942 * (CWorkspace: need N*N)
943 * (RWorkspace: need BDSPAC)
944 *
945  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
946  $ ldvt, work( ir ), ldwrkr, cdum, 1,
947  $ rwork( irwork ), info )
948  iu = itauq
949 *
950 * Multiply Q in A by left singular vectors of R in
951 * WORK(IR), storing result in WORK(IU) and copying to A
952 * (CWorkspace: need N*N+N, prefer N*N+M*N)
953 * (RWorkspace: 0)
954 *
955  DO 20 i = 1, m, ldwrku
956  chunk = min( m-i+1, ldwrku )
957  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
958  $ lda, work( ir ), ldwrkr, czero,
959  $ work( iu ), ldwrku )
960  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
961  $ a( i, 1 ), lda )
962  20 CONTINUE
963 *
964  ELSE
965 *
966 * Insufficient workspace for a fast algorithm
967 *
968  itau = 1
969  iwork = itau + n
970 *
971 * Compute A=Q*R
972 * (CWorkspace: need 2*N, prefer N+N*NB)
973 * (RWorkspace: 0)
974 *
975  CALL zgeqrf( m, n, a, lda, work( itau ),
976  $ work( iwork ), lwork-iwork+1, ierr )
977 *
978 * Copy R to VT, zeroing out below it
979 *
980  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
981  IF( n.GT.1 )
982  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
983  $ vt( 2, 1 ), ldvt )
984 *
985 * Generate Q in A
986 * (CWorkspace: need 2*N, prefer N+N*NB)
987 * (RWorkspace: 0)
988 *
989  CALL zungqr( m, n, n, a, lda, work( itau ),
990  $ work( iwork ), lwork-iwork+1, ierr )
991  ie = 1
992  itauq = itau
993  itaup = itauq + n
994  iwork = itaup + n
995 *
996 * Bidiagonalize R in VT
997 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
998 * (RWorkspace: N)
999 *
1000  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1001  $ work( itauq ), work( itaup ),
1002  $ work( iwork ), lwork-iwork+1, ierr )
1003 *
1004 * Multiply Q in A by left vectors bidiagonalizing R
1005 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1006 * (RWorkspace: 0)
1007 *
1008  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1009  $ work( itauq ), a, lda, work( iwork ),
1010  $ lwork-iwork+1, ierr )
1011 *
1012 * Generate right vectors bidiagonalizing R in VT
1013 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1014 * (RWorkspace: 0)
1015 *
1016  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1017  $ work( iwork ), lwork-iwork+1, ierr )
1018  irwork = ie + n
1019 *
1020 * Perform bidiagonal QR iteration, computing left
1021 * singular vectors of A in A and computing right
1022 * singular vectors of A in VT
1023 * (CWorkspace: 0)
1024 * (RWorkspace: need BDSPAC)
1025 *
1026  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1027  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1028  $ info )
1029 *
1030  END IF
1031 *
1032  ELSE IF( wntus ) THEN
1033 *
1034  IF( wntvn ) THEN
1035 *
1036 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1037 * N left singular vectors to be computed in U and
1038 * no right singular vectors to be computed
1039 *
1040  IF( lwork.GE.n*n+3*n ) THEN
1041 *
1042 * Sufficient workspace for a fast algorithm
1043 *
1044  ir = 1
1045  IF( lwork.GE.wrkbl+lda*n ) THEN
1046 *
1047 * WORK(IR) is LDA by N
1048 *
1049  ldwrkr = lda
1050  ELSE
1051 *
1052 * WORK(IR) is N by N
1053 *
1054  ldwrkr = n
1055  END IF
1056  itau = ir + ldwrkr*n
1057  iwork = itau + n
1058 *
1059 * Compute A=Q*R
1060 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1061 * (RWorkspace: 0)
1062 *
1063  CALL zgeqrf( m, n, a, lda, work( itau ),
1064  $ work( iwork ), lwork-iwork+1, ierr )
1065 *
1066 * Copy R to WORK(IR), zeroing out below it
1067 *
1068  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1069  $ ldwrkr )
1070  CALL zlaset( 'L', n-1, n-1, czero, czero,
1071  $ work( ir+1 ), ldwrkr )
1072 *
1073 * Generate Q in A
1074 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1075 * (RWorkspace: 0)
1076 *
1077  CALL zungqr( m, n, n, a, lda, work( itau ),
1078  $ work( iwork ), lwork-iwork+1, ierr )
1079  ie = 1
1080  itauq = itau
1081  itaup = itauq + n
1082  iwork = itaup + n
1083 *
1084 * Bidiagonalize R in WORK(IR)
1085 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1086 * (RWorkspace: need N)
1087 *
1088  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1089  $ rwork( ie ), work( itauq ),
1090  $ work( itaup ), work( iwork ),
1091  $ lwork-iwork+1, ierr )
1092 *
1093 * Generate left vectors bidiagonalizing R in WORK(IR)
1094 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1095 * (RWorkspace: 0)
1096 *
1097  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1098  $ work( itauq ), work( iwork ),
1099  $ lwork-iwork+1, ierr )
1100  irwork = ie + n
1101 *
1102 * Perform bidiagonal QR iteration, computing left
1103 * singular vectors of R in WORK(IR)
1104 * (CWorkspace: need N*N)
1105 * (RWorkspace: need BDSPAC)
1106 *
1107  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1108  $ 1, work( ir ), ldwrkr, cdum, 1,
1109  $ rwork( irwork ), info )
1110 *
1111 * Multiply Q in A by left singular vectors of R in
1112 * WORK(IR), storing result in U
1113 * (CWorkspace: need N*N)
1114 * (RWorkspace: 0)
1115 *
1116  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1117  $ work( ir ), ldwrkr, czero, u, ldu )
1118 *
1119  ELSE
1120 *
1121 * Insufficient workspace for a fast algorithm
1122 *
1123  itau = 1
1124  iwork = itau + n
1125 *
1126 * Compute A=Q*R, copying result to U
1127 * (CWorkspace: need 2*N, prefer N+N*NB)
1128 * (RWorkspace: 0)
1129 *
1130  CALL zgeqrf( m, n, a, lda, work( itau ),
1131  $ work( iwork ), lwork-iwork+1, ierr )
1132  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1133 *
1134 * Generate Q in U
1135 * (CWorkspace: need 2*N, prefer N+N*NB)
1136 * (RWorkspace: 0)
1137 *
1138  CALL zungqr( m, n, n, u, ldu, work( itau ),
1139  $ work( iwork ), lwork-iwork+1, ierr )
1140  ie = 1
1141  itauq = itau
1142  itaup = itauq + n
1143  iwork = itaup + n
1144 *
1145 * Zero out below R in A
1146 *
1147  CALL zlaset( 'L', n-1, n-1, czero, czero,
1148  $ a( 2, 1 ), lda )
1149 *
1150 * Bidiagonalize R in A
1151 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1152 * (RWorkspace: need N)
1153 *
1154  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1155  $ work( itauq ), work( itaup ),
1156  $ work( iwork ), lwork-iwork+1, ierr )
1157 *
1158 * Multiply Q in U by left vectors bidiagonalizing R
1159 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1160 * (RWorkspace: 0)
1161 *
1162  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1163  $ work( itauq ), u, ldu, work( iwork ),
1164  $ lwork-iwork+1, ierr )
1165  irwork = ie + n
1166 *
1167 * Perform bidiagonal QR iteration, computing left
1168 * singular vectors of A in U
1169 * (CWorkspace: 0)
1170 * (RWorkspace: need BDSPAC)
1171 *
1172  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1173  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1174  $ info )
1175 *
1176  END IF
1177 *
1178  ELSE IF( wntvo ) THEN
1179 *
1180 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1181 * N left singular vectors to be computed in U and
1182 * N right singular vectors to be overwritten on A
1183 *
1184  IF( lwork.GE.2*n*n+3*n ) THEN
1185 *
1186 * Sufficient workspace for a fast algorithm
1187 *
1188  iu = 1
1189  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1190 *
1191 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1192 *
1193  ldwrku = lda
1194  ir = iu + ldwrku*n
1195  ldwrkr = lda
1196  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1197 *
1198 * WORK(IU) is LDA by N and WORK(IR) is N by N
1199 *
1200  ldwrku = lda
1201  ir = iu + ldwrku*n
1202  ldwrkr = n
1203  ELSE
1204 *
1205 * WORK(IU) is N by N and WORK(IR) is N by N
1206 *
1207  ldwrku = n
1208  ir = iu + ldwrku*n
1209  ldwrkr = n
1210  END IF
1211  itau = ir + ldwrkr*n
1212  iwork = itau + n
1213 *
1214 * Compute A=Q*R
1215 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1216 * (RWorkspace: 0)
1217 *
1218  CALL zgeqrf( m, n, a, lda, work( itau ),
1219  $ work( iwork ), lwork-iwork+1, ierr )
1220 *
1221 * Copy R to WORK(IU), zeroing out below it
1222 *
1223  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1224  $ ldwrku )
1225  CALL zlaset( 'L', n-1, n-1, czero, czero,
1226  $ work( iu+1 ), ldwrku )
1227 *
1228 * Generate Q in A
1229 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1230 * (RWorkspace: 0)
1231 *
1232  CALL zungqr( m, n, n, a, lda, work( itau ),
1233  $ work( iwork ), lwork-iwork+1, ierr )
1234  ie = 1
1235  itauq = itau
1236  itaup = itauq + n
1237  iwork = itaup + n
1238 *
1239 * Bidiagonalize R in WORK(IU), copying result to
1240 * WORK(IR)
1241 * (CWorkspace: need 2*N*N+3*N,
1242 * prefer 2*N*N+2*N+2*N*NB)
1243 * (RWorkspace: need N)
1244 *
1245  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1246  $ rwork( ie ), work( itauq ),
1247  $ work( itaup ), work( iwork ),
1248  $ lwork-iwork+1, ierr )
1249  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1250  $ work( ir ), ldwrkr )
1251 *
1252 * Generate left bidiagonalizing vectors in WORK(IU)
1253 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1254 * (RWorkspace: 0)
1255 *
1256  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1257  $ work( itauq ), work( iwork ),
1258  $ lwork-iwork+1, ierr )
1259 *
1260 * Generate right bidiagonalizing vectors in WORK(IR)
1261 * (CWorkspace: need 2*N*N+3*N-1,
1262 * prefer 2*N*N+2*N+(N-1)*NB)
1263 * (RWorkspace: 0)
1264 *
1265  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1266  $ work( itaup ), work( iwork ),
1267  $ lwork-iwork+1, ierr )
1268  irwork = ie + n
1269 *
1270 * Perform bidiagonal QR iteration, computing left
1271 * singular vectors of R in WORK(IU) and computing
1272 * right singular vectors of R in WORK(IR)
1273 * (CWorkspace: need 2*N*N)
1274 * (RWorkspace: need BDSPAC)
1275 *
1276  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1277  $ work( ir ), ldwrkr, work( iu ),
1278  $ ldwrku, cdum, 1, rwork( irwork ),
1279  $ info )
1280 *
1281 * Multiply Q in A by left singular vectors of R in
1282 * WORK(IU), storing result in U
1283 * (CWorkspace: need N*N)
1284 * (RWorkspace: 0)
1285 *
1286  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1287  $ work( iu ), ldwrku, czero, u, ldu )
1288 *
1289 * Copy right singular vectors of R to A
1290 * (CWorkspace: need N*N)
1291 * (RWorkspace: 0)
1292 *
1293  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1294  $ lda )
1295 *
1296  ELSE
1297 *
1298 * Insufficient workspace for a fast algorithm
1299 *
1300  itau = 1
1301  iwork = itau + n
1302 *
1303 * Compute A=Q*R, copying result to U
1304 * (CWorkspace: need 2*N, prefer N+N*NB)
1305 * (RWorkspace: 0)
1306 *
1307  CALL zgeqrf( m, n, a, lda, work( itau ),
1308  $ work( iwork ), lwork-iwork+1, ierr )
1309  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1310 *
1311 * Generate Q in U
1312 * (CWorkspace: need 2*N, prefer N+N*NB)
1313 * (RWorkspace: 0)
1314 *
1315  CALL zungqr( m, n, n, u, ldu, work( itau ),
1316  $ work( iwork ), lwork-iwork+1, ierr )
1317  ie = 1
1318  itauq = itau
1319  itaup = itauq + n
1320  iwork = itaup + n
1321 *
1322 * Zero out below R in A
1323 *
1324  CALL zlaset( 'L', n-1, n-1, czero, czero,
1325  $ a( 2, 1 ), lda )
1326 *
1327 * Bidiagonalize R in A
1328 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1329 * (RWorkspace: need N)
1330 *
1331  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1332  $ work( itauq ), work( itaup ),
1333  $ work( iwork ), lwork-iwork+1, ierr )
1334 *
1335 * Multiply Q in U by left vectors bidiagonalizing R
1336 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1337 * (RWorkspace: 0)
1338 *
1339  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1340  $ work( itauq ), u, ldu, work( iwork ),
1341  $ lwork-iwork+1, ierr )
1342 *
1343 * Generate right vectors bidiagonalizing R in A
1344 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1345 * (RWorkspace: 0)
1346 *
1347  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1348  $ work( iwork ), lwork-iwork+1, ierr )
1349  irwork = ie + n
1350 *
1351 * Perform bidiagonal QR iteration, computing left
1352 * singular vectors of A in U and computing right
1353 * singular vectors of A in A
1354 * (CWorkspace: 0)
1355 * (RWorkspace: need BDSPAC)
1356 *
1357  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1358  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1359  $ info )
1360 *
1361  END IF
1362 *
1363  ELSE IF( wntvas ) THEN
1364 *
1365 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1366 * or 'A')
1367 * N left singular vectors to be computed in U and
1368 * N right singular vectors to be computed in VT
1369 *
1370  IF( lwork.GE.n*n+3*n ) THEN
1371 *
1372 * Sufficient workspace for a fast algorithm
1373 *
1374  iu = 1
1375  IF( lwork.GE.wrkbl+lda*n ) THEN
1376 *
1377 * WORK(IU) is LDA by N
1378 *
1379  ldwrku = lda
1380  ELSE
1381 *
1382 * WORK(IU) is N by N
1383 *
1384  ldwrku = n
1385  END IF
1386  itau = iu + ldwrku*n
1387  iwork = itau + n
1388 *
1389 * Compute A=Q*R
1390 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1391 * (RWorkspace: 0)
1392 *
1393  CALL zgeqrf( m, n, a, lda, work( itau ),
1394  $ work( iwork ), lwork-iwork+1, ierr )
1395 *
1396 * Copy R to WORK(IU), zeroing out below it
1397 *
1398  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1399  $ ldwrku )
1400  CALL zlaset( 'L', n-1, n-1, czero, czero,
1401  $ work( iu+1 ), ldwrku )
1402 *
1403 * Generate Q in A
1404 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1405 * (RWorkspace: 0)
1406 *
1407  CALL zungqr( m, n, n, a, lda, work( itau ),
1408  $ work( iwork ), lwork-iwork+1, ierr )
1409  ie = 1
1410  itauq = itau
1411  itaup = itauq + n
1412  iwork = itaup + n
1413 *
1414 * Bidiagonalize R in WORK(IU), copying result to VT
1415 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1416 * (RWorkspace: need N)
1417 *
1418  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1419  $ rwork( ie ), work( itauq ),
1420  $ work( itaup ), work( iwork ),
1421  $ lwork-iwork+1, ierr )
1422  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1423  $ ldvt )
1424 *
1425 * Generate left bidiagonalizing vectors in WORK(IU)
1426 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1427 * (RWorkspace: 0)
1428 *
1429  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1430  $ work( itauq ), work( iwork ),
1431  $ lwork-iwork+1, ierr )
1432 *
1433 * Generate right bidiagonalizing vectors in VT
1434 * (CWorkspace: need N*N+3*N-1,
1435 * prefer N*N+2*N+(N-1)*NB)
1436 * (RWorkspace: 0)
1437 *
1438  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1439  $ work( iwork ), lwork-iwork+1, ierr )
1440  irwork = ie + n
1441 *
1442 * Perform bidiagonal QR iteration, computing left
1443 * singular vectors of R in WORK(IU) and computing
1444 * right singular vectors of R in VT
1445 * (CWorkspace: need N*N)
1446 * (RWorkspace: need BDSPAC)
1447 *
1448  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1449  $ ldvt, work( iu ), ldwrku, cdum, 1,
1450  $ rwork( irwork ), info )
1451 *
1452 * Multiply Q in A by left singular vectors of R in
1453 * WORK(IU), storing result in U
1454 * (CWorkspace: need N*N)
1455 * (RWorkspace: 0)
1456 *
1457  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1458  $ work( iu ), ldwrku, czero, u, ldu )
1459 *
1460  ELSE
1461 *
1462 * Insufficient workspace for a fast algorithm
1463 *
1464  itau = 1
1465  iwork = itau + n
1466 *
1467 * Compute A=Q*R, copying result to U
1468 * (CWorkspace: need 2*N, prefer N+N*NB)
1469 * (RWorkspace: 0)
1470 *
1471  CALL zgeqrf( m, n, a, lda, work( itau ),
1472  $ work( iwork ), lwork-iwork+1, ierr )
1473  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1474 *
1475 * Generate Q in U
1476 * (CWorkspace: need 2*N, prefer N+N*NB)
1477 * (RWorkspace: 0)
1478 *
1479  CALL zungqr( m, n, n, u, ldu, work( itau ),
1480  $ work( iwork ), lwork-iwork+1, ierr )
1481 *
1482 * Copy R to VT, zeroing out below it
1483 *
1484  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1485  IF( n.GT.1 )
1486  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
1487  $ vt( 2, 1 ), ldvt )
1488  ie = 1
1489  itauq = itau
1490  itaup = itauq + n
1491  iwork = itaup + n
1492 *
1493 * Bidiagonalize R in VT
1494 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1495 * (RWorkspace: need N)
1496 *
1497  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1498  $ work( itauq ), work( itaup ),
1499  $ work( iwork ), lwork-iwork+1, ierr )
1500 *
1501 * Multiply Q in U by left bidiagonalizing vectors
1502 * in VT
1503 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1504 * (RWorkspace: 0)
1505 *
1506  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1507  $ work( itauq ), u, ldu, work( iwork ),
1508  $ lwork-iwork+1, ierr )
1509 *
1510 * Generate right bidiagonalizing vectors in VT
1511 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1512 * (RWorkspace: 0)
1513 *
1514  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1515  $ work( iwork ), lwork-iwork+1, ierr )
1516  irwork = ie + n
1517 *
1518 * Perform bidiagonal QR iteration, computing left
1519 * singular vectors of A in U and computing right
1520 * singular vectors of A in VT
1521 * (CWorkspace: 0)
1522 * (RWorkspace: need BDSPAC)
1523 *
1524  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1525  $ ldvt, u, ldu, cdum, 1,
1526  $ rwork( irwork ), info )
1527 *
1528  END IF
1529 *
1530  END IF
1531 *
1532  ELSE IF( wntua ) THEN
1533 *
1534  IF( wntvn ) THEN
1535 *
1536 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1537 * M left singular vectors to be computed in U and
1538 * no right singular vectors to be computed
1539 *
1540  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1541 *
1542 * Sufficient workspace for a fast algorithm
1543 *
1544  ir = 1
1545  IF( lwork.GE.wrkbl+lda*n ) THEN
1546 *
1547 * WORK(IR) is LDA by N
1548 *
1549  ldwrkr = lda
1550  ELSE
1551 *
1552 * WORK(IR) is N by N
1553 *
1554  ldwrkr = n
1555  END IF
1556  itau = ir + ldwrkr*n
1557  iwork = itau + n
1558 *
1559 * Compute A=Q*R, copying result to U
1560 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1561 * (RWorkspace: 0)
1562 *
1563  CALL zgeqrf( m, n, a, lda, work( itau ),
1564  $ work( iwork ), lwork-iwork+1, ierr )
1565  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1566 *
1567 * Copy R to WORK(IR), zeroing out below it
1568 *
1569  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1570  $ ldwrkr )
1571  CALL zlaset( 'L', n-1, n-1, czero, czero,
1572  $ work( ir+1 ), ldwrkr )
1573 *
1574 * Generate Q in U
1575 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1576 * (RWorkspace: 0)
1577 *
1578  CALL zungqr( m, m, n, u, ldu, work( itau ),
1579  $ work( iwork ), lwork-iwork+1, ierr )
1580  ie = 1
1581  itauq = itau
1582  itaup = itauq + n
1583  iwork = itaup + n
1584 *
1585 * Bidiagonalize R in WORK(IR)
1586 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1587 * (RWorkspace: need N)
1588 *
1589  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1590  $ rwork( ie ), work( itauq ),
1591  $ work( itaup ), work( iwork ),
1592  $ lwork-iwork+1, ierr )
1593 *
1594 * Generate left bidiagonalizing vectors in WORK(IR)
1595 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1596 * (RWorkspace: 0)
1597 *
1598  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1599  $ work( itauq ), work( iwork ),
1600  $ lwork-iwork+1, ierr )
1601  irwork = ie + n
1602 *
1603 * Perform bidiagonal QR iteration, computing left
1604 * singular vectors of R in WORK(IR)
1605 * (CWorkspace: need N*N)
1606 * (RWorkspace: need BDSPAC)
1607 *
1608  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1609  $ 1, work( ir ), ldwrkr, cdum, 1,
1610  $ rwork( irwork ), info )
1611 *
1612 * Multiply Q in U by left singular vectors of R in
1613 * WORK(IR), storing result in A
1614 * (CWorkspace: need N*N)
1615 * (RWorkspace: 0)
1616 *
1617  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1618  $ work( ir ), ldwrkr, czero, a, lda )
1619 *
1620 * Copy left singular vectors of A from A to U
1621 *
1622  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1623 *
1624  ELSE
1625 *
1626 * Insufficient workspace for a fast algorithm
1627 *
1628  itau = 1
1629  iwork = itau + n
1630 *
1631 * Compute A=Q*R, copying result to U
1632 * (CWorkspace: need 2*N, prefer N+N*NB)
1633 * (RWorkspace: 0)
1634 *
1635  CALL zgeqrf( m, n, a, lda, work( itau ),
1636  $ work( iwork ), lwork-iwork+1, ierr )
1637  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1638 *
1639 * Generate Q in U
1640 * (CWorkspace: need N+M, prefer N+M*NB)
1641 * (RWorkspace: 0)
1642 *
1643  CALL zungqr( m, m, n, u, ldu, work( itau ),
1644  $ work( iwork ), lwork-iwork+1, ierr )
1645  ie = 1
1646  itauq = itau
1647  itaup = itauq + n
1648  iwork = itaup + n
1649 *
1650 * Zero out below R in A
1651 *
1652  CALL zlaset( 'L', n-1, n-1, czero, czero,
1653  $ a( 2, 1 ), lda )
1654 *
1655 * Bidiagonalize R in A
1656 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1657 * (RWorkspace: need N)
1658 *
1659  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1660  $ work( itauq ), work( itaup ),
1661  $ work( iwork ), lwork-iwork+1, ierr )
1662 *
1663 * Multiply Q in U by left bidiagonalizing vectors
1664 * in A
1665 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1666 * (RWorkspace: 0)
1667 *
1668  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1669  $ work( itauq ), u, ldu, work( iwork ),
1670  $ lwork-iwork+1, ierr )
1671  irwork = ie + n
1672 *
1673 * Perform bidiagonal QR iteration, computing left
1674 * singular vectors of A in U
1675 * (CWorkspace: 0)
1676 * (RWorkspace: need BDSPAC)
1677 *
1678  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1679  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1680  $ info )
1681 *
1682  END IF
1683 *
1684  ELSE IF( wntvo ) THEN
1685 *
1686 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1687 * M left singular vectors to be computed in U and
1688 * N right singular vectors to be overwritten on A
1689 *
1690  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1691 *
1692 * Sufficient workspace for a fast algorithm
1693 *
1694  iu = 1
1695  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1696 *
1697 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1698 *
1699  ldwrku = lda
1700  ir = iu + ldwrku*n
1701  ldwrkr = lda
1702  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1703 *
1704 * WORK(IU) is LDA by N and WORK(IR) is N by N
1705 *
1706  ldwrku = lda
1707  ir = iu + ldwrku*n
1708  ldwrkr = n
1709  ELSE
1710 *
1711 * WORK(IU) is N by N and WORK(IR) is N by N
1712 *
1713  ldwrku = n
1714  ir = iu + ldwrku*n
1715  ldwrkr = n
1716  END IF
1717  itau = ir + ldwrkr*n
1718  iwork = itau + n
1719 *
1720 * Compute A=Q*R, copying result to U
1721 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1722 * (RWorkspace: 0)
1723 *
1724  CALL zgeqrf( m, n, a, lda, work( itau ),
1725  $ work( iwork ), lwork-iwork+1, ierr )
1726  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1727 *
1728 * Generate Q in U
1729 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1730 * (RWorkspace: 0)
1731 *
1732  CALL zungqr( m, m, n, u, ldu, work( itau ),
1733  $ work( iwork ), lwork-iwork+1, ierr )
1734 *
1735 * Copy R to WORK(IU), zeroing out below it
1736 *
1737  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1738  $ ldwrku )
1739  CALL zlaset( 'L', n-1, n-1, czero, czero,
1740  $ work( iu+1 ), ldwrku )
1741  ie = 1
1742  itauq = itau
1743  itaup = itauq + n
1744  iwork = itaup + n
1745 *
1746 * Bidiagonalize R in WORK(IU), copying result to
1747 * WORK(IR)
1748 * (CWorkspace: need 2*N*N+3*N,
1749 * prefer 2*N*N+2*N+2*N*NB)
1750 * (RWorkspace: need N)
1751 *
1752  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1753  $ rwork( ie ), work( itauq ),
1754  $ work( itaup ), work( iwork ),
1755  $ lwork-iwork+1, ierr )
1756  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1757  $ work( ir ), ldwrkr )
1758 *
1759 * Generate left bidiagonalizing vectors in WORK(IU)
1760 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1761 * (RWorkspace: 0)
1762 *
1763  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1764  $ work( itauq ), work( iwork ),
1765  $ lwork-iwork+1, ierr )
1766 *
1767 * Generate right bidiagonalizing vectors in WORK(IR)
1768 * (CWorkspace: need 2*N*N+3*N-1,
1769 * prefer 2*N*N+2*N+(N-1)*NB)
1770 * (RWorkspace: 0)
1771 *
1772  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1773  $ work( itaup ), work( iwork ),
1774  $ lwork-iwork+1, ierr )
1775  irwork = ie + n
1776 *
1777 * Perform bidiagonal QR iteration, computing left
1778 * singular vectors of R in WORK(IU) and computing
1779 * right singular vectors of R in WORK(IR)
1780 * (CWorkspace: need 2*N*N)
1781 * (RWorkspace: need BDSPAC)
1782 *
1783  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1784  $ work( ir ), ldwrkr, work( iu ),
1785  $ ldwrku, cdum, 1, rwork( irwork ),
1786  $ info )
1787 *
1788 * Multiply Q in U by left singular vectors of R in
1789 * WORK(IU), storing result in A
1790 * (CWorkspace: need N*N)
1791 * (RWorkspace: 0)
1792 *
1793  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1794  $ work( iu ), ldwrku, czero, a, lda )
1795 *
1796 * Copy left singular vectors of A from A to U
1797 *
1798  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1799 *
1800 * Copy right singular vectors of R from WORK(IR) to A
1801 *
1802  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1803  $ lda )
1804 *
1805  ELSE
1806 *
1807 * Insufficient workspace for a fast algorithm
1808 *
1809  itau = 1
1810  iwork = itau + n
1811 *
1812 * Compute A=Q*R, copying result to U
1813 * (CWorkspace: need 2*N, prefer N+N*NB)
1814 * (RWorkspace: 0)
1815 *
1816  CALL zgeqrf( m, n, a, lda, work( itau ),
1817  $ work( iwork ), lwork-iwork+1, ierr )
1818  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1819 *
1820 * Generate Q in U
1821 * (CWorkspace: need N+M, prefer N+M*NB)
1822 * (RWorkspace: 0)
1823 *
1824  CALL zungqr( m, m, n, u, ldu, work( itau ),
1825  $ work( iwork ), lwork-iwork+1, ierr )
1826  ie = 1
1827  itauq = itau
1828  itaup = itauq + n
1829  iwork = itaup + n
1830 *
1831 * Zero out below R in A
1832 *
1833  CALL zlaset( 'L', n-1, n-1, czero, czero,
1834  $ a( 2, 1 ), lda )
1835 *
1836 * Bidiagonalize R in A
1837 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1838 * (RWorkspace: need N)
1839 *
1840  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1841  $ work( itauq ), work( itaup ),
1842  $ work( iwork ), lwork-iwork+1, ierr )
1843 *
1844 * Multiply Q in U by left bidiagonalizing vectors
1845 * in A
1846 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1847 * (RWorkspace: 0)
1848 *
1849  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1850  $ work( itauq ), u, ldu, work( iwork ),
1851  $ lwork-iwork+1, ierr )
1852 *
1853 * Generate right bidiagonalizing vectors in A
1854 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1855 * (RWorkspace: 0)
1856 *
1857  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1858  $ work( iwork ), lwork-iwork+1, ierr )
1859  irwork = ie + n
1860 *
1861 * Perform bidiagonal QR iteration, computing left
1862 * singular vectors of A in U and computing right
1863 * singular vectors of A in A
1864 * (CWorkspace: 0)
1865 * (RWorkspace: need BDSPAC)
1866 *
1867  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1868  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1869  $ info )
1870 *
1871  END IF
1872 *
1873  ELSE IF( wntvas ) THEN
1874 *
1875 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1876 * or 'A')
1877 * M left singular vectors to be computed in U and
1878 * N right singular vectors to be computed in VT
1879 *
1880  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1881 *
1882 * Sufficient workspace for a fast algorithm
1883 *
1884  iu = 1
1885  IF( lwork.GE.wrkbl+lda*n ) THEN
1886 *
1887 * WORK(IU) is LDA by N
1888 *
1889  ldwrku = lda
1890  ELSE
1891 *
1892 * WORK(IU) is N by N
1893 *
1894  ldwrku = n
1895  END IF
1896  itau = iu + ldwrku*n
1897  iwork = itau + n
1898 *
1899 * Compute A=Q*R, copying result to U
1900 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1901 * (RWorkspace: 0)
1902 *
1903  CALL zgeqrf( m, n, a, lda, work( itau ),
1904  $ work( iwork ), lwork-iwork+1, ierr )
1905  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1906 *
1907 * Generate Q in U
1908 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1909 * (RWorkspace: 0)
1910 *
1911  CALL zungqr( m, m, n, u, ldu, work( itau ),
1912  $ work( iwork ), lwork-iwork+1, ierr )
1913 *
1914 * Copy R to WORK(IU), zeroing out below it
1915 *
1916  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1917  $ ldwrku )
1918  CALL zlaset( 'L', n-1, n-1, czero, czero,
1919  $ work( iu+1 ), ldwrku )
1920  ie = 1
1921  itauq = itau
1922  itaup = itauq + n
1923  iwork = itaup + n
1924 *
1925 * Bidiagonalize R in WORK(IU), copying result to VT
1926 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1927 * (RWorkspace: need N)
1928 *
1929  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1930  $ rwork( ie ), work( itauq ),
1931  $ work( itaup ), work( iwork ),
1932  $ lwork-iwork+1, ierr )
1933  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1934  $ ldvt )
1935 *
1936 * Generate left bidiagonalizing vectors in WORK(IU)
1937 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1938 * (RWorkspace: 0)
1939 *
1940  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1941  $ work( itauq ), work( iwork ),
1942  $ lwork-iwork+1, ierr )
1943 *
1944 * Generate right bidiagonalizing vectors in VT
1945 * (CWorkspace: need N*N+3*N-1,
1946 * prefer N*N+2*N+(N-1)*NB)
1947 * (RWorkspace: need 0)
1948 *
1949  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1950  $ work( iwork ), lwork-iwork+1, ierr )
1951  irwork = ie + n
1952 *
1953 * Perform bidiagonal QR iteration, computing left
1954 * singular vectors of R in WORK(IU) and computing
1955 * right singular vectors of R in VT
1956 * (CWorkspace: need N*N)
1957 * (RWorkspace: need BDSPAC)
1958 *
1959  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1960  $ ldvt, work( iu ), ldwrku, cdum, 1,
1961  $ rwork( irwork ), info )
1962 *
1963 * Multiply Q in U by left singular vectors of R in
1964 * WORK(IU), storing result in A
1965 * (CWorkspace: need N*N)
1966 * (RWorkspace: 0)
1967 *
1968  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1969  $ work( iu ), ldwrku, czero, a, lda )
1970 *
1971 * Copy left singular vectors of A from A to U
1972 *
1973  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1974 *
1975  ELSE
1976 *
1977 * Insufficient workspace for a fast algorithm
1978 *
1979  itau = 1
1980  iwork = itau + n
1981 *
1982 * Compute A=Q*R, copying result to U
1983 * (CWorkspace: need 2*N, prefer N+N*NB)
1984 * (RWorkspace: 0)
1985 *
1986  CALL zgeqrf( m, n, a, lda, work( itau ),
1987  $ work( iwork ), lwork-iwork+1, ierr )
1988  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1989 *
1990 * Generate Q in U
1991 * (CWorkspace: need N+M, prefer N+M*NB)
1992 * (RWorkspace: 0)
1993 *
1994  CALL zungqr( m, m, n, u, ldu, work( itau ),
1995  $ work( iwork ), lwork-iwork+1, ierr )
1996 *
1997 * Copy R from A to VT, zeroing out below it
1998 *
1999  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2000  IF( n.GT.1 )
2001  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
2002  $ vt( 2, 1 ), ldvt )
2003  ie = 1
2004  itauq = itau
2005  itaup = itauq + n
2006  iwork = itaup + n
2007 *
2008 * Bidiagonalize R in VT
2009 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2010 * (RWorkspace: need N)
2011 *
2012  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
2013  $ work( itauq ), work( itaup ),
2014  $ work( iwork ), lwork-iwork+1, ierr )
2015 *
2016 * Multiply Q in U by left bidiagonalizing vectors
2017 * in VT
2018 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2019 * (RWorkspace: 0)
2020 *
2021  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2022  $ work( itauq ), u, ldu, work( iwork ),
2023  $ lwork-iwork+1, ierr )
2024 *
2025 * Generate right bidiagonalizing vectors in VT
2026 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2027 * (RWorkspace: 0)
2028 *
2029  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2030  $ work( iwork ), lwork-iwork+1, ierr )
2031  irwork = ie + n
2032 *
2033 * Perform bidiagonal QR iteration, computing left
2034 * singular vectors of A in U and computing right
2035 * singular vectors of A in VT
2036 * (CWorkspace: 0)
2037 * (RWorkspace: need BDSPAC)
2038 *
2039  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2040  $ ldvt, u, ldu, cdum, 1,
2041  $ rwork( irwork ), info )
2042 *
2043  END IF
2044 *
2045  END IF
2046 *
2047  END IF
2048 *
2049  ELSE
2050 *
2051 * M .LT. MNTHR
2052 *
2053 * Path 10 (M at least N, but not much larger)
2054 * Reduce to bidiagonal form without QR decomposition
2055 *
2056  ie = 1
2057  itauq = 1
2058  itaup = itauq + n
2059  iwork = itaup + n
2060 *
2061 * Bidiagonalize A
2062 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2063 * (RWorkspace: need N)
2064 *
2065  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2066  $ work( itaup ), work( iwork ), lwork-iwork+1,
2067  $ ierr )
2068  IF( wntuas ) THEN
2069 *
2070 * If left singular vectors desired in U, copy result to U
2071 * and generate left bidiagonalizing vectors in U
2072 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2073 * (RWorkspace: 0)
2074 *
2075  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
2076  IF( wntus )
2077  $ ncu = n
2078  IF( wntua )
2079  $ ncu = m
2080  CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2081  $ work( iwork ), lwork-iwork+1, ierr )
2082  END IF
2083  IF( wntvas ) THEN
2084 *
2085 * If right singular vectors desired in VT, copy result to
2086 * VT and generate right bidiagonalizing vectors in VT
2087 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2088 * (RWorkspace: 0)
2089 *
2090  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2091  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2092  $ work( iwork ), lwork-iwork+1, ierr )
2093  END IF
2094  IF( wntuo ) THEN
2095 *
2096 * If left singular vectors desired in A, generate left
2097 * bidiagonalizing vectors in A
2098 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2099 * (RWorkspace: 0)
2100 *
2101  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
2102  $ work( iwork ), lwork-iwork+1, ierr )
2103  END IF
2104  IF( wntvo ) THEN
2105 *
2106 * If right singular vectors desired in A, generate right
2107 * bidiagonalizing vectors in A
2108 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2109 * (RWorkspace: 0)
2110 *
2111  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
2112  $ work( iwork ), lwork-iwork+1, ierr )
2113  END IF
2114  irwork = ie + n
2115  IF( wntuas .OR. wntuo )
2116  $ nru = m
2117  IF( wntun )
2118  $ nru = 0
2119  IF( wntvas .OR. wntvo )
2120  $ ncvt = n
2121  IF( wntvn )
2122  $ ncvt = 0
2123  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2124 *
2125 * Perform bidiagonal QR iteration, if desired, computing
2126 * left singular vectors in U and computing right singular
2127 * vectors in VT
2128 * (CWorkspace: 0)
2129 * (RWorkspace: need BDSPAC)
2130 *
2131  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2132  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2133  $ info )
2134  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2135 *
2136 * Perform bidiagonal QR iteration, if desired, computing
2137 * left singular vectors in U and computing right singular
2138 * vectors in A
2139 * (CWorkspace: 0)
2140 * (RWorkspace: need BDSPAC)
2141 *
2142  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2143  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2144  $ info )
2145  ELSE
2146 *
2147 * Perform bidiagonal QR iteration, if desired, computing
2148 * left singular vectors in A and computing right singular
2149 * vectors in VT
2150 * (CWorkspace: 0)
2151 * (RWorkspace: need BDSPAC)
2152 *
2153  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2154  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2155  $ info )
2156  END IF
2157 *
2158  END IF
2159 *
2160  ELSE
2161 *
2162 * A has more columns than rows. If A has sufficiently more
2163 * columns than rows, first reduce using the LQ decomposition (if
2164 * sufficient workspace available)
2165 *
2166  IF( n.GE.mnthr ) THEN
2167 *
2168  IF( wntvn ) THEN
2169 *
2170 * Path 1t(N much larger than M, JOBVT='N')
2171 * No right singular vectors to be computed
2172 *
2173  itau = 1
2174  iwork = itau + m
2175 *
2176 * Compute A=L*Q
2177 * (CWorkspace: need 2*M, prefer M+M*NB)
2178 * (RWorkspace: 0)
2179 *
2180  CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
2181  $ lwork-iwork+1, ierr )
2182 *
2183 * Zero out above L
2184 *
2185  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2186  $ lda )
2187  ie = 1
2188  itauq = 1
2189  itaup = itauq + m
2190  iwork = itaup + m
2191 *
2192 * Bidiagonalize L in A
2193 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2194 * (RWorkspace: need M)
2195 *
2196  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2197  $ work( itaup ), work( iwork ), lwork-iwork+1,
2198  $ ierr )
2199  IF( wntuo .OR. wntuas ) THEN
2200 *
2201 * If left singular vectors desired, generate Q
2202 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2203 * (RWorkspace: 0)
2204 *
2205  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2206  $ work( iwork ), lwork-iwork+1, ierr )
2207  END IF
2208  irwork = ie + m
2209  nru = 0
2210  IF( wntuo .OR. wntuas )
2211  $ nru = m
2212 *
2213 * Perform bidiagonal QR iteration, computing left singular
2214 * vectors of A in A if desired
2215 * (CWorkspace: 0)
2216 * (RWorkspace: need BDSPAC)
2217 *
2218  CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2219  $ a, lda, cdum, 1, rwork( irwork ), info )
2220 *
2221 * If left singular vectors desired in U, copy them there
2222 *
2223  IF( wntuas )
2224  $ CALL zlacpy( 'F', m, m, a, lda, u, ldu )
2225 *
2226  ELSE IF( wntvo .AND. wntun ) THEN
2227 *
2228 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2229 * M right singular vectors to be overwritten on A and
2230 * no left singular vectors to be computed
2231 *
2232  IF( lwork.GE.m*m+3*m ) THEN
2233 *
2234 * Sufficient workspace for a fast algorithm
2235 *
2236  ir = 1
2237  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2238 *
2239 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2240 *
2241  ldwrku = lda
2242  chunk = n
2243  ldwrkr = lda
2244  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2245 *
2246 * WORK(IU) is LDA by N and WORK(IR) is M by M
2247 *
2248  ldwrku = lda
2249  chunk = n
2250  ldwrkr = m
2251  ELSE
2252 *
2253 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2254 *
2255  ldwrku = m
2256  chunk = ( lwork-m*m ) / m
2257  ldwrkr = m
2258  END IF
2259  itau = ir + ldwrkr*m
2260  iwork = itau + m
2261 *
2262 * Compute A=L*Q
2263 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2264 * (RWorkspace: 0)
2265 *
2266  CALL zgelqf( m, n, a, lda, work( itau ),
2267  $ work( iwork ), lwork-iwork+1, ierr )
2268 *
2269 * Copy L to WORK(IR) and zero out above it
2270 *
2271  CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2272  CALL zlaset( 'U', m-1, m-1, czero, czero,
2273  $ work( ir+ldwrkr ), ldwrkr )
2274 *
2275 * Generate Q in A
2276 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2277 * (RWorkspace: 0)
2278 *
2279  CALL zunglq( m, n, m, a, lda, work( itau ),
2280  $ work( iwork ), lwork-iwork+1, ierr )
2281  ie = 1
2282  itauq = itau
2283  itaup = itauq + m
2284  iwork = itaup + m
2285 *
2286 * Bidiagonalize L in WORK(IR)
2287 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2288 * (RWorkspace: need M)
2289 *
2290  CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2291  $ work( itauq ), work( itaup ),
2292  $ work( iwork ), lwork-iwork+1, ierr )
2293 *
2294 * Generate right vectors bidiagonalizing L
2295 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2296 * (RWorkspace: 0)
2297 *
2298  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2299  $ work( itaup ), work( iwork ),
2300  $ lwork-iwork+1, ierr )
2301  irwork = ie + m
2302 *
2303 * Perform bidiagonal QR iteration, computing right
2304 * singular vectors of L in WORK(IR)
2305 * (CWorkspace: need M*M)
2306 * (RWorkspace: need BDSPAC)
2307 *
2308  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2309  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2310  $ rwork( irwork ), info )
2311  iu = itauq
2312 *
2313 * Multiply right singular vectors of L in WORK(IR) by Q
2314 * in A, storing result in WORK(IU) and copying to A
2315 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2316 * (RWorkspace: 0)
2317 *
2318  DO 30 i = 1, n, chunk
2319  blk = min( n-i+1, chunk )
2320  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2321  $ ldwrkr, a( 1, i ), lda, czero,
2322  $ work( iu ), ldwrku )
2323  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2324  $ a( 1, i ), lda )
2325  30 CONTINUE
2326 *
2327  ELSE
2328 *
2329 * Insufficient workspace for a fast algorithm
2330 *
2331  ie = 1
2332  itauq = 1
2333  itaup = itauq + m
2334  iwork = itaup + m
2335 *
2336 * Bidiagonalize A
2337 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2338 * (RWorkspace: need M)
2339 *
2340  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
2341  $ work( itauq ), work( itaup ),
2342  $ work( iwork ), lwork-iwork+1, ierr )
2343 *
2344 * Generate right vectors bidiagonalizing A
2345 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2346 * (RWorkspace: 0)
2347 *
2348  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2349  $ work( iwork ), lwork-iwork+1, ierr )
2350  irwork = ie + m
2351 *
2352 * Perform bidiagonal QR iteration, computing right
2353 * singular vectors of A in A
2354 * (CWorkspace: 0)
2355 * (RWorkspace: need BDSPAC)
2356 *
2357  CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2358  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2359 *
2360  END IF
2361 *
2362  ELSE IF( wntvo .AND. wntuas ) THEN
2363 *
2364 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2365 * M right singular vectors to be overwritten on A and
2366 * M left singular vectors to be computed in U
2367 *
2368  IF( lwork.GE.m*m+3*m ) THEN
2369 *
2370 * Sufficient workspace for a fast algorithm
2371 *
2372  ir = 1
2373  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2374 *
2375 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2376 *
2377  ldwrku = lda
2378  chunk = n
2379  ldwrkr = lda
2380  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2381 *
2382 * WORK(IU) is LDA by N and WORK(IR) is M by M
2383 *
2384  ldwrku = lda
2385  chunk = n
2386  ldwrkr = m
2387  ELSE
2388 *
2389 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2390 *
2391  ldwrku = m
2392  chunk = ( lwork-m*m ) / m
2393  ldwrkr = m
2394  END IF
2395  itau = ir + ldwrkr*m
2396  iwork = itau + m
2397 *
2398 * Compute A=L*Q
2399 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2400 * (RWorkspace: 0)
2401 *
2402  CALL zgelqf( m, n, a, lda, work( itau ),
2403  $ work( iwork ), lwork-iwork+1, ierr )
2404 *
2405 * Copy L to U, zeroing about above it
2406 *
2407  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2408  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2409  $ ldu )
2410 *
2411 * Generate Q in A
2412 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2413 * (RWorkspace: 0)
2414 *
2415  CALL zunglq( m, n, m, a, lda, work( itau ),
2416  $ work( iwork ), lwork-iwork+1, ierr )
2417  ie = 1
2418  itauq = itau
2419  itaup = itauq + m
2420  iwork = itaup + m
2421 *
2422 * Bidiagonalize L in U, copying result to WORK(IR)
2423 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2424 * (RWorkspace: need M)
2425 *
2426  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2427  $ work( itauq ), work( itaup ),
2428  $ work( iwork ), lwork-iwork+1, ierr )
2429  CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2430 *
2431 * Generate right vectors bidiagonalizing L in WORK(IR)
2432 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2433 * (RWorkspace: 0)
2434 *
2435  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2436  $ work( itaup ), work( iwork ),
2437  $ lwork-iwork+1, ierr )
2438 *
2439 * Generate left vectors bidiagonalizing L in U
2440 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2441 * (RWorkspace: 0)
2442 *
2443  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2444  $ work( iwork ), lwork-iwork+1, ierr )
2445  irwork = ie + m
2446 *
2447 * Perform bidiagonal QR iteration, computing left
2448 * singular vectors of L in U, and computing right
2449 * singular vectors of L in WORK(IR)
2450 * (CWorkspace: need M*M)
2451 * (RWorkspace: need BDSPAC)
2452 *
2453  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2454  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2455  $ rwork( irwork ), info )
2456  iu = itauq
2457 *
2458 * Multiply right singular vectors of L in WORK(IR) by Q
2459 * in A, storing result in WORK(IU) and copying to A
2460 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2461 * (RWorkspace: 0)
2462 *
2463  DO 40 i = 1, n, chunk
2464  blk = min( n-i+1, chunk )
2465  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2466  $ ldwrkr, a( 1, i ), lda, czero,
2467  $ work( iu ), ldwrku )
2468  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2469  $ a( 1, i ), lda )
2470  40 CONTINUE
2471 *
2472  ELSE
2473 *
2474 * Insufficient workspace for a fast algorithm
2475 *
2476  itau = 1
2477  iwork = itau + m
2478 *
2479 * Compute A=L*Q
2480 * (CWorkspace: need 2*M, prefer M+M*NB)
2481 * (RWorkspace: 0)
2482 *
2483  CALL zgelqf( m, n, a, lda, work( itau ),
2484  $ work( iwork ), lwork-iwork+1, ierr )
2485 *
2486 * Copy L to U, zeroing out above it
2487 *
2488  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2489  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2490  $ ldu )
2491 *
2492 * Generate Q in A
2493 * (CWorkspace: need 2*M, prefer M+M*NB)
2494 * (RWorkspace: 0)
2495 *
2496  CALL zunglq( m, n, m, a, lda, work( itau ),
2497  $ work( iwork ), lwork-iwork+1, ierr )
2498  ie = 1
2499  itauq = itau
2500  itaup = itauq + m
2501  iwork = itaup + m
2502 *
2503 * Bidiagonalize L in U
2504 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2505 * (RWorkspace: need M)
2506 *
2507  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2508  $ work( itauq ), work( itaup ),
2509  $ work( iwork ), lwork-iwork+1, ierr )
2510 *
2511 * Multiply right vectors bidiagonalizing L by Q in A
2512 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2513 * (RWorkspace: 0)
2514 *
2515  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2516  $ work( itaup ), a, lda, work( iwork ),
2517  $ lwork-iwork+1, ierr )
2518 *
2519 * Generate left vectors bidiagonalizing L in U
2520 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2521 * (RWorkspace: 0)
2522 *
2523  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2524  $ work( iwork ), lwork-iwork+1, ierr )
2525  irwork = ie + m
2526 *
2527 * Perform bidiagonal QR iteration, computing left
2528 * singular vectors of A in U and computing right
2529 * singular vectors of A in A
2530 * (CWorkspace: 0)
2531 * (RWorkspace: need BDSPAC)
2532 *
2533  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2534  $ u, ldu, cdum, 1, rwork( irwork ), info )
2535 *
2536  END IF
2537 *
2538  ELSE IF( wntvs ) THEN
2539 *
2540  IF( wntun ) THEN
2541 *
2542 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2543 * M right singular vectors to be computed in VT and
2544 * no left singular vectors to be computed
2545 *
2546  IF( lwork.GE.m*m+3*m ) THEN
2547 *
2548 * Sufficient workspace for a fast algorithm
2549 *
2550  ir = 1
2551  IF( lwork.GE.wrkbl+lda*m ) THEN
2552 *
2553 * WORK(IR) is LDA by M
2554 *
2555  ldwrkr = lda
2556  ELSE
2557 *
2558 * WORK(IR) is M by M
2559 *
2560  ldwrkr = m
2561  END IF
2562  itau = ir + ldwrkr*m
2563  iwork = itau + m
2564 *
2565 * Compute A=L*Q
2566 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2567 * (RWorkspace: 0)
2568 *
2569  CALL zgelqf( m, n, a, lda, work( itau ),
2570  $ work( iwork ), lwork-iwork+1, ierr )
2571 *
2572 * Copy L to WORK(IR), zeroing out above it
2573 *
2574  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
2575  $ ldwrkr )
2576  CALL zlaset( 'U', m-1, m-1, czero, czero,
2577  $ work( ir+ldwrkr ), ldwrkr )
2578 *
2579 * Generate Q in A
2580 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2581 * (RWorkspace: 0)
2582 *
2583  CALL zunglq( m, n, m, a, lda, work( itau ),
2584  $ work( iwork ), lwork-iwork+1, ierr )
2585  ie = 1
2586  itauq = itau
2587  itaup = itauq + m
2588  iwork = itaup + m
2589 *
2590 * Bidiagonalize L in WORK(IR)
2591 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2592 * (RWorkspace: need M)
2593 *
2594  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
2595  $ rwork( ie ), work( itauq ),
2596  $ work( itaup ), work( iwork ),
2597  $ lwork-iwork+1, ierr )
2598 *
2599 * Generate right vectors bidiagonalizing L in
2600 * WORK(IR)
2601 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2602 * (RWorkspace: 0)
2603 *
2604  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2605  $ work( itaup ), work( iwork ),
2606  $ lwork-iwork+1, ierr )
2607  irwork = ie + m
2608 *
2609 * Perform bidiagonal QR iteration, computing right
2610 * singular vectors of L in WORK(IR)
2611 * (CWorkspace: need M*M)
2612 * (RWorkspace: need BDSPAC)
2613 *
2614  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2615  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2616  $ rwork( irwork ), info )
2617 *
2618 * Multiply right singular vectors of L in WORK(IR) by
2619 * Q in A, storing result in VT
2620 * (CWorkspace: need M*M)
2621 * (RWorkspace: 0)
2622 *
2623  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
2624  $ ldwrkr, a, lda, czero, vt, ldvt )
2625 *
2626  ELSE
2627 *
2628 * Insufficient workspace for a fast algorithm
2629 *
2630  itau = 1
2631  iwork = itau + m
2632 *
2633 * Compute A=L*Q
2634 * (CWorkspace: need 2*M, prefer M+M*NB)
2635 * (RWorkspace: 0)
2636 *
2637  CALL zgelqf( m, n, a, lda, work( itau ),
2638  $ work( iwork ), lwork-iwork+1, ierr )
2639 *
2640 * Copy result to VT
2641 *
2642  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2643 *
2644 * Generate Q in VT
2645 * (CWorkspace: need 2*M, prefer M+M*NB)
2646 * (RWorkspace: 0)
2647 *
2648  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2649  $ work( iwork ), lwork-iwork+1, ierr )
2650  ie = 1
2651  itauq = itau
2652  itaup = itauq + m
2653  iwork = itaup + m
2654 *
2655 * Zero out above L in A
2656 *
2657  CALL zlaset( 'U', m-1, m-1, czero, czero,
2658  $ a( 1, 2 ), lda )
2659 *
2660 * Bidiagonalize L in A
2661 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2662 * (RWorkspace: need M)
2663 *
2664  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2665  $ work( itauq ), work( itaup ),
2666  $ work( iwork ), lwork-iwork+1, ierr )
2667 *
2668 * Multiply right vectors bidiagonalizing L by Q in VT
2669 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2670 * (RWorkspace: 0)
2671 *
2672  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2673  $ work( itaup ), vt, ldvt,
2674  $ work( iwork ), lwork-iwork+1, ierr )
2675  irwork = ie + m
2676 *
2677 * Perform bidiagonal QR iteration, computing right
2678 * singular vectors of A in VT
2679 * (CWorkspace: 0)
2680 * (RWorkspace: need BDSPAC)
2681 *
2682  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2683  $ ldvt, cdum, 1, cdum, 1,
2684  $ rwork( irwork ), info )
2685 *
2686  END IF
2687 *
2688  ELSE IF( wntuo ) THEN
2689 *
2690 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2691 * M right singular vectors to be computed in VT and
2692 * M left singular vectors to be overwritten on A
2693 *
2694  IF( lwork.GE.2*m*m+3*m ) THEN
2695 *
2696 * Sufficient workspace for a fast algorithm
2697 *
2698  iu = 1
2699  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2700 *
2701 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2702 *
2703  ldwrku = lda
2704  ir = iu + ldwrku*m
2705  ldwrkr = lda
2706  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2707 *
2708 * WORK(IU) is LDA by M and WORK(IR) is M by M
2709 *
2710  ldwrku = lda
2711  ir = iu + ldwrku*m
2712  ldwrkr = m
2713  ELSE
2714 *
2715 * WORK(IU) is M by M and WORK(IR) is M by M
2716 *
2717  ldwrku = m
2718  ir = iu + ldwrku*m
2719  ldwrkr = m
2720  END IF
2721  itau = ir + ldwrkr*m
2722  iwork = itau + m
2723 *
2724 * Compute A=L*Q
2725 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2726 * (RWorkspace: 0)
2727 *
2728  CALL zgelqf( m, n, a, lda, work( itau ),
2729  $ work( iwork ), lwork-iwork+1, ierr )
2730 *
2731 * Copy L to WORK(IU), zeroing out below it
2732 *
2733  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2734  $ ldwrku )
2735  CALL zlaset( 'U', m-1, m-1, czero, czero,
2736  $ work( iu+ldwrku ), ldwrku )
2737 *
2738 * Generate Q in A
2739 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2740 * (RWorkspace: 0)
2741 *
2742  CALL zunglq( m, n, m, a, lda, work( itau ),
2743  $ work( iwork ), lwork-iwork+1, ierr )
2744  ie = 1
2745  itauq = itau
2746  itaup = itauq + m
2747  iwork = itaup + m
2748 *
2749 * Bidiagonalize L in WORK(IU), copying result to
2750 * WORK(IR)
2751 * (CWorkspace: need 2*M*M+3*M,
2752 * prefer 2*M*M+2*M+2*M*NB)
2753 * (RWorkspace: need M)
2754 *
2755  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2756  $ rwork( ie ), work( itauq ),
2757  $ work( itaup ), work( iwork ),
2758  $ lwork-iwork+1, ierr )
2759  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
2760  $ work( ir ), ldwrkr )
2761 *
2762 * Generate right bidiagonalizing vectors in WORK(IU)
2763 * (CWorkspace: need 2*M*M+3*M-1,
2764 * prefer 2*M*M+2*M+(M-1)*NB)
2765 * (RWorkspace: 0)
2766 *
2767  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2768  $ work( itaup ), work( iwork ),
2769  $ lwork-iwork+1, ierr )
2770 *
2771 * Generate left bidiagonalizing vectors in WORK(IR)
2772 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2773 * (RWorkspace: 0)
2774 *
2775  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2776  $ work( itauq ), work( iwork ),
2777  $ lwork-iwork+1, ierr )
2778  irwork = ie + m
2779 *
2780 * Perform bidiagonal QR iteration, computing left
2781 * singular vectors of L in WORK(IR) and computing
2782 * right singular vectors of L in WORK(IU)
2783 * (CWorkspace: need 2*M*M)
2784 * (RWorkspace: need BDSPAC)
2785 *
2786  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2787  $ work( iu ), ldwrku, work( ir ),
2788  $ ldwrkr, cdum, 1, rwork( irwork ),
2789  $ info )
2790 *
2791 * Multiply right singular vectors of L in WORK(IU) by
2792 * Q in A, storing result in VT
2793 * (CWorkspace: need M*M)
2794 * (RWorkspace: 0)
2795 *
2796  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2797  $ ldwrku, a, lda, czero, vt, ldvt )
2798 *
2799 * Copy left singular vectors of L to A
2800 * (CWorkspace: need M*M)
2801 * (RWorkspace: 0)
2802 *
2803  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2804  $ lda )
2805 *
2806  ELSE
2807 *
2808 * Insufficient workspace for a fast algorithm
2809 *
2810  itau = 1
2811  iwork = itau + m
2812 *
2813 * Compute A=L*Q, copying result to VT
2814 * (CWorkspace: need 2*M, prefer M+M*NB)
2815 * (RWorkspace: 0)
2816 *
2817  CALL zgelqf( m, n, a, lda, work( itau ),
2818  $ work( iwork ), lwork-iwork+1, ierr )
2819  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2820 *
2821 * Generate Q in VT
2822 * (CWorkspace: need 2*M, prefer M+M*NB)
2823 * (RWorkspace: 0)
2824 *
2825  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827  ie = 1
2828  itauq = itau
2829  itaup = itauq + m
2830  iwork = itaup + m
2831 *
2832 * Zero out above L in A
2833 *
2834  CALL zlaset( 'U', m-1, m-1, czero, czero,
2835  $ a( 1, 2 ), lda )
2836 *
2837 * Bidiagonalize L in A
2838 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2839 * (RWorkspace: need M)
2840 *
2841  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2842  $ work( itauq ), work( itaup ),
2843  $ work( iwork ), lwork-iwork+1, ierr )
2844 *
2845 * Multiply right vectors bidiagonalizing L by Q in VT
2846 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2847 * (RWorkspace: 0)
2848 *
2849  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2850  $ work( itaup ), vt, ldvt,
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Generate left bidiagonalizing vectors of L in A
2854 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2855 * (RWorkspace: 0)
2856 *
2857  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2858  $ work( iwork ), lwork-iwork+1, ierr )
2859  irwork = ie + m
2860 *
2861 * Perform bidiagonal QR iteration, computing left
2862 * singular vectors of A in A and computing right
2863 * singular vectors of A in VT
2864 * (CWorkspace: 0)
2865 * (RWorkspace: need BDSPAC)
2866 *
2867  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2868  $ ldvt, a, lda, cdum, 1,
2869  $ rwork( irwork ), info )
2870 *
2871  END IF
2872 *
2873  ELSE IF( wntuas ) THEN
2874 *
2875 * Path 6t(N much larger than M, JOBU='S' or 'A',
2876 * JOBVT='S')
2877 * M right singular vectors to be computed in VT and
2878 * M left singular vectors to be computed in U
2879 *
2880  IF( lwork.GE.m*m+3*m ) THEN
2881 *
2882 * Sufficient workspace for a fast algorithm
2883 *
2884  iu = 1
2885  IF( lwork.GE.wrkbl+lda*m ) THEN
2886 *
2887 * WORK(IU) is LDA by N
2888 *
2889  ldwrku = lda
2890  ELSE
2891 *
2892 * WORK(IU) is LDA by M
2893 *
2894  ldwrku = m
2895  END IF
2896  itau = iu + ldwrku*m
2897  iwork = itau + m
2898 *
2899 * Compute A=L*Q
2900 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)