db/db4/dgesdd_8f_source.html

 *> \brief \b DGESDD

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *> \htmlonly

 *> Download DGESDD + dependencies

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.f">

 *> [TGZ]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.f">

 *> [ZIP]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.f">

 *> [TXT]</a>

 *> \endhtmlonly

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,

 *                          WORK, LWORK, IWORK, INFO )

 *

 *       .. Scalar Arguments ..

 *       CHARACTER          JOBZ

 *       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N

 *       ..

 *       .. Array Arguments ..

 *       INTEGER            IWORK( * )

 *       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),

 *      $                   VT( LDVT, * ), WORK( * )

 *       ..

 *

 *

 *> \par Purpose:

 *  =============

 *>

 *> \verbatim

 *>

 *> DGESDD computes the singular value decomposition (SVD) of a real

 *> M-by-N matrix A, optionally computing the left and right singular

 *> vectors.  If singular vectors are desired, it uses a

 *> divide-and-conquer algorithm.

 *>

 *> The SVD is written

 *>

 *>      A = U * SIGMA * transpose(V)

 *>

 *> where SIGMA is an M-by-N matrix which is zero except for its

 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and

 *> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA

 *> are the singular values of A; they are real and non-negative, and

 *> are returned in descending order.  The first min(m,n) columns of

 *> U and V are the left and right singular vectors of A.

 *>

 *> Note that the routine returns VT = V**T, not V.

 *>

 *> The divide and conquer algorithm makes very mild assumptions about

 *> floating point arithmetic. It will work on machines with a guard

 *> digit in add/subtract, or on those binary machines without guard

 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or

 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines

 *> without guard digits, but we know of none.

 *> \endverbatim

 *

 *  Arguments:

 *  ==========

 *

 *> \param[in] JOBZ

 *> \verbatim

 *>          JOBZ is CHARACTER*1

 *>          Specifies options for computing all or part of the matrices U and V**T:

 *>          = 'A':  all M columns of U and all N rows of V**T are

 *>                  returned in the arrays U and VT;

 *>          = 'S':  the first min(M,N) columns of U and the first

 *>                  min(M,N) rows of V**T are returned in the arrays U

 *>                  and VT;

 *>          = 'O':  If M >= N, the first N columns of U are overwritten

 *>                  on the array A and all rows of V**T are returned in

 *>                  the array VT;

 *>                  otherwise, all columns of U are returned in the

 *>                  array U and the first M rows of V**T are overwritten

 *>                  in the array A;

 *>          = 'N':  no columns of U or rows of V**T are computed.

 *> \endverbatim

 *>

 *> \param[in] M

 *> \verbatim

 *>          M is INTEGER

 *>          The number of rows of the input matrix A.  M >= 0.

 *> \endverbatim

 *>

 *> \param[in] N

 *> \verbatim

 *>          N is INTEGER

 *>          The number of columns of the input matrix A.  N >= 0.

 *> \endverbatim

 *>

 *> \param[in,out] A

 *> \verbatim

 *>          A is DOUBLE PRECISION array, dimension (LDA,N)

 *>          On entry, the M-by-N matrix A.

 *>          On exit,

 *>          if JOBZ = 'O',  A is overwritten with the first N columns

 *>                          of U (the left singular vectors, stored

 *>                          columnwise) if M >= N;

 *>                          A is overwritten with the first M rows

 *>                          of V**T (the right singular vectors, stored

 *>                          rowwise) otherwise.

 *>          if JOBZ .ne. 'O', the contents of A are destroyed.

 *> \endverbatim

 *>

 *> \param[in] LDA

 *> \verbatim

 *>          LDA is INTEGER

 *>          The leading dimension of the array A.  LDA >= max(1,M).

 *> \endverbatim

 *>

 *> \param[out] S

 *> \verbatim

 *>          S is DOUBLE PRECISION array, dimension (min(M,N))

 *>          The singular values of A, sorted so that S(i) >= S(i+1).

 *> \endverbatim

 *>

 *> \param[out] U

 *> \verbatim

 *>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)

 *>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;

 *>          UCOL = min(M,N) if JOBZ = 'S'.

 *>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M

 *>          orthogonal matrix U;

 *>          if JOBZ = 'S', U contains the first min(M,N) columns of U

 *>          (the left singular vectors, stored columnwise);

 *>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.

 *> \endverbatim

 *>

 *> \param[in] LDU

 *> \verbatim

 *>          LDU is INTEGER

 *>          The leading dimension of the array U.  LDU >= 1; if

 *>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.

 *> \endverbatim

 *>

 *> \param[out] VT

 *> \verbatim

 *>          VT is DOUBLE PRECISION array, dimension (LDVT,N)

 *>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the

 *>          N-by-N orthogonal matrix V**T;

 *>          if JOBZ = 'S', VT contains the first min(M,N) rows of

 *>          V**T (the right singular vectors, stored rowwise);

 *>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.

 *> \endverbatim

 *>

 *> \param[in] LDVT

 *> \verbatim

 *>          LDVT is INTEGER

 *>          The leading dimension of the array VT.  LDVT >= 1;

 *>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;

 *>          if JOBZ = 'S', LDVT >= min(M,N).

 *> \endverbatim

 *>

 *> \param[out] WORK

 *> \verbatim

 *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))

 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

 *> \endverbatim

 *>

 *> \param[in] LWORK

 *> \verbatim

 *>          LWORK is INTEGER

 *>          The dimension of the array WORK. LWORK >= 1.

 *>          If LWORK = -1, a workspace query is assumed.  The optimal

 *>          size for the WORK array is calculated and stored in WORK(1),

 *>          and no other work except argument checking is performed.

 *>

 *>          Let mx = max(M,N) and mn = min(M,N).

 *>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).

 *>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).

 *>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.

 *>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.

 *>          These are not tight minimums in all cases; see comments inside code.

 *>          For good performance, LWORK should generally be larger;

 *>          a query is recommended.

 *> \endverbatim

 *>

 *> \param[out] IWORK

 *> \verbatim

 *>          IWORK is INTEGER array, dimension (8*min(M,N))

 *> \endverbatim

 *>

 *> \param[out] INFO

 *> \verbatim

 *>          INFO is INTEGER

 *>          = 0:  successful exit.

 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.

 *>          > 0:  DBDSDC did not converge, updating process failed.

 *> \endverbatim

 *

 *  Authors:

 *  ========

 *

 *> \author Univ. of Tennessee

 *> \author Univ. of California Berkeley

 *> \author Univ. of Colorado Denver

 *> \author NAG Ltd.

 *

 *> \date June 2016

 *

 *> \ingroup doubleGEsing

 *

 *> \par Contributors:

 *  ==================

 *>

 *>     Ming Gu and Huan Ren, Computer Science Division, University of

 *>     California at Berkeley, USA

 *>

 *> @precisions fortran d -> s

 *  =====================================================================

       SUBROUTINE dgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,

      $                   work, lwork, iwork, info )

       implicit none

 *

 *  -- LAPACK driver routine (version 3.6.1) --

 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

 *     June 2016

 *

 *     .. Scalar Arguments ..

       CHARACTER          JOBZ

       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N

 *     ..

 *     .. Array Arguments ..

       INTEGER            IWORK( * )

       DOUBLE PRECISION   A( lda, * ), S( * ), U( ldu, * ),

      $                   vt( ldvt, * ), work( * )

 *     ..

 *

 *  =====================================================================

 *

 *     .. Parameters ..

       DOUBLE PRECISION   ZERO, ONE

       parameter                ( zero = 0.0d0, one = 1.0d0 )

 *     ..

 *     .. Local Scalars ..

       LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS

       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,

      $                   ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,

      $                   ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,

      $                   mnthr, nwork, wrkbl

       INTEGER            LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,

      $                   lwork_dgebrd_nn, lwork_dgelqf_mn,

      $                   lwork_dgeqrf_mn,

      $                   lwork_dorgbr_p_mm, lwork_dorgbr_q_nn,

      $                   lwork_dorglq_mn, lwork_dorglq_nn,

      $                   lwork_dorgqr_mm, lwork_dorgqr_mn,

      $                   lwork_dormbr_prt_mm, lwork_dormbr_qln_mm,

      $                   lwork_dormbr_prt_mn, lwork_dormbr_qln_mn,

      $                   lwork_dormbr_prt_nn, lwork_dormbr_qln_nn

       DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM

 *     ..

 *     .. Local Arrays ..

       INTEGER            IDUM( 1 )

       DOUBLE PRECISION   DUM( 1 )

 *     ..

 *     .. External Subroutines ..

       EXTERNAL           dbdsdc, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,

      $                   dlascl, dlaset, dorgbr, dorglq, dorgqr, dormbr,

      $                   xerbla

 *     ..

 *     .. External Functions ..

       LOGICAL            LSAME

       DOUBLE PRECISION   DLAMCH, DLANGE

       EXTERNAL           dlamch, dlange, lsame

 *     ..

 *     .. Intrinsic Functions ..

       INTRINSIC          int, max, min, sqrt

 *     ..

 *     .. Executable Statements ..

 *

 *     Test the input arguments

 *

       info   = 0

       minmn  = min( m, n )

       wntqa  = lsame( jobz, 'A' )

       wntqs  = lsame( jobz, 'S' )

       wntqas = wntqa .OR. wntqs

       wntqo  = lsame( jobz, 'O' )

       wntqn  = lsame( jobz, 'N' )

       lquery = ( lwork.EQ.-1 )

 *

       IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN

          info = -1

       ELSE IF( m.LT.0 ) THEN

          info = -2

       ELSE IF( n.LT.0 ) THEN

          info = -3

       ELSE IF( lda.LT.max( 1, m ) ) THEN

          info = -5

       ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.

      $         ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN

          info = -8

       ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.

      $         ( wntqs .AND. ldvt.LT.minmn ) .OR.

      $         ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN

          info = -10

       END IF

 *

 *     Compute workspace

 *       Note: Comments in the code beginning "Workspace:" describe the

 *       minimal amount of workspace allocated at that point in the code,

 *       as well as the preferred amount for good performance.

 *       NB refers to the optimal block size for the immediately

 *       following subroutine, as returned by ILAENV.

 *

       IF( info.EQ.0 ) THEN

          minwrk = 1

          maxwrk = 1

          bdspac = 0

          mnthr  = int( minmn*11.0d0 / 6.0d0 )

          IF( m.GE.n .AND. minmn.GT.0 ) THEN

 *

 *           Compute space needed for DBDSDC

 *

             IF( wntqn ) THEN

 *              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)

 *              keep 7*N for backwards compatibility.

                bdspac = 7*n

             ELSE

                bdspac = 3*n*n + 4*n

             END IF

 *

 *           Compute space preferred for each routine

             CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),

      $                   dum(1), dum(1), -1, ierr )

             lwork_dgebrd_mn = int( dum(1) )

 *

             CALL dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),

      $                   dum(1), dum(1), -1, ierr )

             lwork_dgebrd_nn = int( dum(1) )

 *

             CALL dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )

             lwork_dgeqrf_mn = int( dum(1) )

 *

             CALL dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,

      $                   ierr )

             lwork_dorgbr_q_nn = int( dum(1) )

 *

             CALL dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )

             lwork_dorgqr_mm = int( dum(1) )

 *

             CALL dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )

             lwork_dorgqr_mn = int( dum(1) )

 *

             CALL dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,

      $                   dum(1), dum(1), n, dum(1), -1, ierr )

             lwork_dormbr_prt_nn = int( dum(1) )

 *

             CALL dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,

      $                   dum(1), dum(1), n, dum(1), -1, ierr )

             lwork_dormbr_qln_nn = int( dum(1) )

 *

             CALL dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,

      $                   dum(1), dum(1), m, dum(1), -1, ierr )

             lwork_dormbr_qln_mn = int( dum(1) )

 *

             CALL dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,

      $                   dum(1), dum(1), m, dum(1), -1, ierr )

             lwork_dormbr_qln_mm = int( dum(1) )

 *

             IF( m.GE.mnthr ) THEN

                IF( wntqn ) THEN

 *

 *                 Path 1 (M >> N, JOBZ='N')

 *

                   wrkbl = n + lwork_dgeqrf_mn

                   wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                   maxwrk = max( wrkbl, bdspac + n )

                   minwrk = bdspac + n

                ELSE IF( wntqo ) THEN

 *

 *                 Path 2 (M >> N, JOBZ='O')

 *

                   wrkbl = n + lwork_dgeqrf_mn

                   wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )

                   wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   wrkbl = max( wrkbl, 3*n + bdspac )

                   maxwrk = wrkbl + 2*n*n

                   minwrk = bdspac + 2*n*n + 3*n

                ELSE IF( wntqs ) THEN

 *

 *                 Path 3 (M >> N, JOBZ='S')

 *

                   wrkbl = n + lwork_dgeqrf_mn

                   wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )

                   wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   wrkbl = max( wrkbl, 3*n + bdspac )

                   maxwrk = wrkbl + n*n

                   minwrk = bdspac + n*n + 3*n

                ELSE IF( wntqa ) THEN

 *

 *                 Path 4 (M >> N, JOBZ='A')

 *

                   wrkbl = n + lwork_dgeqrf_mn

                   wrkbl = max( wrkbl,   n + lwork_dorgqr_mm )

                   wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   wrkbl = max( wrkbl, 3*n + bdspac )

                   maxwrk = wrkbl + n*n

                   minwrk = n*n + max( 3*n + bdspac, n + m )

                END IF

             ELSE

 *

 *              Path 5 (M >= N, but not much larger)

 *

                wrkbl = 3*n + lwork_dgebrd_mn

                IF( wntqn ) THEN

 *                 Path 5n (M >= N, jobz='N')

                   maxwrk = max( wrkbl, 3*n + bdspac )

                   minwrk = 3*n + max( m, bdspac )

                ELSE IF( wntqo ) THEN

 *                 Path 5o (M >= N, jobz='O')

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )

                   wrkbl = max( wrkbl, 3*n + bdspac )

                   maxwrk = wrkbl + m*n

                   minwrk = 3*n + max( m, n*n + bdspac )

                ELSE IF( wntqs ) THEN

 *                 Path 5s (M >= N, jobz='S')

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   maxwrk = max( wrkbl, 3*n + bdspac )

                   minwrk = 3*n + max( m, bdspac )

                ELSE IF( wntqa ) THEN

 *                 Path 5a (M >= N, jobz='A')

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                   maxwrk = max( wrkbl, 3*n + bdspac )

                   minwrk = 3*n + max( m, bdspac )

                END IF

             END IF

          ELSE IF( minmn.GT.0 ) THEN

 *

 *           Compute space needed for DBDSDC

 *

             IF( wntqn ) THEN

 *              dbdsdc needs only 4*M (or 6*M for uplo=L for LAPACK <= 3.6)

 *              keep 7*M for backwards compatibility.

                bdspac = 7*m

             ELSE

                bdspac = 3*m*m + 4*m

             END IF

 *

 *           Compute space preferred for each routine

             CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),

      $                   dum(1), dum(1), -1, ierr )

             lwork_dgebrd_mn = int( dum(1) )

 *

             CALL dgebrd( m, m, a, m, s, dum(1), dum(1),

      $                   dum(1), dum(1), -1, ierr )

             lwork_dgebrd_mm = int( dum(1) )

 *

             CALL dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )

             lwork_dgelqf_mn = int( dum(1) )

 *

             CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )

             lwork_dorglq_nn = int( dum(1) )

 *

             CALL dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )

             lwork_dorglq_mn = int( dum(1) )

 *

             CALL dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )

             lwork_dorgbr_p_mm = int( dum(1) )

 *

             CALL dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,

      $                   dum(1), dum(1), m, dum(1), -1, ierr )

             lwork_dormbr_prt_mm = int( dum(1) )

 *

             CALL dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,

      $                   dum(1), dum(1), m, dum(1), -1, ierr )

             lwork_dormbr_prt_mn = int( dum(1) )

 *

             CALL dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,

      $                   dum(1), dum(1), n, dum(1), -1, ierr )

             lwork_dormbr_prt_nn = int( dum(1) )

 *

             CALL dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,

      $                   dum(1), dum(1), m, dum(1), -1, ierr )

             lwork_dormbr_qln_mm = int( dum(1) )

 *

             IF( n.GE.mnthr ) THEN

                IF( wntqn ) THEN

 *

 *                 Path 1t (N >> M, JOBZ='N')

 *

                   wrkbl = m + lwork_dgelqf_mn

                   wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                   maxwrk = max( wrkbl, bdspac + m )

                   minwrk = bdspac + m

                ELSE IF( wntqo ) THEN

 *

 *                 Path 2t (N >> M, JOBZ='O')

 *

                   wrkbl = m + lwork_dgelqf_mn

                   wrkbl = max( wrkbl,   m + lwork_dorglq_mn )

                   wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                   wrkbl = max( wrkbl, 3*m + bdspac )

                   maxwrk = wrkbl + 2*m*m

                   minwrk = bdspac + 2*m*m + 3*m

                ELSE IF( wntqs ) THEN

 *

 *                 Path 3t (N >> M, JOBZ='S')

 *

                   wrkbl = m + lwork_dgelqf_mn

                   wrkbl = max( wrkbl,   m + lwork_dorglq_mn )

                   wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                   wrkbl = max( wrkbl, 3*m + bdspac )

                   maxwrk = wrkbl + m*m

                   minwrk = bdspac + m*m + 3*m

                ELSE IF( wntqa ) THEN

 *

 *                 Path 4t (N >> M, JOBZ='A')

 *

                   wrkbl = m + lwork_dgelqf_mn

                   wrkbl = max( wrkbl,   m + lwork_dorglq_nn )

                   wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                   wrkbl = max( wrkbl, 3*m + bdspac )

                   maxwrk = wrkbl + m*m

                   minwrk = m*m + max( 3*m + bdspac, m + n )

                END IF

             ELSE

 *

 *              Path 5t (N > M, but not much larger)

 *

                wrkbl = 3*m + lwork_dgebrd_mn

                IF( wntqn ) THEN

 *                 Path 5tn (N > M, jobz='N')

                   maxwrk = max( wrkbl, 3*m + bdspac )

                   minwrk = 3*m + max( n, bdspac )

                ELSE IF( wntqo ) THEN

 *                 Path 5to (N > M, jobz='O')

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )

                   wrkbl = max( wrkbl, 3*m + bdspac )

                   maxwrk = wrkbl + m*n

                   minwrk = 3*m + max( n, m*m + bdspac )

                ELSE IF( wntqs ) THEN

 *                 Path 5ts (N > M, jobz='S')

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )

                   maxwrk = max( wrkbl, 3*m + bdspac )

                   minwrk = 3*m + max( n, bdspac )

                ELSE IF( wntqa ) THEN

 *                 Path 5ta (N > M, jobz='A')

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                   wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn )

                   maxwrk = max( wrkbl, 3*m + bdspac )

                   minwrk = 3*m + max( n, bdspac )

                END IF

             END IF

          END IF


          maxwrk = max( maxwrk, minwrk )

          work( 1 ) = maxwrk

 *

          IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN

             info = -12

          END IF

       END IF

 *

       IF( info.NE.0 ) THEN

          CALL xerbla( 'DGESDD', -info )

          RETURN

       ELSE IF( lquery ) THEN

          RETURN

       END IF

 *

 *     Quick return if possible

 *

       IF( m.EQ.0 .OR. n.EQ.0 ) THEN

          RETURN

       END IF

 *

 *     Get machine constants

 *

       eps = dlamch( 'P' )

       smlnum = sqrt( dlamch( 'S' ) ) / eps

       bignum = one / smlnum

 *

 *     Scale A if max element outside range [SMLNUM,BIGNUM]

 *

       anrm = dlange( 'M', m, n, a, lda, dum )

       iscl = 0

       IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN

          iscl = 1

          CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )

       ELSE IF( anrm.GT.bignum ) THEN

          iscl = 1

          CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )

       END IF

 *

       IF( m.GE.n ) THEN

 *

 *        A has at least as many rows as columns. If A has sufficiently

 *        more rows than columns, first reduce using the QR

 *        decomposition (if sufficient workspace available)

 *

          IF( m.GE.mnthr ) THEN

 *

             IF( wntqn ) THEN

 *

 *              Path 1 (M >> N, JOBZ='N')

 *              No singular vectors to be computed

 *

                itau = 1

                nwork = itau + n

 *

 *              Compute A=Q*R

 *              Workspace: need   N [tau] + N    [work]

 *              Workspace: prefer N [tau] + N*NB [work]

 *

                CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Zero out below R

 *

                CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )

                ie = 1

                itauq = ie + n

                itaup = itauq + n

                nwork = itaup + n

 *

 *              Bidiagonalize R in A

 *              Workspace: need   3*N [e, tauq, taup] + N      [work]

 *              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]

 *

                CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),

      $                      work( itaup ), work( nwork ), lwork-nwork+1,

      $                      ierr )

                nwork = ie + n

 *

 *              Perform bidiagonal SVD, computing singular values only

 *              Workspace: need   N [e] + BDSPAC

 *

                CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,

      $                      dum, idum, work( nwork ), iwork, info )

 *

             ELSE IF( wntqo ) THEN

 *

 *              Path 2 (M >> N, JOBZ = 'O')

 *              N left singular vectors to be overwritten on A and

 *              N right singular vectors to be computed in VT

 *

                ir = 1

 *

 *              WORK(IR) is LDWRKR by N

 *

                IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN

                   ldwrkr = lda

                ELSE

                   ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n

                END IF

                itau = ir + ldwrkr*n

                nwork = itau + n

 *

 *              Compute A=Q*R

 *              Workspace: need   N*N [R] + N [tau] + N    [work]

 *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

 *

                CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Copy R to WORK(IR), zeroing out below it

 *

                CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )

                CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),

      $                      ldwrkr )

 *

 *              Generate Q in A

 *              Workspace: need   N*N [R] + N [tau] + N    [work]

 *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

 *

                CALL dorgqr( m, n, n, a, lda, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

                ie = itau

                itauq = ie + n

                itaup = itauq + n

                nwork = itaup + n

 *

 *              Bidiagonalize R in WORK(IR)

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]

 *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]

 *

                CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),

      $                      work( itauq ), work( itaup ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              WORK(IU) is N by N

 *

                iu = nwork

                nwork = iu + n*n

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in WORK(IU) and computing right

 *              singular vectors of bidiagonal matrix in VT

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,

      $                      vt, ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite WORK(IU) by left singular vectors of R

 *              and VT by right singular vectors of R

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work]

 *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,

      $                      work( itauq ), work( iu ), n, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Multiply Q in A by left singular vectors of R in

 *              WORK(IU), storing result in WORK(IR) and copying to A

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U]

 *              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]

 *

                DO 10 i = 1, m, ldwrkr

                   chunk = min( m - i + 1, ldwrkr )

                   CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),

      $                        lda, work( iu ), n, zero, work( ir ),

      $                        ldwrkr )

                   CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,

      $                         a( i, 1 ), lda )

    10          CONTINUE

 *

             ELSE IF( wntqs ) THEN

 *

 *              Path 3 (M >> N, JOBZ='S')

 *              N left singular vectors to be computed in U and

 *              N right singular vectors to be computed in VT

 *

                ir = 1

 *

 *              WORK(IR) is N by N

 *

                ldwrkr = n

                itau = ir + ldwrkr*n

                nwork = itau + n

 *

 *              Compute A=Q*R

 *              Workspace: need   N*N [R] + N [tau] + N    [work]

 *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

 *

                CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Copy R to WORK(IR), zeroing out below it

 *

                CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )

                CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),

      $                      ldwrkr )

 *

 *              Generate Q in A

 *              Workspace: need   N*N [R] + N [tau] + N    [work]

 *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

 *

                CALL dorgqr( m, n, n, a, lda, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

                ie = itau

                itauq = ie + n

                itaup = itauq + n

                nwork = itaup + n

 *

 *              Bidiagonalize R in WORK(IR)

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]

 *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]

 *

                CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),

      $                      work( itauq ), work( itaup ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite U by left singular vectors of R and VT

 *              by right singular vectors of R

 *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work]

 *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

                CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Multiply Q in A by left singular vectors of R in

 *              WORK(IR), storing result in U

 *              Workspace: need   N*N [R]

 *

                CALL dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )

                CALL dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),

      $                     ldwrkr, zero, u, ldu )

 *

             ELSE IF( wntqa ) THEN

 *

 *              Path 4 (M >> N, JOBZ='A')

 *              M left singular vectors to be computed in U and

 *              N right singular vectors to be computed in VT

 *

                iu = 1

 *

 *              WORK(IU) is N by N

 *

                ldwrku = n

                itau = iu + ldwrku*n

                nwork = itau + n

 *

 *              Compute A=Q*R, copying result to U

 *              Workspace: need   N*N [U] + N [tau] + N    [work]

 *              Workspace: prefer N*N [U] + N [tau] + N*NB [work]

 *

                CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dlacpy( 'L', m, n, a, lda, u, ldu )

 *

 *              Generate Q in U

 *              Workspace: need   N*N [U] + N [tau] + M    [work]

 *              Workspace: prefer N*N [U] + N [tau] + M*NB [work]

                CALL dorgqr( m, m, n, u, ldu, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

 *

 *              Produce R in A, zeroing out other entries

 *

                CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )

                ie = itau

                itauq = ie + n

                itaup = itauq + n

                nwork = itaup + n

 *

 *              Bidiagonalize R in A

 *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work]

 *              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]

 *

                CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),

      $                      work( itaup ), work( nwork ), lwork-nwork+1,

      $                      ierr )

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in WORK(IU) and computing right

 *              singular vectors of bidiagonal matrix in VT

 *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,

      $                      vt, ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite WORK(IU) by left singular vectors of R and VT

 *              by right singular vectors of R

 *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work]

 *              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', n, n, n, a, lda,

      $                      work( itauq ), work( iu ), ldwrku,

      $                      work( nwork ), lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Multiply Q in U by left singular vectors of R in

 *              WORK(IU), storing result in A

 *              Workspace: need   N*N [U]

 *

                CALL dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),

      $                     ldwrku, zero, a, lda )

 *

 *              Copy left singular vectors of A from A to U

 *

                CALL dlacpy( 'F', m, n, a, lda, u, ldu )

 *

             END IF

 *

          ELSE

 *

 *           M .LT. MNTHR

 *

 *           Path 5 (M >= N, but not much larger)

 *           Reduce to bidiagonal form without QR decomposition

 *

             ie = 1

             itauq = ie + n

             itaup = itauq + n

             nwork = itaup + n

 *

 *           Bidiagonalize A

 *           Workspace: need   3*N [e, tauq, taup] + M        [work]

 *           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]

 *

             CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),

      $                   work( itaup ), work( nwork ), lwork-nwork+1,

      $                   ierr )

             IF( wntqn ) THEN

 *

 *              Path 5n (M >= N, JOBZ='N')

 *              Perform bidiagonal SVD, only computing singular values

 *              Workspace: need   3*N [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,

      $                      dum, idum, work( nwork ), iwork, info )

             ELSE IF( wntqo ) THEN

 *              Path 5o (M >= N, JOBZ='O')

                iu = nwork

                IF( lwork .GE. m*n + 3*n + bdspac ) THEN

 *

 *                 WORK( IU ) is M by N

 *

                   ldwrku = m

                   nwork = iu + ldwrku*n

                   CALL dlaset( 'F', m, n, zero, zero, work( iu ),

      $                         ldwrku )

 *                 IR is unused; silence compile warnings

                   ir = -1

                ELSE

 *

 *                 WORK( IU ) is N by N

 *

                   ldwrku = n

                   nwork = iu + ldwrku*n

 *

 *                 WORK(IR) is LDWRKR by N

 *

                   ir = nwork

                   ldwrkr = ( lwork - n*n - 3*n ) / n

                END IF

                nwork = iu + ldwrku*n

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in WORK(IU) and computing right

 *              singular vectors of bidiagonal matrix in VT

 *              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),

      $                      ldwrku, vt, ldvt, dum, idum, work( nwork ),

      $                      iwork, info )

 *

 *              Overwrite VT by right singular vectors of A

 *              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]

 *              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

 *

                CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

                IF( lwork .GE. m*n + 3*n + bdspac ) THEN

 *

 *                 Path 5o-fast

 *                 Overwrite WORK(IU) by left singular vectors of A

 *                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work]

 *                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]

 *

                   CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,

      $                         work( itauq ), work( iu ), ldwrku,

      $                         work( nwork ), lwork - nwork + 1, ierr )

 *

 *                 Copy left singular vectors of A from WORK(IU) to A

 *

                   CALL dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda )

                ELSE

 *

 *                 Path 5o-slow

 *                 Generate Q in A

 *                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]

 *                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

 *

                   CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),

      $                         work( nwork ), lwork - nwork + 1, ierr )

 *

 *                 Multiply Q in A by left singular vectors of

 *                 bidiagonal matrix in WORK(IU), storing result in

 *                 WORK(IR) and copying to A

 *                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R]

 *                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R]

 *

                   DO 20 i = 1, m, ldwrkr

                      chunk = min( m - i + 1, ldwrkr )

                      CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),

      $                           lda, work( iu ), ldwrku, zero,

      $                           work( ir ), ldwrkr )

                      CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,

      $                            a( i, 1 ), lda )

    20             CONTINUE

                END IF

 *

             ELSE IF( wntqs ) THEN

 *

 *              Path 5s (M >= N, JOBZ='S')

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   3*N [e, tauq, taup] + BDSPAC

 *

                CALL dlaset( 'F', m, n, zero, zero, u, ldu )

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite U by left singular vectors of A and VT

 *              by right singular vectors of A

 *              Workspace: need   3*N [e, tauq, taup] + N    [work]

 *              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

             ELSE IF( wntqa ) THEN

 *

 *              Path 5a (M >= N, JOBZ='A')

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   3*N [e, tauq, taup] + BDSPAC

 *

                CALL dlaset( 'F', m, m, zero, zero, u, ldu )

                CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Set the right corner of U to identity matrix

 *

                IF( m.GT.n ) THEN

                   CALL dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),

      $                         ldu )

                END IF

 *

 *              Overwrite U by left singular vectors of A and VT

 *              by right singular vectors of A

 *              Workspace: need   3*N [e, tauq, taup] + M    [work]

 *              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

             END IF

 *

          END IF

 *

       ELSE

 *

 *        A has more columns than rows. If A has sufficiently more

 *        columns than rows, first reduce using the LQ decomposition (if

 *        sufficient workspace available)

 *

          IF( n.GE.mnthr ) THEN

 *

             IF( wntqn ) THEN

 *

 *              Path 1t (N >> M, JOBZ='N')

 *              No singular vectors to be computed

 *

                itau = 1

                nwork = itau + m

 *

 *              Compute A=L*Q

 *              Workspace: need   M [tau] + M [work]

 *              Workspace: prefer M [tau] + M*NB [work]

 *

                CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Zero out above L

 *

                CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )

                ie = 1

                itauq = ie + m

                itaup = itauq + m

                nwork = itaup + m

 *

 *              Bidiagonalize L in A

 *              Workspace: need   3*M [e, tauq, taup] + M      [work]

 *              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]

 *

                CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),

      $                      work( itaup ), work( nwork ), lwork-nwork+1,

      $                      ierr )

                nwork = ie + m

 *

 *              Perform bidiagonal SVD, computing singular values only

 *              Workspace: need   M [e] + BDSPAC

 *

                CALL dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,

      $                      dum, idum, work( nwork ), iwork, info )

 *

             ELSE IF( wntqo ) THEN

 *

 *              Path 2t (N >> M, JOBZ='O')

 *              M right singular vectors to be overwritten on A and

 *              M left singular vectors to be computed in U

 *

                ivt = 1

 *

 *              WORK(IVT) is M by M

 *              WORK(IL)  is M by M; it is later resized to M by chunk for gemm

 *

                il = ivt + m*m

                IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN

                   ldwrkl = m

                   chunk = n

                ELSE

                   ldwrkl = m

                   chunk = ( lwork - m*m ) / m

                END IF

                itau = il + ldwrkl*m

                nwork = itau + m

 *

 *              Compute A=L*Q

 *              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]

 *              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]

 *

                CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Copy L to WORK(IL), zeroing out above it

 *

                CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )

                CALL dlaset( 'U', m - 1, m - 1, zero, zero,

      $                      work( il + ldwrkl ), ldwrkl )

 *

 *              Generate Q in A

 *              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]

 *              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]

 *

                CALL dorglq( m, n, m, a, lda, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

                ie = itau

                itauq = ie + m

                itaup = itauq + m

                nwork = itaup + m

 *

 *              Bidiagonalize L in WORK(IL)

 *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work]

 *              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]

 *

                CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),

      $                      work( itauq ), work( itaup ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U, and computing right singular

 *              vectors of bidiagonal matrix in WORK(IVT)

 *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,

      $                      work( ivt ), m, dum, idum, work( nwork ),

      $                      iwork, info )

 *

 *              Overwrite U by left singular vectors of L and WORK(IVT)

 *              by right singular vectors of L

 *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work]

 *              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,

      $                      work( itaup ), work( ivt ), m,

      $                      work( nwork ), lwork - nwork + 1, ierr )

 *

 *              Multiply right singular vectors of L in WORK(IVT) by Q

 *              in A, storing result in WORK(IL) and copying to A

 *              Workspace: need   M*M [VT] + M*M [L]

 *              Workspace: prefer M*M [VT] + M*N [L]

 *              At this point, L is resized as M by chunk.

 *

                DO 30 i = 1, n, chunk

                   blk = min( n - i + 1, chunk )

                   CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,

      $                        a( 1, i ), lda, zero, work( il ), ldwrkl )

                   CALL dlacpy( 'F', m, blk, work( il ), ldwrkl,

      $                         a( 1, i ), lda )

    30          CONTINUE

 *

             ELSE IF( wntqs ) THEN

 *

 *              Path 3t (N >> M, JOBZ='S')

 *              M right singular vectors to be computed in VT and

 *              M left singular vectors to be computed in U

 *

                il = 1

 *

 *              WORK(IL) is M by M

 *

                ldwrkl = m

                itau = il + ldwrkl*m

                nwork = itau + m

 *

 *              Compute A=L*Q

 *              Workspace: need   M*M [L] + M [tau] + M    [work]

 *              Workspace: prefer M*M [L] + M [tau] + M*NB [work]

 *

                CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Copy L to WORK(IL), zeroing out above it

 *

                CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )

                CALL dlaset( 'U', m - 1, m - 1, zero, zero,

      $                      work( il + ldwrkl ), ldwrkl )

 *

 *              Generate Q in A

 *              Workspace: need   M*M [L] + M [tau] + M    [work]

 *              Workspace: prefer M*M [L] + M [tau] + M*NB [work]

 *

                CALL dorglq( m, n, m, a, lda, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

                ie = itau

                itauq = ie + m

                itaup = itauq + m

                nwork = itaup + m

 *

 *              Bidiagonalize L in WORK(IL).

 *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work]

 *              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]

 *

                CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),

      $                      work( itauq ), work( itaup ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite U by left singular vectors of L and VT

 *              by right singular vectors of L

 *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work]

 *              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

 *              Multiply right singular vectors of L in WORK(IL) by

 *              Q in A, storing result in VT

 *              Workspace: need   M*M [L]

 *

                CALL dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )

                CALL dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,

      $                     a, lda, zero, vt, ldvt )

 *

             ELSE IF( wntqa ) THEN

 *

 *              Path 4t (N >> M, JOBZ='A')

 *              N right singular vectors to be computed in VT and

 *              M left singular vectors to be computed in U

 *

                ivt = 1

 *

 *              WORK(IVT) is M by M

 *

                ldwkvt = m

                itau = ivt + ldwkvt*m

                nwork = itau + m

 *

 *              Compute A=L*Q, copying result to VT

 *              Workspace: need   M*M [VT] + M [tau] + M    [work]

 *              Workspace: prefer M*M [VT] + M [tau] + M*NB [work]

 *

                CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )

 *

 *              Generate Q in VT

 *              Workspace: need   M*M [VT] + M [tau] + N    [work]

 *              Workspace: prefer M*M [VT] + M [tau] + N*NB [work]

 *

                CALL dorglq( n, n, m, vt, ldvt, work( itau ),

      $                      work( nwork ), lwork - nwork + 1, ierr )

 *

 *              Produce L in A, zeroing out other entries

 *

                CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )

                ie = itau

                itauq = ie + m

                itaup = itauq + m

                nwork = itaup + m

 *

 *              Bidiagonalize L in A

 *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work]

 *              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]

 *

                CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),

      $                      work( itaup ), work( nwork ), lwork-nwork+1,

      $                      ierr )

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in WORK(IVT)

 *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,

      $                      work( ivt ), ldwkvt, dum, idum,

      $                      work( nwork ), iwork, info )

 *

 *              Overwrite U by left singular vectors of L and WORK(IVT)

 *              by right singular vectors of L

 *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work]

 *              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, m, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', m, m, m, a, lda,

      $                      work( itaup ), work( ivt ), ldwkvt,

      $                      work( nwork ), lwork - nwork + 1, ierr )

 *

 *              Multiply right singular vectors of L in WORK(IVT) by

 *              Q in VT, storing result in A

 *              Workspace: need   M*M [VT]

 *

                CALL dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,

      $                     vt, ldvt, zero, a, lda )

 *

 *              Copy right singular vectors of A from A to VT

 *

                CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )

 *

             END IF

 *

          ELSE

 *

 *           N .LT. MNTHR

 *

 *           Path 5t (N > M, but not much larger)

 *           Reduce to bidiagonal form without LQ decomposition

 *

             ie = 1

             itauq = ie + m

             itaup = itauq + m

             nwork = itaup + m

 *

 *           Bidiagonalize A

 *           Workspace: need   3*M [e, tauq, taup] + N        [work]

 *           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]

 *

             CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),

      $                   work( itaup ), work( nwork ), lwork-nwork+1,

      $                   ierr )

             IF( wntqn ) THEN

 *

 *              Path 5tn (N > M, JOBZ='N')

 *              Perform bidiagonal SVD, only computing singular values

 *              Workspace: need   3*M [e, tauq, taup] + BDSPAC

 *

                CALL dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,

      $                      dum, idum, work( nwork ), iwork, info )

             ELSE IF( wntqo ) THEN

 *              Path 5to (N > M, JOBZ='O')

                ldwkvt = m

                ivt = nwork

                IF( lwork .GE. m*n + 3*m + bdspac ) THEN

 *

 *                 WORK( IVT ) is M by N

 *

                   CALL dlaset( 'F', m, n, zero, zero, work( ivt ),

      $                         ldwkvt )

                   nwork = ivt + ldwkvt*n

 *                 IL is unused; silence compile warnings

                   il = -1

                ELSE

 *

 *                 WORK( IVT ) is M by M

 *

                   nwork = ivt + ldwkvt*m

                   il = nwork

 *

 *                 WORK(IL) is M by CHUNK

 *

                   chunk = ( lwork - m*m - 3*m ) / m

                END IF

 *

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in WORK(IVT)

 *              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC

 *

                CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,

      $                      work( ivt ), ldwkvt, dum, idum,

      $                      work( nwork ), iwork, info )

 *

 *              Overwrite U by left singular vectors of A

 *              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]

 *              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

 *

                IF( lwork .GE. m*n + 3*m + bdspac ) THEN

 *

 *                 Path 5to-fast

 *                 Overwrite WORK(IVT) by right singular vectors of A

 *                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work]

 *                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]

 *

                   CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,

      $                         work( itaup ), work( ivt ), ldwkvt,

      $                         work( nwork ), lwork - nwork + 1, ierr )

 *

 *                 Copy right singular vectors of A from WORK(IVT) to A

 *

                   CALL dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )

                ELSE

 *

 *                 Path 5to-slow

 *                 Generate P**T in A

 *                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]

 *                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]

 *

                   CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),

      $                         work( nwork ), lwork - nwork + 1, ierr )

 *

 *                 Multiply right singular vectors of bidiagonal

 *                 matrix in WORK(IVT) by P**T in A, storing result in

 *                 WORK(IL) and copying to A

 *                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L]

 *                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L]

 *

                   DO 40 i = 1, n, chunk

                      blk = min( n - i + 1, chunk )

                      CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ),

      $                           ldwkvt, a( 1, i ), lda, zero,

      $                           work( il ), m )

                      CALL dlacpy( 'F', m, blk, work( il ), m, a( 1, i ),

      $                            lda )

    40             CONTINUE

                END IF

             ELSE IF( wntqs ) THEN

 *

 *              Path 5ts (N > M, JOBZ='S')

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   3*M [e, tauq, taup] + BDSPAC

 *

                CALL dlaset( 'F', m, n, zero, zero, vt, ldvt )

                CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Overwrite U by left singular vectors of A and VT

 *              by right singular vectors of A

 *              Workspace: need   3*M [e, tauq, taup] + M    [work]

 *              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

             ELSE IF( wntqa ) THEN

 *

 *              Path 5ta (N > M, JOBZ='A')

 *              Perform bidiagonal SVD, computing left singular vectors

 *              of bidiagonal matrix in U and computing right singular

 *              vectors of bidiagonal matrix in VT

 *              Workspace: need   3*M [e, tauq, taup] + BDSPAC

 *

                CALL dlaset( 'F', n, n, zero, zero, vt, ldvt )

                CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,

      $                      ldvt, dum, idum, work( nwork ), iwork,

      $                      info )

 *

 *              Set the right corner of VT to identity matrix

 *

                IF( n.GT.m ) THEN

                   CALL dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),

      $                         ldvt )

                END IF

 *

 *              Overwrite U by left singular vectors of A and VT

 *              by right singular vectors of A

 *              Workspace: need   3*M [e, tauq, taup] + N    [work]

 *              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]

 *

                CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,

      $                      work( itauq ), u, ldu, work( nwork ),

      $                      lwork - nwork + 1, ierr )

                CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,

      $                      work( itaup ), vt, ldvt, work( nwork ),

      $                      lwork - nwork + 1, ierr )

             END IF

 *

          END IF

 *

       END IF

 *

 *     Undo scaling if necessary

 *

       IF( iscl.EQ.1 ) THEN

          IF( anrm.GT.bignum )

      $      CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,

      $                   ierr )

          IF( anrm.LT.smlnum )

      $      CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,

      $                   ierr )

       END IF

 *

 *     Return optimal workspace in WORK(1)

 *

       work( 1 ) = maxwrk

 *

       RETURN

 *

 *     End of DGESDD

 *

       END

dgebrd
subroutine dgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
DGEBRD
Definition: dgebrd.f:207

dlaset
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:112

dormbr
subroutine dormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMBR
Definition: dormbr.f:197

dorglq
subroutine dorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGLQ
Definition: dorglq.f:129

dlacpy
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105

dlascl
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:145

dgemm
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189

dgelqf
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:137

xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62

dgesdd
subroutine dgesdd(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
DGESDD
Definition: dgesdd.f:221

dorgbr
subroutine dorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGBR
Definition: dorgbr.f:159

dbdsdc
subroutine dbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
DBDSDC
Definition: dbdsdc.f:207

dgeqrf
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:138

dorgqr
subroutine dorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGQR
Definition: dorgqr.f:130