LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
dgesvd.f
Go to the documentation of this file.
1 *> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
30 * $ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is DOUBLE PRECISION array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is DOUBLE PRECISION array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if DBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \ingroup doubleGEsing
207 *
208 * =====================================================================
209  SUBROUTINE dgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
210  $ VT, LDVT, WORK, LWORK, INFO )
211 *
212 * -- LAPACK driver routine --
213 * -- LAPACK is a software package provided by Univ. of Tennessee, --
214 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215 *
216 * .. Scalar Arguments ..
217  CHARACTER JOBU, JOBVT
218  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219 * ..
220 * .. Array Arguments ..
221  DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
222  $ vt( ldvt, * ), work( * )
223 * ..
224 *
225 * =====================================================================
226 *
227 * .. Parameters ..
228  DOUBLE PRECISION ZERO, ONE
229  parameter( zero = 0.0d0, one = 1.0d0 )
230 * ..
231 * .. Local Scalars ..
232  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233  $ wntva, wntvas, wntvn, wntvo, wntvs
234  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
236  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
237  $ nrvt, wrkbl
238  INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
239  $ lwork_dgebrd, lwork_dorgbr_p, lwork_dorgbr_q,
240  $ lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
241  DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
242 * ..
243 * .. Local Arrays ..
244  DOUBLE PRECISION DUM( 1 )
245 * ..
246 * .. External Subroutines ..
247  EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
249  $ xerbla
250 * ..
251 * .. External Functions ..
252  LOGICAL LSAME
253  INTEGER ILAENV
254  DOUBLE PRECISION DLAMCH, DLANGE
255  EXTERNAL lsame, ilaenv, dlamch, dlange
256 * ..
257 * .. Intrinsic Functions ..
258  INTRINSIC max, min, sqrt
259 * ..
260 * .. Executable Statements ..
261 *
262 * Test the input arguments
263 *
264  info = 0
265  minmn = min( m, n )
266  wntua = lsame( jobu, 'A' )
267  wntus = lsame( jobu, 'S' )
268  wntuas = wntua .OR. wntus
269  wntuo = lsame( jobu, 'O' )
270  wntun = lsame( jobu, 'N' )
271  wntva = lsame( jobvt, 'A' )
272  wntvs = lsame( jobvt, 'S' )
273  wntvas = wntva .OR. wntvs
274  wntvo = lsame( jobvt, 'O' )
275  wntvn = lsame( jobvt, 'N' )
276  lquery = ( lwork.EQ.-1 )
277 *
278  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279  info = -1
280  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281  $ ( wntvo .AND. wntuo ) ) THEN
282  info = -2
283  ELSE IF( m.LT.0 ) THEN
284  info = -3
285  ELSE IF( n.LT.0 ) THEN
286  info = -4
287  ELSE IF( lda.LT.max( 1, m ) ) THEN
288  info = -6
289  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290  info = -9
291  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293  info = -11
294  END IF
295 *
296 * Compute workspace
297 * (Note: Comments in the code beginning "Workspace:" describe the
298 * minimal amount of workspace needed at that point in the code,
299 * as well as the preferred amount for good performance.
300 * NB refers to the optimal block size for the immediately
301 * following subroutine, as returned by ILAENV.)
302 *
303  IF( info.EQ.0 ) THEN
304  minwrk = 1
305  maxwrk = 1
306  IF( m.GE.n .AND. minmn.GT.0 ) THEN
307 *
308 * Compute space needed for DBDSQR
309 *
310  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
311  bdspac = 5*n
312 * Compute space needed for DGEQRF
313  CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314  lwork_dgeqrf = int( dum(1) )
315 * Compute space needed for DORGQR
316  CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_dorgqr_n = int( dum(1) )
318  CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319  lwork_dorgqr_m = int( dum(1) )
320 * Compute space needed for DGEBRD
321  CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
322  $ dum(1), dum(1), -1, ierr )
323  lwork_dgebrd = int( dum(1) )
324 * Compute space needed for DORGBR P
325  CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
326  $ dum(1), -1, ierr )
327  lwork_dorgbr_p = int( dum(1) )
328 * Compute space needed for DORGBR Q
329  CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
330  $ dum(1), -1, ierr )
331  lwork_dorgbr_q = int( dum(1) )
332 *
333  IF( m.GE.mnthr ) THEN
334  IF( wntun ) THEN
335 *
336 * Path 1 (M much larger than N, JOBU='N')
337 *
338  maxwrk = n + lwork_dgeqrf
339  maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
340  IF( wntvo .OR. wntvas )
341  $ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
342  maxwrk = max( maxwrk, bdspac )
343  minwrk = max( 4*n, bdspac )
344  ELSE IF( wntuo .AND. wntvn ) THEN
345 *
346 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347 *
348  wrkbl = n + lwork_dgeqrf
349  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
350  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
351  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
352  wrkbl = max( wrkbl, bdspac )
353  maxwrk = max( n*n + wrkbl, n*n + m*n + n )
354  minwrk = max( 3*n + m, bdspac )
355  ELSE IF( wntuo .AND. wntvas ) THEN
356 *
357 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358 * 'A')
359 *
360  wrkbl = n + lwork_dgeqrf
361  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
362  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
363  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
364  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
365  wrkbl = max( wrkbl, bdspac )
366  maxwrk = max( n*n + wrkbl, n*n + m*n + n )
367  minwrk = max( 3*n + m, bdspac )
368  ELSE IF( wntus .AND. wntvn ) THEN
369 *
370 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371 *
372  wrkbl = n + lwork_dgeqrf
373  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
374  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
375  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
376  wrkbl = max( wrkbl, bdspac )
377  maxwrk = n*n + wrkbl
378  minwrk = max( 3*n + m, bdspac )
379  ELSE IF( wntus .AND. wntvo ) THEN
380 *
381 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382 *
383  wrkbl = n + lwork_dgeqrf
384  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
385  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
386  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
387  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
388  wrkbl = max( wrkbl, bdspac )
389  maxwrk = 2*n*n + wrkbl
390  minwrk = max( 3*n + m, bdspac )
391  ELSE IF( wntus .AND. wntvas ) THEN
392 *
393 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394 * 'A')
395 *
396  wrkbl = n + lwork_dgeqrf
397  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
398  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
399  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
400  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
401  wrkbl = max( wrkbl, bdspac )
402  maxwrk = n*n + wrkbl
403  minwrk = max( 3*n + m, bdspac )
404  ELSE IF( wntua .AND. wntvn ) THEN
405 *
406 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407 *
408  wrkbl = n + lwork_dgeqrf
409  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
410  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
411  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
412  wrkbl = max( wrkbl, bdspac )
413  maxwrk = n*n + wrkbl
414  minwrk = max( 3*n + m, bdspac )
415  ELSE IF( wntua .AND. wntvo ) THEN
416 *
417 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418 *
419  wrkbl = n + lwork_dgeqrf
420  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
421  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
422  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
423  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
424  wrkbl = max( wrkbl, bdspac )
425  maxwrk = 2*n*n + wrkbl
426  minwrk = max( 3*n + m, bdspac )
427  ELSE IF( wntua .AND. wntvas ) THEN
428 *
429 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430 * 'A')
431 *
432  wrkbl = n + lwork_dgeqrf
433  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
434  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
435  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
436  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
437  wrkbl = max( wrkbl, bdspac )
438  maxwrk = n*n + wrkbl
439  minwrk = max( 3*n + m, bdspac )
440  END IF
441  ELSE
442 *
443 * Path 10 (M at least N, but not much larger)
444 *
445  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
446  $ dum(1), dum(1), -1, ierr )
447  lwork_dgebrd = int( dum(1) )
448  maxwrk = 3*n + lwork_dgebrd
449  IF( wntus .OR. wntuo ) THEN
450  CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
451  $ dum(1), -1, ierr )
452  lwork_dorgbr_q = int( dum(1) )
453  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
454  END IF
455  IF( wntua ) THEN
456  CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
457  $ dum(1), -1, ierr )
458  lwork_dorgbr_q = int( dum(1) )
459  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
460  END IF
461  IF( .NOT.wntvn ) THEN
462  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
463  END IF
464  maxwrk = max( maxwrk, bdspac )
465  minwrk = max( 3*n + m, bdspac )
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Compute space needed for DBDSQR
470 *
471  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
472  bdspac = 5*m
473 * Compute space needed for DGELQF
474  CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475  lwork_dgelqf = int( dum(1) )
476 * Compute space needed for DORGLQ
477  CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478  lwork_dorglq_n = int( dum(1) )
479  CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480  lwork_dorglq_m = int( dum(1) )
481 * Compute space needed for DGEBRD
482  CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
483  $ dum(1), dum(1), -1, ierr )
484  lwork_dgebrd = int( dum(1) )
485 * Compute space needed for DORGBR P
486  CALL dorgbr( 'P', m, m, m, a, n, dum(1),
487  $ dum(1), -1, ierr )
488  lwork_dorgbr_p = int( dum(1) )
489 * Compute space needed for DORGBR Q
490  CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
491  $ dum(1), -1, ierr )
492  lwork_dorgbr_q = int( dum(1) )
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_dgelqf
499  maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
502  maxwrk = max( maxwrk, bdspac )
503  minwrk = max( 4*m, bdspac )
504  ELSE IF( wntvo .AND. wntun ) THEN
505 *
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 *
508  wrkbl = m + lwork_dgelqf
509  wrkbl = max( wrkbl, m + lwork_dorglq_m )
510  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
511  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
512  wrkbl = max( wrkbl, bdspac )
513  maxwrk = max( m*m + wrkbl, m*m + m*n + m )
514  minwrk = max( 3*m + n, bdspac )
515  ELSE IF( wntvo .AND. wntuas ) THEN
516 *
517 * Path 3t(N much larger than M, JOBU='S' or 'A',
518 * JOBVT='O')
519 *
520  wrkbl = m + lwork_dgelqf
521  wrkbl = max( wrkbl, m + lwork_dorglq_m )
522  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
523  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
524  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
525  wrkbl = max( wrkbl, bdspac )
526  maxwrk = max( m*m + wrkbl, m*m + m*n + m )
527  minwrk = max( 3*m + n, bdspac )
528  ELSE IF( wntvs .AND. wntun ) THEN
529 *
530 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531 *
532  wrkbl = m + lwork_dgelqf
533  wrkbl = max( wrkbl, m + lwork_dorglq_m )
534  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
535  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
536  wrkbl = max( wrkbl, bdspac )
537  maxwrk = m*m + wrkbl
538  minwrk = max( 3*m + n, bdspac )
539  ELSE IF( wntvs .AND. wntuo ) THEN
540 *
541 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542 *
543  wrkbl = m + lwork_dgelqf
544  wrkbl = max( wrkbl, m + lwork_dorglq_m )
545  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
546  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
547  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
548  wrkbl = max( wrkbl, bdspac )
549  maxwrk = 2*m*m + wrkbl
550  minwrk = max( 3*m + n, bdspac )
551  ELSE IF( wntvs .AND. wntuas ) THEN
552 *
553 * Path 6t(N much larger than M, JOBU='S' or 'A',
554 * JOBVT='S')
555 *
556  wrkbl = m + lwork_dgelqf
557  wrkbl = max( wrkbl, m + lwork_dorglq_m )
558  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
559  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
560  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
561  wrkbl = max( wrkbl, bdspac )
562  maxwrk = m*m + wrkbl
563  minwrk = max( 3*m + n, bdspac )
564  ELSE IF( wntva .AND. wntun ) THEN
565 *
566 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567 *
568  wrkbl = m + lwork_dgelqf
569  wrkbl = max( wrkbl, m + lwork_dorglq_n )
570  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
571  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
572  wrkbl = max( wrkbl, bdspac )
573  maxwrk = m*m + wrkbl
574  minwrk = max( 3*m + n, bdspac )
575  ELSE IF( wntva .AND. wntuo ) THEN
576 *
577 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578 *
579  wrkbl = m + lwork_dgelqf
580  wrkbl = max( wrkbl, m + lwork_dorglq_n )
581  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
582  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
583  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
584  wrkbl = max( wrkbl, bdspac )
585  maxwrk = 2*m*m + wrkbl
586  minwrk = max( 3*m + n, bdspac )
587  ELSE IF( wntva .AND. wntuas ) THEN
588 *
589 * Path 9t(N much larger than M, JOBU='S' or 'A',
590 * JOBVT='A')
591 *
592  wrkbl = m + lwork_dgelqf
593  wrkbl = max( wrkbl, m + lwork_dorglq_n )
594  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
595  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
596  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
597  wrkbl = max( wrkbl, bdspac )
598  maxwrk = m*m + wrkbl
599  minwrk = max( 3*m + n, bdspac )
600  END IF
601  ELSE
602 *
603 * Path 10t(N greater than M, but not much larger)
604 *
605  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
606  $ dum(1), dum(1), -1, ierr )
607  lwork_dgebrd = int( dum(1) )
608  maxwrk = 3*m + lwork_dgebrd
609  IF( wntvs .OR. wntvo ) THEN
610 * Compute space needed for DORGBR P
611  CALL dorgbr( 'P', m, n, m, a, n, dum(1),
612  $ dum(1), -1, ierr )
613  lwork_dorgbr_p = int( dum(1) )
614  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
615  END IF
616  IF( wntva ) THEN
617  CALL dorgbr( 'P', n, n, m, a, n, dum(1),
618  $ dum(1), -1, ierr )
619  lwork_dorgbr_p = int( dum(1) )
620  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
621  END IF
622  IF( .NOT.wntun ) THEN
623  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
624  END IF
625  maxwrk = max( maxwrk, bdspac )
626  minwrk = max( 3*m + n, bdspac )
627  END IF
628  END IF
629  maxwrk = max( maxwrk, minwrk )
630  work( 1 ) = maxwrk
631 *
632  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
633  info = -13
634  END IF
635  END IF
636 *
637  IF( info.NE.0 ) THEN
638  CALL xerbla( 'DGESVD', -info )
639  RETURN
640  ELSE IF( lquery ) THEN
641  RETURN
642  END IF
643 *
644 * Quick return if possible
645 *
646  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
647  RETURN
648  END IF
649 *
650 * Get machine constants
651 *
652  eps = dlamch( 'P' )
653  smlnum = sqrt( dlamch( 'S' ) ) / eps
654  bignum = one / smlnum
655 *
656 * Scale A if max element outside range [SMLNUM,BIGNUM]
657 *
658  anrm = dlange( 'M', m, n, a, lda, dum )
659  iscl = 0
660  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
661  iscl = 1
662  CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
663  ELSE IF( anrm.GT.bignum ) THEN
664  iscl = 1
665  CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
666  END IF
667 *
668  IF( m.GE.n ) THEN
669 *
670 * A has at least as many rows as columns. If A has sufficiently
671 * more rows than columns, first reduce using the QR
672 * decomposition (if sufficient workspace available)
673 *
674  IF( m.GE.mnthr ) THEN
675 *
676  IF( wntun ) THEN
677 *
678 * Path 1 (M much larger than N, JOBU='N')
679 * No left singular vectors to be computed
680 *
681  itau = 1
682  iwork = itau + n
683 *
684 * Compute A=Q*R
685 * (Workspace: need 2*N, prefer N + N*NB)
686 *
687  CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
688  $ lwork-iwork+1, ierr )
689 *
690 * Zero out below R
691 *
692  IF( n .GT. 1 ) THEN
693  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
694  $ lda )
695  END IF
696  ie = 1
697  itauq = ie + n
698  itaup = itauq + n
699  iwork = itaup + n
700 *
701 * Bidiagonalize R in A
702 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
703 *
704  CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
705  $ work( itaup ), work( iwork ), lwork-iwork+1,
706  $ ierr )
707  ncvt = 0
708  IF( wntvo .OR. wntvas ) THEN
709 *
710 * If right singular vectors desired, generate P'.
711 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
712 *
713  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
714  $ work( iwork ), lwork-iwork+1, ierr )
715  ncvt = n
716  END IF
717  iwork = ie + n
718 *
719 * Perform bidiagonal QR iteration, computing right
720 * singular vectors of A in A if desired
721 * (Workspace: need BDSPAC)
722 *
723  CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
724  $ dum, 1, dum, 1, work( iwork ), info )
725 *
726 * If right singular vectors desired in VT, copy them there
727 *
728  IF( wntvas )
729  $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
730 *
731  ELSE IF( wntuo .AND. wntvn ) THEN
732 *
733 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734 * N left singular vectors to be overwritten on A and
735 * no right singular vectors to be computed
736 *
737  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
738 *
739 * Sufficient workspace for a fast algorithm
740 *
741  ir = 1
742  IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
743 *
744 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
745 *
746  ldwrku = lda
747  ldwrkr = lda
748  ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
749 *
750 * WORK(IU) is LDA by N, WORK(IR) is N by N
751 *
752  ldwrku = lda
753  ldwrkr = n
754  ELSE
755 *
756 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757 *
758  ldwrku = ( lwork-n*n-n ) / n
759  ldwrkr = n
760  END IF
761  itau = ir + ldwrkr*n
762  iwork = itau + n
763 *
764 * Compute A=Q*R
765 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
766 *
767  CALL dgeqrf( m, n, a, lda, work( itau ),
768  $ work( iwork ), lwork-iwork+1, ierr )
769 *
770 * Copy R to WORK(IR) and zero out below it
771 *
772  CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773  CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
774  $ ldwrkr )
775 *
776 * Generate Q in A
777 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
778 *
779  CALL dorgqr( m, n, n, a, lda, work( itau ),
780  $ work( iwork ), lwork-iwork+1, ierr )
781  ie = itau
782  itauq = ie + n
783  itaup = itauq + n
784  iwork = itaup + n
785 *
786 * Bidiagonalize R in WORK(IR)
787 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
788 *
789  CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790  $ work( itauq ), work( itaup ),
791  $ work( iwork ), lwork-iwork+1, ierr )
792 *
793 * Generate left vectors bidiagonalizing R
794 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
795 *
796  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
797  $ work( itauq ), work( iwork ),
798  $ lwork-iwork+1, ierr )
799  iwork = ie + n
800 *
801 * Perform bidiagonal QR iteration, computing left
802 * singular vectors of R in WORK(IR)
803 * (Workspace: need N*N + BDSPAC)
804 *
805  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
806  $ work( ir ), ldwrkr, dum, 1,
807  $ work( iwork ), info )
808  iu = ie + n
809 *
810 * Multiply Q in A by left singular vectors of R in
811 * WORK(IR), storing result in WORK(IU) and copying to A
812 * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
813 *
814  DO 10 i = 1, m, ldwrku
815  chunk = min( m-i+1, ldwrku )
816  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
817  $ lda, work( ir ), ldwrkr, zero,
818  $ work( iu ), ldwrku )
819  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
820  $ a( i, 1 ), lda )
821  10 CONTINUE
822 *
823  ELSE
824 *
825 * Insufficient workspace for a fast algorithm
826 *
827  ie = 1
828  itauq = ie + n
829  itaup = itauq + n
830  iwork = itaup + n
831 *
832 * Bidiagonalize A
833 * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
834 *
835  CALL dgebrd( m, n, a, lda, s, work( ie ),
836  $ work( itauq ), work( itaup ),
837  $ work( iwork ), lwork-iwork+1, ierr )
838 *
839 * Generate left vectors bidiagonalizing A
840 * (Workspace: need 4*N, prefer 3*N + N*NB)
841 *
842  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
843  $ work( iwork ), lwork-iwork+1, ierr )
844  iwork = ie + n
845 *
846 * Perform bidiagonal QR iteration, computing left
847 * singular vectors of A in A
848 * (Workspace: need BDSPAC)
849 *
850  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
851  $ a, lda, dum, 1, work( iwork ), info )
852 *
853  END IF
854 *
855  ELSE IF( wntuo .AND. wntvas ) THEN
856 *
857 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858 * N left singular vectors to be overwritten on A and
859 * N right singular vectors to be computed in VT
860 *
861  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
862 *
863 * Sufficient workspace for a fast algorithm
864 *
865  ir = 1
866  IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
867 *
868 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
869 *
870  ldwrku = lda
871  ldwrkr = lda
872  ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
873 *
874 * WORK(IU) is LDA by N and WORK(IR) is N by N
875 *
876  ldwrku = lda
877  ldwrkr = n
878  ELSE
879 *
880 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881 *
882  ldwrku = ( lwork-n*n-n ) / n
883  ldwrkr = n
884  END IF
885  itau = ir + ldwrkr*n
886  iwork = itau + n
887 *
888 * Compute A=Q*R
889 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
890 *
891  CALL dgeqrf( m, n, a, lda, work( itau ),
892  $ work( iwork ), lwork-iwork+1, ierr )
893 *
894 * Copy R to VT, zeroing out below it
895 *
896  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
897  IF( n.GT.1 )
898  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
899  $ vt( 2, 1 ), ldvt )
900 *
901 * Generate Q in A
902 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
903 *
904  CALL dorgqr( m, n, n, a, lda, work( itau ),
905  $ work( iwork ), lwork-iwork+1, ierr )
906  ie = itau
907  itauq = ie + n
908  itaup = itauq + n
909  iwork = itaup + n
910 *
911 * Bidiagonalize R in VT, copying result to WORK(IR)
912 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
913 *
914  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
915  $ work( itauq ), work( itaup ),
916  $ work( iwork ), lwork-iwork+1, ierr )
917  CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
918 *
919 * Generate left vectors bidiagonalizing R in WORK(IR)
920 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
921 *
922  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
923  $ work( itauq ), work( iwork ),
924  $ lwork-iwork+1, ierr )
925 *
926 * Generate right vectors bidiagonalizing R in VT
927 * (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
928 *
929  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
930  $ work( iwork ), lwork-iwork+1, ierr )
931  iwork = ie + n
932 *
933 * Perform bidiagonal QR iteration, computing left
934 * singular vectors of R in WORK(IR) and computing right
935 * singular vectors of R in VT
936 * (Workspace: need N*N + BDSPAC)
937 *
938  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
939  $ work( ir ), ldwrkr, dum, 1,
940  $ work( iwork ), info )
941  iu = ie + n
942 *
943 * Multiply Q in A by left singular vectors of R in
944 * WORK(IR), storing result in WORK(IU) and copying to A
945 * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
946 *
947  DO 20 i = 1, m, ldwrku
948  chunk = min( m-i+1, ldwrku )
949  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
950  $ lda, work( ir ), ldwrkr, zero,
951  $ work( iu ), ldwrku )
952  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
953  $ a( i, 1 ), lda )
954  20 CONTINUE
955 *
956  ELSE
957 *
958 * Insufficient workspace for a fast algorithm
959 *
960  itau = 1
961  iwork = itau + n
962 *
963 * Compute A=Q*R
964 * (Workspace: need 2*N, prefer N + N*NB)
965 *
966  CALL dgeqrf( m, n, a, lda, work( itau ),
967  $ work( iwork ), lwork-iwork+1, ierr )
968 *
969 * Copy R to VT, zeroing out below it
970 *
971  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
972  IF( n.GT.1 )
973  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
974  $ vt( 2, 1 ), ldvt )
975 *
976 * Generate Q in A
977 * (Workspace: need 2*N, prefer N + N*NB)
978 *
979  CALL dorgqr( m, n, n, a, lda, work( itau ),
980  $ work( iwork ), lwork-iwork+1, ierr )
981  ie = itau
982  itauq = ie + n
983  itaup = itauq + n
984  iwork = itaup + n
985 *
986 * Bidiagonalize R in VT
987 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
988 *
989  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
990  $ work( itauq ), work( itaup ),
991  $ work( iwork ), lwork-iwork+1, ierr )
992 *
993 * Multiply Q in A by left vectors bidiagonalizing R
994 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
995 *
996  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
997  $ work( itauq ), a, lda, work( iwork ),
998  $ lwork-iwork+1, ierr )
999 *
1000 * Generate right vectors bidiagonalizing R in VT
1001 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1002 *
1003  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1004  $ work( iwork ), lwork-iwork+1, ierr )
1005  iwork = ie + n
1006 *
1007 * Perform bidiagonal QR iteration, computing left
1008 * singular vectors of A in A and computing right
1009 * singular vectors of A in VT
1010 * (Workspace: need BDSPAC)
1011 *
1012  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1013  $ a, lda, dum, 1, work( iwork ), info )
1014 *
1015  END IF
1016 *
1017  ELSE IF( wntus ) THEN
1018 *
1019  IF( wntvn ) THEN
1020 *
1021 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022 * N left singular vectors to be computed in U and
1023 * no right singular vectors to be computed
1024 *
1025  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1026 *
1027 * Sufficient workspace for a fast algorithm
1028 *
1029  ir = 1
1030  IF( lwork.GE.wrkbl+lda*n ) THEN
1031 *
1032 * WORK(IR) is LDA by N
1033 *
1034  ldwrkr = lda
1035  ELSE
1036 *
1037 * WORK(IR) is N by N
1038 *
1039  ldwrkr = n
1040  END IF
1041  itau = ir + ldwrkr*n
1042  iwork = itau + n
1043 *
1044 * Compute A=Q*R
1045 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1046 *
1047  CALL dgeqrf( m, n, a, lda, work( itau ),
1048  $ work( iwork ), lwork-iwork+1, ierr )
1049 *
1050 * Copy R to WORK(IR), zeroing out below it
1051 *
1052  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1053  $ ldwrkr )
1054  CALL dlaset( 'L', n-1, n-1, zero, zero,
1055  $ work( ir+1 ), ldwrkr )
1056 *
1057 * Generate Q in A
1058 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059 *
1060  CALL dorgqr( m, n, n, a, lda, work( itau ),
1061  $ work( iwork ), lwork-iwork+1, ierr )
1062  ie = itau
1063  itauq = ie + n
1064  itaup = itauq + n
1065  iwork = itaup + n
1066 *
1067 * Bidiagonalize R in WORK(IR)
1068 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1069 *
1070  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1071  $ work( ie ), work( itauq ),
1072  $ work( itaup ), work( iwork ),
1073  $ lwork-iwork+1, ierr )
1074 *
1075 * Generate left vectors bidiagonalizing R in WORK(IR)
1076 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1077 *
1078  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1079  $ work( itauq ), work( iwork ),
1080  $ lwork-iwork+1, ierr )
1081  iwork = ie + n
1082 *
1083 * Perform bidiagonal QR iteration, computing left
1084 * singular vectors of R in WORK(IR)
1085 * (Workspace: need N*N + BDSPAC)
1086 *
1087  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1088  $ 1, work( ir ), ldwrkr, dum, 1,
1089  $ work( iwork ), info )
1090 *
1091 * Multiply Q in A by left singular vectors of R in
1092 * WORK(IR), storing result in U
1093 * (Workspace: need N*N)
1094 *
1095  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1096  $ work( ir ), ldwrkr, zero, u, ldu )
1097 *
1098  ELSE
1099 *
1100 * Insufficient workspace for a fast algorithm
1101 *
1102  itau = 1
1103  iwork = itau + n
1104 *
1105 * Compute A=Q*R, copying result to U
1106 * (Workspace: need 2*N, prefer N + N*NB)
1107 *
1108  CALL dgeqrf( m, n, a, lda, work( itau ),
1109  $ work( iwork ), lwork-iwork+1, ierr )
1110  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1111 *
1112 * Generate Q in U
1113 * (Workspace: need 2*N, prefer N + N*NB)
1114 *
1115  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1116  $ work( iwork ), lwork-iwork+1, ierr )
1117  ie = itau
1118  itauq = ie + n
1119  itaup = itauq + n
1120  iwork = itaup + n
1121 *
1122 * Zero out below R in A
1123 *
1124  IF( n .GT. 1 ) THEN
1125  CALL dlaset( 'L', n-1, n-1, zero, zero,
1126  $ a( 2, 1 ), lda )
1127  END IF
1128 *
1129 * Bidiagonalize R in A
1130 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1131 *
1132  CALL dgebrd( n, n, a, lda, s, work( ie ),
1133  $ work( itauq ), work( itaup ),
1134  $ work( iwork ), lwork-iwork+1, ierr )
1135 *
1136 * Multiply Q in U by left vectors bidiagonalizing R
1137 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1138 *
1139  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1140  $ work( itauq ), u, ldu, work( iwork ),
1141  $ lwork-iwork+1, ierr )
1142  iwork = ie + n
1143 *
1144 * Perform bidiagonal QR iteration, computing left
1145 * singular vectors of A in U
1146 * (Workspace: need BDSPAC)
1147 *
1148  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1149  $ 1, u, ldu, dum, 1, work( iwork ),
1150  $ info )
1151 *
1152  END IF
1153 *
1154  ELSE IF( wntvo ) THEN
1155 *
1156 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1157 * N left singular vectors to be computed in U and
1158 * N right singular vectors to be overwritten on A
1159 *
1160  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1161 *
1162 * Sufficient workspace for a fast algorithm
1163 *
1164  iu = 1
1165  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1166 *
1167 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1168 *
1169  ldwrku = lda
1170  ir = iu + ldwrku*n
1171  ldwrkr = lda
1172  ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1173 *
1174 * WORK(IU) is LDA by N and WORK(IR) is N by N
1175 *
1176  ldwrku = lda
1177  ir = iu + ldwrku*n
1178  ldwrkr = n
1179  ELSE
1180 *
1181 * WORK(IU) is N by N and WORK(IR) is N by N
1182 *
1183  ldwrku = n
1184  ir = iu + ldwrku*n
1185  ldwrkr = n
1186  END IF
1187  itau = ir + ldwrkr*n
1188  iwork = itau + n
1189 *
1190 * Compute A=Q*R
1191 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1192 *
1193  CALL dgeqrf( m, n, a, lda, work( itau ),
1194  $ work( iwork ), lwork-iwork+1, ierr )
1195 *
1196 * Copy R to WORK(IU), zeroing out below it
1197 *
1198  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1199  $ ldwrku )
1200  CALL dlaset( 'L', n-1, n-1, zero, zero,
1201  $ work( iu+1 ), ldwrku )
1202 *
1203 * Generate Q in A
1204 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1205 *
1206  CALL dorgqr( m, n, n, a, lda, work( itau ),
1207  $ work( iwork ), lwork-iwork+1, ierr )
1208  ie = itau
1209  itauq = ie + n
1210  itaup = itauq + n
1211  iwork = itaup + n
1212 *
1213 * Bidiagonalize R in WORK(IU), copying result to
1214 * WORK(IR)
1215 * (Workspace: need 2*N*N + 4*N,
1216 * prefer 2*N*N+3*N+2*N*NB)
1217 *
1218  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1219  $ work( ie ), work( itauq ),
1220  $ work( itaup ), work( iwork ),
1221  $ lwork-iwork+1, ierr )
1222  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1223  $ work( ir ), ldwrkr )
1224 *
1225 * Generate left bidiagonalizing vectors in WORK(IU)
1226 * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1227 *
1228  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1229  $ work( itauq ), work( iwork ),
1230  $ lwork-iwork+1, ierr )
1231 *
1232 * Generate right bidiagonalizing vectors in WORK(IR)
1233 * (Workspace: need 2*N*N + 4*N-1,
1234 * prefer 2*N*N+3*N+(N-1)*NB)
1235 *
1236  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1237  $ work( itaup ), work( iwork ),
1238  $ lwork-iwork+1, ierr )
1239  iwork = ie + n
1240 *
1241 * Perform bidiagonal QR iteration, computing left
1242 * singular vectors of R in WORK(IU) and computing
1243 * right singular vectors of R in WORK(IR)
1244 * (Workspace: need 2*N*N + BDSPAC)
1245 *
1246  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1247  $ work( ir ), ldwrkr, work( iu ),
1248  $ ldwrku, dum, 1, work( iwork ), info )
1249 *
1250 * Multiply Q in A by left singular vectors of R in
1251 * WORK(IU), storing result in U
1252 * (Workspace: need N*N)
1253 *
1254  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1255  $ work( iu ), ldwrku, zero, u, ldu )
1256 *
1257 * Copy right singular vectors of R to A
1258 * (Workspace: need N*N)
1259 *
1260  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1261  $ lda )
1262 *
1263  ELSE
1264 *
1265 * Insufficient workspace for a fast algorithm
1266 *
1267  itau = 1
1268  iwork = itau + n
1269 *
1270 * Compute A=Q*R, copying result to U
1271 * (Workspace: need 2*N, prefer N + N*NB)
1272 *
1273  CALL dgeqrf( m, n, a, lda, work( itau ),
1274  $ work( iwork ), lwork-iwork+1, ierr )
1275  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1276 *
1277 * Generate Q in U
1278 * (Workspace: need 2*N, prefer N + N*NB)
1279 *
1280  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1281  $ work( iwork ), lwork-iwork+1, ierr )
1282  ie = itau
1283  itauq = ie + n
1284  itaup = itauq + n
1285  iwork = itaup + n
1286 *
1287 * Zero out below R in A
1288 *
1289  IF( n .GT. 1 ) THEN
1290  CALL dlaset( 'L', n-1, n-1, zero, zero,
1291  $ a( 2, 1 ), lda )
1292  END IF
1293 *
1294 * Bidiagonalize R in A
1295 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1296 *
1297  CALL dgebrd( n, n, a, lda, s, work( ie ),
1298  $ work( itauq ), work( itaup ),
1299  $ work( iwork ), lwork-iwork+1, ierr )
1300 *
1301 * Multiply Q in U by left vectors bidiagonalizing R
1302 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1303 *
1304  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1305  $ work( itauq ), u, ldu, work( iwork ),
1306  $ lwork-iwork+1, ierr )
1307 *
1308 * Generate right vectors bidiagonalizing R in A
1309 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1310 *
1311  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1312  $ work( iwork ), lwork-iwork+1, ierr )
1313  iwork = ie + n
1314 *
1315 * Perform bidiagonal QR iteration, computing left
1316 * singular vectors of A in U and computing right
1317 * singular vectors of A in A
1318 * (Workspace: need BDSPAC)
1319 *
1320  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1321  $ lda, u, ldu, dum, 1, work( iwork ),
1322  $ info )
1323 *
1324  END IF
1325 *
1326  ELSE IF( wntvas ) THEN
1327 *
1328 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1329 * or 'A')
1330 * N left singular vectors to be computed in U and
1331 * N right singular vectors to be computed in VT
1332 *
1333  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1334 *
1335 * Sufficient workspace for a fast algorithm
1336 *
1337  iu = 1
1338  IF( lwork.GE.wrkbl+lda*n ) THEN
1339 *
1340 * WORK(IU) is LDA by N
1341 *
1342  ldwrku = lda
1343  ELSE
1344 *
1345 * WORK(IU) is N by N
1346 *
1347  ldwrku = n
1348  END IF
1349  itau = iu + ldwrku*n
1350  iwork = itau + n
1351 *
1352 * Compute A=Q*R
1353 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1354 *
1355  CALL dgeqrf( m, n, a, lda, work( itau ),
1356  $ work( iwork ), lwork-iwork+1, ierr )
1357 *
1358 * Copy R to WORK(IU), zeroing out below it
1359 *
1360  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1361  $ ldwrku )
1362  CALL dlaset( 'L', n-1, n-1, zero, zero,
1363  $ work( iu+1 ), ldwrku )
1364 *
1365 * Generate Q in A
1366 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1367 *
1368  CALL dorgqr( m, n, n, a, lda, work( itau ),
1369  $ work( iwork ), lwork-iwork+1, ierr )
1370  ie = itau
1371  itauq = ie + n
1372  itaup = itauq + n
1373  iwork = itaup + n
1374 *
1375 * Bidiagonalize R in WORK(IU), copying result to VT
1376 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1377 *
1378  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1379  $ work( ie ), work( itauq ),
1380  $ work( itaup ), work( iwork ),
1381  $ lwork-iwork+1, ierr )
1382  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1383  $ ldvt )
1384 *
1385 * Generate left bidiagonalizing vectors in WORK(IU)
1386 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1387 *
1388  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1389  $ work( itauq ), work( iwork ),
1390  $ lwork-iwork+1, ierr )
1391 *
1392 * Generate right bidiagonalizing vectors in VT
1393 * (Workspace: need N*N + 4*N-1,
1394 * prefer N*N+3*N+(N-1)*NB)
1395 *
1396  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1397  $ work( iwork ), lwork-iwork+1, ierr )
1398  iwork = ie + n
1399 *
1400 * Perform bidiagonal QR iteration, computing left
1401 * singular vectors of R in WORK(IU) and computing
1402 * right singular vectors of R in VT
1403 * (Workspace: need N*N + BDSPAC)
1404 *
1405  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1406  $ ldvt, work( iu ), ldwrku, dum, 1,
1407  $ work( iwork ), info )
1408 *
1409 * Multiply Q in A by left singular vectors of R in
1410 * WORK(IU), storing result in U
1411 * (Workspace: need N*N)
1412 *
1413  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1414  $ work( iu ), ldwrku, zero, u, ldu )
1415 *
1416  ELSE
1417 *
1418 * Insufficient workspace for a fast algorithm
1419 *
1420  itau = 1
1421  iwork = itau + n
1422 *
1423 * Compute A=Q*R, copying result to U
1424 * (Workspace: need 2*N, prefer N + N*NB)
1425 *
1426  CALL dgeqrf( m, n, a, lda, work( itau ),
1427  $ work( iwork ), lwork-iwork+1, ierr )
1428  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1429 *
1430 * Generate Q in U
1431 * (Workspace: need 2*N, prefer N + N*NB)
1432 *
1433  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1434  $ work( iwork ), lwork-iwork+1, ierr )
1435 *
1436 * Copy R to VT, zeroing out below it
1437 *
1438  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1439  IF( n.GT.1 )
1440  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1441  $ vt( 2, 1 ), ldvt )
1442  ie = itau
1443  itauq = ie + n
1444  itaup = itauq + n
1445  iwork = itaup + n
1446 *
1447 * Bidiagonalize R in VT
1448 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1449 *
1450  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1451  $ work( itauq ), work( itaup ),
1452  $ work( iwork ), lwork-iwork+1, ierr )
1453 *
1454 * Multiply Q in U by left bidiagonalizing vectors
1455 * in VT
1456 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1457 *
1458  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1459  $ work( itauq ), u, ldu, work( iwork ),
1460  $ lwork-iwork+1, ierr )
1461 *
1462 * Generate right bidiagonalizing vectors in VT
1463 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1464 *
1465  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1466  $ work( iwork ), lwork-iwork+1, ierr )
1467  iwork = ie + n
1468 *
1469 * Perform bidiagonal QR iteration, computing left
1470 * singular vectors of A in U and computing right
1471 * singular vectors of A in VT
1472 * (Workspace: need BDSPAC)
1473 *
1474  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1475  $ ldvt, u, ldu, dum, 1, work( iwork ),
1476  $ info )
1477 *
1478  END IF
1479 *
1480  END IF
1481 *
1482  ELSE IF( wntua ) THEN
1483 *
1484  IF( wntvn ) THEN
1485 *
1486 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1487 * M left singular vectors to be computed in U and
1488 * no right singular vectors to be computed
1489 *
1490  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1491 *
1492 * Sufficient workspace for a fast algorithm
1493 *
1494  ir = 1
1495  IF( lwork.GE.wrkbl+lda*n ) THEN
1496 *
1497 * WORK(IR) is LDA by N
1498 *
1499  ldwrkr = lda
1500  ELSE
1501 *
1502 * WORK(IR) is N by N
1503 *
1504  ldwrkr = n
1505  END IF
1506  itau = ir + ldwrkr*n
1507  iwork = itau + n
1508 *
1509 * Compute A=Q*R, copying result to U
1510 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1511 *
1512  CALL dgeqrf( m, n, a, lda, work( itau ),
1513  $ work( iwork ), lwork-iwork+1, ierr )
1514  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1515 *
1516 * Copy R to WORK(IR), zeroing out below it
1517 *
1518  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1519  $ ldwrkr )
1520  CALL dlaset( 'L', n-1, n-1, zero, zero,
1521  $ work( ir+1 ), ldwrkr )
1522 *
1523 * Generate Q in U
1524 * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1525 *
1526  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1527  $ work( iwork ), lwork-iwork+1, ierr )
1528  ie = itau
1529  itauq = ie + n
1530  itaup = itauq + n
1531  iwork = itaup + n
1532 *
1533 * Bidiagonalize R in WORK(IR)
1534 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1535 *
1536  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1537  $ work( ie ), work( itauq ),
1538  $ work( itaup ), work( iwork ),
1539  $ lwork-iwork+1, ierr )
1540 *
1541 * Generate left bidiagonalizing vectors in WORK(IR)
1542 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1543 *
1544  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1545  $ work( itauq ), work( iwork ),
1546  $ lwork-iwork+1, ierr )
1547  iwork = ie + n
1548 *
1549 * Perform bidiagonal QR iteration, computing left
1550 * singular vectors of R in WORK(IR)
1551 * (Workspace: need N*N + BDSPAC)
1552 *
1553  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1554  $ 1, work( ir ), ldwrkr, dum, 1,
1555  $ work( iwork ), info )
1556 *
1557 * Multiply Q in U by left singular vectors of R in
1558 * WORK(IR), storing result in A
1559 * (Workspace: need N*N)
1560 *
1561  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1562  $ work( ir ), ldwrkr, zero, a, lda )
1563 *
1564 * Copy left singular vectors of A from A to U
1565 *
1566  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1567 *
1568  ELSE
1569 *
1570 * Insufficient workspace for a fast algorithm
1571 *
1572  itau = 1
1573  iwork = itau + n
1574 *
1575 * Compute A=Q*R, copying result to U
1576 * (Workspace: need 2*N, prefer N + N*NB)
1577 *
1578  CALL dgeqrf( m, n, a, lda, work( itau ),
1579  $ work( iwork ), lwork-iwork+1, ierr )
1580  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1581 *
1582 * Generate Q in U
1583 * (Workspace: need N + M, prefer N + M*NB)
1584 *
1585  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1586  $ work( iwork ), lwork-iwork+1, ierr )
1587  ie = itau
1588  itauq = ie + n
1589  itaup = itauq + n
1590  iwork = itaup + n
1591 *
1592 * Zero out below R in A
1593 *
1594  IF( n .GT. 1 ) THEN
1595  CALL dlaset( 'L', n-1, n-1, zero, zero,
1596  $ a( 2, 1 ), lda )
1597  END IF
1598 *
1599 * Bidiagonalize R in A
1600 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1601 *
1602  CALL dgebrd( n, n, a, lda, s, work( ie ),
1603  $ work( itauq ), work( itaup ),
1604  $ work( iwork ), lwork-iwork+1, ierr )
1605 *
1606 * Multiply Q in U by left bidiagonalizing vectors
1607 * in A
1608 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1609 *
1610  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1611  $ work( itauq ), u, ldu, work( iwork ),
1612  $ lwork-iwork+1, ierr )
1613  iwork = ie + n
1614 *
1615 * Perform bidiagonal QR iteration, computing left
1616 * singular vectors of A in U
1617 * (Workspace: need BDSPAC)
1618 *
1619  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1620  $ 1, u, ldu, dum, 1, work( iwork ),
1621  $ info )
1622 *
1623  END IF
1624 *
1625  ELSE IF( wntvo ) THEN
1626 *
1627 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1628 * M left singular vectors to be computed in U and
1629 * N right singular vectors to be overwritten on A
1630 *
1631  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1632 *
1633 * Sufficient workspace for a fast algorithm
1634 *
1635  iu = 1
1636  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1637 *
1638 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1639 *
1640  ldwrku = lda
1641  ir = iu + ldwrku*n
1642  ldwrkr = lda
1643  ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1644 *
1645 * WORK(IU) is LDA by N and WORK(IR) is N by N
1646 *
1647  ldwrku = lda
1648  ir = iu + ldwrku*n
1649  ldwrkr = n
1650  ELSE
1651 *
1652 * WORK(IU) is N by N and WORK(IR) is N by N
1653 *
1654  ldwrku = n
1655  ir = iu + ldwrku*n
1656  ldwrkr = n
1657  END IF
1658  itau = ir + ldwrkr*n
1659  iwork = itau + n
1660 *
1661 * Compute A=Q*R, copying result to U
1662 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1663 *
1664  CALL dgeqrf( m, n, a, lda, work( itau ),
1665  $ work( iwork ), lwork-iwork+1, ierr )
1666  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1667 *
1668 * Generate Q in U
1669 * (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1670 *
1671  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1672  $ work( iwork ), lwork-iwork+1, ierr )
1673 *
1674 * Copy R to WORK(IU), zeroing out below it
1675 *
1676  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1677  $ ldwrku )
1678  CALL dlaset( 'L', n-1, n-1, zero, zero,
1679  $ work( iu+1 ), ldwrku )
1680  ie = itau
1681  itauq = ie + n
1682  itaup = itauq + n
1683  iwork = itaup + n
1684 *
1685 * Bidiagonalize R in WORK(IU), copying result to
1686 * WORK(IR)
1687 * (Workspace: need 2*N*N + 4*N,
1688 * prefer 2*N*N+3*N+2*N*NB)
1689 *
1690  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1691  $ work( ie ), work( itauq ),
1692  $ work( itaup ), work( iwork ),
1693  $ lwork-iwork+1, ierr )
1694  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1695  $ work( ir ), ldwrkr )
1696 *
1697 * Generate left bidiagonalizing vectors in WORK(IU)
1698 * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1699 *
1700  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1701  $ work( itauq ), work( iwork ),
1702  $ lwork-iwork+1, ierr )
1703 *
1704 * Generate right bidiagonalizing vectors in WORK(IR)
1705 * (Workspace: need 2*N*N + 4*N-1,
1706 * prefer 2*N*N+3*N+(N-1)*NB)
1707 *
1708  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1709  $ work( itaup ), work( iwork ),
1710  $ lwork-iwork+1, ierr )
1711  iwork = ie + n
1712 *
1713 * Perform bidiagonal QR iteration, computing left
1714 * singular vectors of R in WORK(IU) and computing
1715 * right singular vectors of R in WORK(IR)
1716 * (Workspace: need 2*N*N + BDSPAC)
1717 *
1718  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1719  $ work( ir ), ldwrkr, work( iu ),
1720  $ ldwrku, dum, 1, work( iwork ), info )
1721 *
1722 * Multiply Q in U by left singular vectors of R in
1723 * WORK(IU), storing result in A
1724 * (Workspace: need N*N)
1725 *
1726  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1727  $ work( iu ), ldwrku, zero, a, lda )
1728 *
1729 * Copy left singular vectors of A from A to U
1730 *
1731  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1732 *
1733 * Copy right singular vectors of R from WORK(IR) to A
1734 *
1735  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1736  $ lda )
1737 *
1738  ELSE
1739 *
1740 * Insufficient workspace for a fast algorithm
1741 *
1742  itau = 1
1743  iwork = itau + n
1744 *
1745 * Compute A=Q*R, copying result to U
1746 * (Workspace: need 2*N, prefer N + N*NB)
1747 *
1748  CALL dgeqrf( m, n, a, lda, work( itau ),
1749  $ work( iwork ), lwork-iwork+1, ierr )
1750  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1751 *
1752 * Generate Q in U
1753 * (Workspace: need N + M, prefer N + M*NB)
1754 *
1755  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1756  $ work( iwork ), lwork-iwork+1, ierr )
1757  ie = itau
1758  itauq = ie + n
1759  itaup = itauq + n
1760  iwork = itaup + n
1761 *
1762 * Zero out below R in A
1763 *
1764  IF( n .GT. 1 ) THEN
1765  CALL dlaset( 'L', n-1, n-1, zero, zero,
1766  $ a( 2, 1 ), lda )
1767  END IF
1768 *
1769 * Bidiagonalize R in A
1770 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1771 *
1772  CALL dgebrd( n, n, a, lda, s, work( ie ),
1773  $ work( itauq ), work( itaup ),
1774  $ work( iwork ), lwork-iwork+1, ierr )
1775 *
1776 * Multiply Q in U by left bidiagonalizing vectors
1777 * in A
1778 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1779 *
1780  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1781  $ work( itauq ), u, ldu, work( iwork ),
1782  $ lwork-iwork+1, ierr )
1783 *
1784 * Generate right bidiagonalizing vectors in A
1785 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1786 *
1787  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1788  $ work( iwork ), lwork-iwork+1, ierr )
1789  iwork = ie + n
1790 *
1791 * Perform bidiagonal QR iteration, computing left
1792 * singular vectors of A in U and computing right
1793 * singular vectors of A in A
1794 * (Workspace: need BDSPAC)
1795 *
1796  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1797  $ lda, u, ldu, dum, 1, work( iwork ),
1798  $ info )
1799 *
1800  END IF
1801 *
1802  ELSE IF( wntvas ) THEN
1803 *
1804 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1805 * or 'A')
1806 * M left singular vectors to be computed in U and
1807 * N right singular vectors to be computed in VT
1808 *
1809  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1810 *
1811 * Sufficient workspace for a fast algorithm
1812 *
1813  iu = 1
1814  IF( lwork.GE.wrkbl+lda*n ) THEN
1815 *
1816 * WORK(IU) is LDA by N
1817 *
1818  ldwrku = lda
1819  ELSE
1820 *
1821 * WORK(IU) is N by N
1822 *
1823  ldwrku = n
1824  END IF
1825  itau = iu + ldwrku*n
1826  iwork = itau + n
1827 *
1828 * Compute A=Q*R, copying result to U
1829 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1830 *
1831  CALL dgeqrf( m, n, a, lda, work( itau ),
1832  $ work( iwork ), lwork-iwork+1, ierr )
1833  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1834 *
1835 * Generate Q in U
1836 * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1837 *
1838  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1839  $ work( iwork ), lwork-iwork+1, ierr )
1840 *
1841 * Copy R to WORK(IU), zeroing out below it
1842 *
1843  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1844  $ ldwrku )
1845  CALL dlaset( 'L', n-1, n-1, zero, zero,
1846  $ work( iu+1 ), ldwrku )
1847  ie = itau
1848  itauq = ie + n
1849  itaup = itauq + n
1850  iwork = itaup + n
1851 *
1852 * Bidiagonalize R in WORK(IU), copying result to VT
1853 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1854 *
1855  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1856  $ work( ie ), work( itauq ),
1857  $ work( itaup ), work( iwork ),
1858  $ lwork-iwork+1, ierr )
1859  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1860  $ ldvt )
1861 *
1862 * Generate left bidiagonalizing vectors in WORK(IU)
1863 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1864 *
1865  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1866  $ work( itauq ), work( iwork ),
1867  $ lwork-iwork+1, ierr )
1868 *
1869 * Generate right bidiagonalizing vectors in VT
1870 * (Workspace: need N*N + 4*N-1,
1871 * prefer N*N+3*N+(N-1)*NB)
1872 *
1873  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1874  $ work( iwork ), lwork-iwork+1, ierr )
1875  iwork = ie + n
1876 *
1877 * Perform bidiagonal QR iteration, computing left
1878 * singular vectors of R in WORK(IU) and computing
1879 * right singular vectors of R in VT
1880 * (Workspace: need N*N + BDSPAC)
1881 *
1882  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1883  $ ldvt, work( iu ), ldwrku, dum, 1,
1884  $ work( iwork ), info )
1885 *
1886 * Multiply Q in U by left singular vectors of R in
1887 * WORK(IU), storing result in A
1888 * (Workspace: need N*N)
1889 *
1890  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1891  $ work( iu ), ldwrku, zero, a, lda )
1892 *
1893 * Copy left singular vectors of A from A to U
1894 *
1895  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1896 *
1897  ELSE
1898 *
1899 * Insufficient workspace for a fast algorithm
1900 *
1901  itau = 1
1902  iwork = itau + n
1903 *
1904 * Compute A=Q*R, copying result to U
1905 * (Workspace: need 2*N, prefer N + N*NB)
1906 *
1907  CALL dgeqrf( m, n, a, lda, work( itau ),
1908  $ work( iwork ), lwork-iwork+1, ierr )
1909  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1910 *
1911 * Generate Q in U
1912 * (Workspace: need N + M, prefer N + M*NB)
1913 *
1914  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1915  $ work( iwork ), lwork-iwork+1, ierr )
1916 *
1917 * Copy R from A to VT, zeroing out below it
1918 *
1919  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1920  IF( n.GT.1 )
1921  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1922  $ vt( 2, 1 ), ldvt )
1923  ie = itau
1924  itauq = ie + n
1925  itaup = itauq + n
1926  iwork = itaup + n
1927 *
1928 * Bidiagonalize R in VT
1929 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1930 *
1931  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1932  $ work( itauq ), work( itaup ),
1933  $ work( iwork ), lwork-iwork+1, ierr )
1934 *
1935 * Multiply Q in U by left bidiagonalizing vectors
1936 * in VT
1937 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1938 *
1939  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1940  $ work( itauq ), u, ldu, work( iwork ),
1941  $ lwork-iwork+1, ierr )
1942 *
1943 * Generate right bidiagonalizing vectors in VT
1944 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1945 *
1946  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1947  $ work( iwork ), lwork-iwork+1, ierr )
1948  iwork = ie + n
1949 *
1950 * Perform bidiagonal QR iteration, computing left
1951 * singular vectors of A in U and computing right
1952 * singular vectors of A in VT
1953 * (Workspace: need BDSPAC)
1954 *
1955  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1956  $ ldvt, u, ldu, dum, 1, work( iwork ),
1957  $ info )
1958 *
1959  END IF
1960 *
1961  END IF
1962 *
1963  END IF
1964 *
1965  ELSE
1966 *
1967 * M .LT. MNTHR
1968 *
1969 * Path 10 (M at least N, but not much larger)
1970 * Reduce to bidiagonal form without QR decomposition
1971 *
1972  ie = 1
1973  itauq = ie + n
1974  itaup = itauq + n
1975  iwork = itaup + n
1976 *
1977 * Bidiagonalize A
1978 * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1979 *
1980  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1981  $ work( itaup ), work( iwork ), lwork-iwork+1,
1982  $ ierr )
1983  IF( wntuas ) THEN
1984 *
1985 * If left singular vectors desired in U, copy result to U
1986 * and generate left bidiagonalizing vectors in U
1987 * (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1988 *
1989  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1990  IF( wntus )
1991  $ ncu = n
1992  IF( wntua )
1993  $ ncu = m
1994  CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1995  $ work( iwork ), lwork-iwork+1, ierr )
1996  END IF
1997  IF( wntvas ) THEN
1998 *
1999 * If right singular vectors desired in VT, copy result to
2000 * VT and generate right bidiagonalizing vectors in VT
2001 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2002 *
2003  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2004  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2005  $ work( iwork ), lwork-iwork+1, ierr )
2006  END IF
2007  IF( wntuo ) THEN
2008 *
2009 * If left singular vectors desired in A, generate left
2010 * bidiagonalizing vectors in A
2011 * (Workspace: need 4*N, prefer 3*N + N*NB)
2012 *
2013  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2014  $ work( iwork ), lwork-iwork+1, ierr )
2015  END IF
2016  IF( wntvo ) THEN
2017 *
2018 * If right singular vectors desired in A, generate right
2019 * bidiagonalizing vectors in A
2020 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2021 *
2022  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2023  $ work( iwork ), lwork-iwork+1, ierr )
2024  END IF
2025  iwork = ie + n
2026  IF( wntuas .OR. wntuo )
2027  $ nru = m
2028  IF( wntun )
2029  $ nru = 0
2030  IF( wntvas .OR. wntvo )
2031  $ ncvt = n
2032  IF( wntvn )
2033  $ ncvt = 0
2034  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2035 *
2036 * Perform bidiagonal QR iteration, if desired, computing
2037 * left singular vectors in U and computing right singular
2038 * vectors in VT
2039 * (Workspace: need BDSPAC)
2040 *
2041  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2042  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2043  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2044 *
2045 * Perform bidiagonal QR iteration, if desired, computing
2046 * left singular vectors in U and computing right singular
2047 * vectors in A
2048 * (Workspace: need BDSPAC)
2049 *
2050  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2051  $ u, ldu, dum, 1, work( iwork ), info )
2052  ELSE
2053 *
2054 * Perform bidiagonal QR iteration, if desired, computing
2055 * left singular vectors in A and computing right singular
2056 * vectors in VT
2057 * (Workspace: need BDSPAC)
2058 *
2059  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2060  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2061  END IF
2062 *
2063  END IF
2064 *
2065  ELSE
2066 *
2067 * A has more columns than rows. If A has sufficiently more
2068 * columns than rows, first reduce using the LQ decomposition (if
2069 * sufficient workspace available)
2070 *
2071  IF( n.GE.mnthr ) THEN
2072 *
2073  IF( wntvn ) THEN
2074 *
2075 * Path 1t(N much larger than M, JOBVT='N')
2076 * No right singular vectors to be computed
2077 *
2078  itau = 1
2079  iwork = itau + m
2080 *
2081 * Compute A=L*Q
2082 * (Workspace: need 2*M, prefer M + M*NB)
2083 *
2084  CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2085  $ lwork-iwork+1, ierr )
2086 *
2087 * Zero out above L
2088 *
2089  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2090  ie = 1
2091  itauq = ie + m
2092  itaup = itauq + m
2093  iwork = itaup + m
2094 *
2095 * Bidiagonalize L in A
2096 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2097 *
2098  CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2099  $ work( itaup ), work( iwork ), lwork-iwork+1,
2100  $ ierr )
2101  IF( wntuo .OR. wntuas ) THEN
2102 *
2103 * If left singular vectors desired, generate Q
2104 * (Workspace: need 4*M, prefer 3*M + M*NB)
2105 *
2106  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2107  $ work( iwork ), lwork-iwork+1, ierr )
2108  END IF
2109  iwork = ie + m
2110  nru = 0
2111  IF( wntuo .OR. wntuas )
2112  $ nru = m
2113 *
2114 * Perform bidiagonal QR iteration, computing left singular
2115 * vectors of A in A if desired
2116 * (Workspace: need BDSPAC)
2117 *
2118  CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2119  $ lda, dum, 1, work( iwork ), info )
2120 *
2121 * If left singular vectors desired in U, copy them there
2122 *
2123  IF( wntuas )
2124  $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2125 *
2126  ELSE IF( wntvo .AND. wntun ) THEN
2127 *
2128 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2129 * M right singular vectors to be overwritten on A and
2130 * no left singular vectors to be computed
2131 *
2132  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2133 *
2134 * Sufficient workspace for a fast algorithm
2135 *
2136  ir = 1
2137  IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2138 *
2139 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2140 *
2141  ldwrku = lda
2142  chunk = n
2143  ldwrkr = lda
2144  ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2145 *
2146 * WORK(IU) is LDA by N and WORK(IR) is M by M
2147 *
2148  ldwrku = lda
2149  chunk = n
2150  ldwrkr = m
2151  ELSE
2152 *
2153 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2154 *
2155  ldwrku = m
2156  chunk = ( lwork-m*m-m ) / m
2157  ldwrkr = m
2158  END IF
2159  itau = ir + ldwrkr*m
2160  iwork = itau + m
2161 *
2162 * Compute A=L*Q
2163 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2164 *
2165  CALL dgelqf( m, n, a, lda, work( itau ),
2166  $ work( iwork ), lwork-iwork+1, ierr )
2167 *
2168 * Copy L to WORK(IR) and zero out above it
2169 *
2170  CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2171  CALL dlaset( 'U', m-1, m-1, zero, zero,
2172  $ work( ir+ldwrkr ), ldwrkr )
2173 *
2174 * Generate Q in A
2175 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2176 *
2177  CALL dorglq( m, n, m, a, lda, work( itau ),
2178  $ work( iwork ), lwork-iwork+1, ierr )
2179  ie = itau
2180  itauq = ie + m
2181  itaup = itauq + m
2182  iwork = itaup + m
2183 *
2184 * Bidiagonalize L in WORK(IR)
2185 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2186 *
2187  CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2188  $ work( itauq ), work( itaup ),
2189  $ work( iwork ), lwork-iwork+1, ierr )
2190 *
2191 * Generate right vectors bidiagonalizing L
2192 * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2193 *
2194  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2195  $ work( itaup ), work( iwork ),
2196  $ lwork-iwork+1, ierr )
2197  iwork = ie + m
2198 *
2199 * Perform bidiagonal QR iteration, computing right
2200 * singular vectors of L in WORK(IR)
2201 * (Workspace: need M*M + BDSPAC)
2202 *
2203  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2204  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2205  $ work( iwork ), info )
2206  iu = ie + m
2207 *
2208 * Multiply right singular vectors of L in WORK(IR) by Q
2209 * in A, storing result in WORK(IU) and copying to A
2210 * (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2211 *
2212  DO 30 i = 1, n, chunk
2213  blk = min( n-i+1, chunk )
2214  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2215  $ ldwrkr, a( 1, i ), lda, zero,
2216  $ work( iu ), ldwrku )
2217  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2218  $ a( 1, i ), lda )
2219  30 CONTINUE
2220 *
2221  ELSE
2222 *
2223 * Insufficient workspace for a fast algorithm
2224 *
2225  ie = 1
2226  itauq = ie + m
2227  itaup = itauq + m
2228  iwork = itaup + m
2229 *
2230 * Bidiagonalize A
2231 * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2232 *
2233  CALL dgebrd( m, n, a, lda, s, work( ie ),
2234  $ work( itauq ), work( itaup ),
2235  $ work( iwork ), lwork-iwork+1, ierr )
2236 *
2237 * Generate right vectors bidiagonalizing A
2238 * (Workspace: need 4*M, prefer 3*M + M*NB)
2239 *
2240  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2241  $ work( iwork ), lwork-iwork+1, ierr )
2242  iwork = ie + m
2243 *
2244 * Perform bidiagonal QR iteration, computing right
2245 * singular vectors of A in A
2246 * (Workspace: need BDSPAC)
2247 *
2248  CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2249  $ dum, 1, dum, 1, work( iwork ), info )
2250 *
2251  END IF
2252 *
2253  ELSE IF( wntvo .AND. wntuas ) THEN
2254 *
2255 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2256 * M right singular vectors to be overwritten on A and
2257 * M left singular vectors to be computed in U
2258 *
2259  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2260 *
2261 * Sufficient workspace for a fast algorithm
2262 *
2263  ir = 1
2264  IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2265 *
2266 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2267 *
2268  ldwrku = lda
2269  chunk = n
2270  ldwrkr = lda
2271  ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2272 *
2273 * WORK(IU) is LDA by N and WORK(IR) is M by M
2274 *
2275  ldwrku = lda
2276  chunk = n
2277  ldwrkr = m
2278  ELSE
2279 *
2280 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2281 *
2282  ldwrku = m
2283  chunk = ( lwork-m*m-m ) / m
2284  ldwrkr = m
2285  END IF
2286  itau = ir + ldwrkr*m
2287  iwork = itau + m
2288 *
2289 * Compute A=L*Q
2290 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2291 *
2292  CALL dgelqf( m, n, a, lda, work( itau ),
2293  $ work( iwork ), lwork-iwork+1, ierr )
2294 *
2295 * Copy L to U, zeroing about above it
2296 *
2297  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2298  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2299  $ ldu )
2300 *
2301 * Generate Q in A
2302 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2303 *
2304  CALL dorglq( m, n, m, a, lda, work( itau ),
2305  $ work( iwork ), lwork-iwork+1, ierr )
2306  ie = itau
2307  itauq = ie + m
2308  itaup = itauq + m
2309  iwork = itaup + m
2310 *
2311 * Bidiagonalize L in U, copying result to WORK(IR)
2312 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2313 *
2314  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2315  $ work( itauq ), work( itaup ),
2316  $ work( iwork ), lwork-iwork+1, ierr )
2317  CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2318 *
2319 * Generate right vectors bidiagonalizing L in WORK(IR)
2320 * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2321 *
2322  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2323  $ work( itaup ), work( iwork ),
2324  $ lwork-iwork+1, ierr )
2325 *
2326 * Generate left vectors bidiagonalizing L in U
2327 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2328 *
2329  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2330  $ work( iwork ), lwork-iwork+1, ierr )
2331  iwork = ie + m
2332 *
2333 * Perform bidiagonal QR iteration, computing left
2334 * singular vectors of L in U, and computing right
2335 * singular vectors of L in WORK(IR)
2336 * (Workspace: need M*M + BDSPAC)
2337 *
2338  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2339  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2340  $ work( iwork ), info )
2341  iu = ie + m
2342 *
2343 * Multiply right singular vectors of L in WORK(IR) by Q
2344 * in A, storing result in WORK(IU) and copying to A
2345 * (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2346 *
2347  DO 40 i = 1, n, chunk
2348  blk = min( n-i+1, chunk )
2349  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2350  $ ldwrkr, a( 1, i ), lda, zero,
2351  $ work( iu ), ldwrku )
2352  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2353  $ a( 1, i ), lda )
2354  40 CONTINUE
2355 *
2356  ELSE
2357 *
2358 * Insufficient workspace for a fast algorithm
2359 *
2360  itau = 1
2361  iwork = itau + m
2362 *
2363 * Compute A=L*Q
2364 * (Workspace: need 2*M, prefer M + M*NB)
2365 *
2366  CALL dgelqf( m, n, a, lda, work( itau ),
2367  $ work( iwork ), lwork-iwork+1, ierr )
2368 *
2369 * Copy L to U, zeroing out above it
2370 *
2371  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2372  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2373  $ ldu )
2374 *
2375 * Generate Q in A
2376 * (Workspace: need 2*M, prefer M + M*NB)
2377 *
2378  CALL dorglq( m, n, m, a, lda, work( itau ),
2379  $ work( iwork ), lwork-iwork+1, ierr )
2380  ie = itau
2381  itauq = ie + m
2382  itaup = itauq + m
2383  iwork = itaup + m
2384 *
2385 * Bidiagonalize L in U
2386 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2387 *
2388  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2389  $ work( itauq ), work( itaup ),
2390  $ work( iwork ), lwork-iwork+1, ierr )
2391 *
2392 * Multiply right vectors bidiagonalizing L by Q in A
2393 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2394 *
2395  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2396  $ work( itaup ), a, lda, work( iwork ),
2397  $ lwork-iwork+1, ierr )
2398 *
2399 * Generate left vectors bidiagonalizing L in U
2400 * (Workspace: need 4*M, prefer 3*M + M*NB)
2401 *
2402  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2403  $ work( iwork ), lwork-iwork+1, ierr )
2404  iwork = ie + m
2405 *
2406 * Perform bidiagonal QR iteration, computing left
2407 * singular vectors of A in U and computing right
2408 * singular vectors of A in A
2409 * (Workspace: need BDSPAC)
2410 *
2411  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2412  $ u, ldu, dum, 1, work( iwork ), info )
2413 *
2414  END IF
2415 *
2416  ELSE IF( wntvs ) THEN
2417 *
2418  IF( wntun ) THEN
2419 *
2420 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2421 * M right singular vectors to be computed in VT and
2422 * no left singular vectors to be computed
2423 *
2424  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2425 *
2426 * Sufficient workspace for a fast algorithm
2427 *
2428  ir = 1
2429  IF( lwork.GE.wrkbl+lda*m ) THEN
2430 *
2431 * WORK(IR) is LDA by M
2432 *
2433  ldwrkr = lda
2434  ELSE
2435 *
2436 * WORK(IR) is M by M
2437 *
2438  ldwrkr = m
2439  END IF
2440  itau = ir + ldwrkr*m
2441  iwork = itau + m
2442 *
2443 * Compute A=L*Q
2444 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2445 *
2446  CALL dgelqf( m, n, a, lda, work( itau ),
2447  $ work( iwork ), lwork-iwork+1, ierr )
2448 *
2449 * Copy L to WORK(IR), zeroing out above it
2450 *
2451  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2452  $ ldwrkr )
2453  CALL dlaset( 'U', m-1, m-1, zero, zero,
2454  $ work( ir+ldwrkr ), ldwrkr )
2455 *
2456 * Generate Q in A
2457 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2458 *
2459  CALL dorglq( m, n, m, a, lda, work( itau ),
2460  $ work( iwork ), lwork-iwork+1, ierr )
2461  ie = itau
2462  itauq = ie + m
2463  itaup = itauq + m
2464  iwork = itaup + m
2465 *
2466 * Bidiagonalize L in WORK(IR)
2467 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2468 *
2469  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2470  $ work( ie ), work( itauq ),
2471  $ work( itaup ), work( iwork ),
2472  $ lwork-iwork+1, ierr )
2473 *
2474 * Generate right vectors bidiagonalizing L in
2475 * WORK(IR)
2476 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2477 *
2478  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2479  $ work( itaup ), work( iwork ),
2480  $ lwork-iwork+1, ierr )
2481  iwork = ie + m
2482 *
2483 * Perform bidiagonal QR iteration, computing right
2484 * singular vectors of L in WORK(IR)
2485 * (Workspace: need M*M + BDSPAC)
2486 *
2487  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2488  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2489  $ work( iwork ), info )
2490 *
2491 * Multiply right singular vectors of L in WORK(IR) by
2492 * Q in A, storing result in VT
2493 * (Workspace: need M*M)
2494 *
2495  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2496  $ ldwrkr, a, lda, zero, vt, ldvt )
2497 *
2498  ELSE
2499 *
2500 * Insufficient workspace for a fast algorithm
2501 *
2502  itau = 1
2503  iwork = itau + m
2504 *
2505 * Compute A=L*Q
2506 * (Workspace: need 2*M, prefer M + M*NB)
2507 *
2508  CALL dgelqf( m, n, a, lda, work( itau ),
2509  $ work( iwork ), lwork-iwork+1, ierr )
2510 *
2511 * Copy result to VT
2512 *
2513  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2514 *
2515 * Generate Q in VT
2516 * (Workspace: need 2*M, prefer M + M*NB)
2517 *
2518  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2519  $ work( iwork ), lwork-iwork+1, ierr )
2520  ie = itau
2521  itauq = ie + m
2522  itaup = itauq + m
2523  iwork = itaup + m
2524 *
2525 * Zero out above L in A
2526 *
2527  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2528  $ lda )
2529 *
2530 * Bidiagonalize L in A
2531 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2532 *
2533  CALL dgebrd( m, m, a, lda, s, work( ie ),
2534  $ work( itauq ), work( itaup ),
2535  $ work( iwork ), lwork-iwork+1, ierr )
2536 *
2537 * Multiply right vectors bidiagonalizing L by Q in VT
2538 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2539 *
2540  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2541  $ work( itaup ), vt, ldvt,
2542  $ work( iwork ), lwork-iwork+1, ierr )
2543  iwork = ie + m
2544 *
2545 * Perform bidiagonal QR iteration, computing right
2546 * singular vectors of A in VT
2547 * (Workspace: need BDSPAC)
2548 *
2549  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2550  $ ldvt, dum, 1, dum, 1, work( iwork ),
2551  $ info )
2552 *
2553  END IF
2554 *
2555  ELSE IF( wntuo ) THEN
2556 *
2557 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2558 * M right singular vectors to be computed in VT and
2559 * M left singular vectors to be overwritten on A
2560 *
2561  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2562 *
2563 * Sufficient workspace for a fast algorithm
2564 *
2565  iu = 1
2566  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2567 *
2568 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2569 *
2570  ldwrku = lda
2571  ir = iu + ldwrku*m
2572  ldwrkr = lda
2573  ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2574 *
2575 * WORK(IU) is LDA by M and WORK(IR) is M by M
2576 *
2577  ldwrku = lda
2578  ir = iu + ldwrku*m
2579  ldwrkr = m
2580  ELSE
2581 *
2582 * WORK(IU) is M by M and WORK(IR) is M by M
2583 *
2584  ldwrku = m
2585  ir = iu + ldwrku*m
2586  ldwrkr = m
2587  END IF
2588  itau = ir + ldwrkr*m
2589  iwork = itau + m
2590 *
2591 * Compute A=L*Q
2592 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2593 *
2594  CALL dgelqf( m, n, a, lda, work( itau ),
2595  $ work( iwork ), lwork-iwork+1, ierr )
2596 *
2597 * Copy L to WORK(IU), zeroing out below it
2598 *
2599  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2600  $ ldwrku )
2601  CALL dlaset( 'U', m-1, m-1, zero, zero,
2602  $ work( iu+ldwrku ), ldwrku )
2603 *
2604 * Generate Q in A
2605 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2606 *
2607  CALL dorglq( m, n, m, a, lda, work( itau ),
2608  $ work( iwork ), lwork-iwork+1, ierr )
2609  ie = itau
2610  itauq = ie + m
2611  itaup = itauq + m
2612  iwork = itaup + m
2613 *
2614 * Bidiagonalize L in WORK(IU), copying result to
2615 * WORK(IR)
2616 * (Workspace: need 2*M*M + 4*M,
2617 * prefer 2*M*M+3*M+2*M*NB)
2618 *
2619  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2620  $ work( ie ), work( itauq ),
2621  $ work( itaup ), work( iwork ),
2622  $ lwork-iwork+1, ierr )
2623  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2624  $ work( ir ), ldwrkr )
2625 *
2626 * Generate right bidiagonalizing vectors in WORK(IU)
2627 * (Workspace: need 2*M*M + 4*M-1,
2628 * prefer 2*M*M+3*M+(M-1)*NB)
2629 *
2630  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2631  $ work( itaup ), work( iwork ),
2632  $ lwork-iwork+1, ierr )
2633 *
2634 * Generate left bidiagonalizing vectors in WORK(IR)
2635 * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2636 *
2637  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2638  $ work( itauq ), work( iwork ),
2639  $ lwork-iwork+1, ierr )
2640  iwork = ie + m
2641 *
2642 * Perform bidiagonal QR iteration, computing left
2643 * singular vectors of L in WORK(IR) and computing
2644 * right singular vectors of L in WORK(IU)
2645 * (Workspace: need 2*M*M + BDSPAC)
2646 *
2647  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2648  $ work( iu ), ldwrku, work( ir ),
2649  $ ldwrkr, dum, 1, work( iwork ), info )
2650 *
2651 * Multiply right singular vectors of L in WORK(IU) by
2652 * Q in A, storing result in VT
2653 * (Workspace: need M*M)
2654 *
2655  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2656  $ ldwrku, a, lda, zero, vt, ldvt )
2657 *
2658 * Copy left singular vectors of L to A
2659 * (Workspace: need M*M)
2660 *
2661  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2662  $ lda )
2663 *
2664  ELSE
2665 *
2666 * Insufficient workspace for a fast algorithm
2667 *
2668  itau = 1
2669  iwork = itau + m
2670 *
2671 * Compute A=L*Q, copying result to VT
2672 * (Workspace: need 2*M, prefer M + M*NB)
2673 *
2674  CALL dgelqf( m, n, a, lda, work( itau ),
2675  $ work( iwork ), lwork-iwork+1, ierr )
2676  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2677 *
2678 * Generate Q in VT
2679 * (Workspace: need 2*M, prefer M + M*NB)
2680 *
2681  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2682  $ work( iwork ), lwork-iwork+1, ierr )
2683  ie = itau
2684  itauq = ie + m
2685  itaup = itauq + m
2686  iwork = itaup + m
2687 *
2688 * Zero out above L in A
2689 *
2690  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2691  $ lda )
2692 *
2693 * Bidiagonalize L in A
2694 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2695 *
2696  CALL dgebrd( m, m, a, lda, s, work( ie ),
2697  $ work( itauq ), work( itaup ),
2698  $ work( iwork ), lwork-iwork+1, ierr )
2699 *
2700 * Multiply right vectors bidiagonalizing L by Q in VT
2701 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2702 *
2703  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2704  $ work( itaup ), vt, ldvt,
2705  $ work( iwork ), lwork-iwork+1, ierr )
2706 *
2707 * Generate left bidiagonalizing vectors of L in A
2708 * (Workspace: need 4*M, prefer 3*M + M*NB)
2709 *
2710  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2711  $ work( iwork ), lwork-iwork+1, ierr )
2712  iwork = ie + m
2713 *
2714 * Perform bidiagonal QR iteration, compute left
2715 * singular vectors of A in A and compute right
2716 * singular vectors of A in VT
2717 * (Workspace: need BDSPAC)
2718 *
2719  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2720  $ ldvt, a, lda, dum, 1, work( iwork ),
2721  $ info )
2722 *
2723  END IF
2724 *
2725  ELSE IF( wntuas ) THEN
2726 *
2727 * Path 6t(N much larger than M, JOBU='S' or 'A',
2728 * JOBVT='S')
2729 * M right singular vectors to be computed in VT and
2730 * M left singular vectors to be computed in U
2731 *
2732  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2733 *
2734 * Sufficient workspace for a fast algorithm
2735 *
2736  iu = 1
2737  IF( lwork.GE.wrkbl+lda*m ) THEN
2738 *
2739 * WORK(IU) is LDA by N
2740 *
2741  ldwrku = lda
2742  ELSE
2743 *
2744 * WORK(IU) is LDA by M
2745 *
2746  ldwrku = m
2747  END IF
2748  itau = iu + ldwrku*m
2749  iwork = itau + m
2750 *
2751 * Compute A=L*Q
2752 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2753 *
2754  CALL dgelqf( m, n, a, lda, work( itau ),
2755  $ work( iwork ), lwork-iwork+1, ierr )
2756 *
2757 * Copy L to WORK(IU), zeroing out above it
2758 *
2759  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2760  $ ldwrku )
2761  CALL dlaset( 'U', m-1, m-1, zero, zero,
2762  $ work( iu+ldwrku ), ldwrku )
2763 *
2764 * Generate Q in A
2765 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2766 *
2767  CALL dorglq( m, n, m, a, lda, work( itau ),
2768  $ work( iwork ), lwork-iwork+1, ierr )
2769  ie = itau
2770  itauq = ie + m
2771  itaup = itauq + m
2772  iwork = itaup + m
2773 *
2774 * Bidiagonalize L in WORK(IU), copying result to U
2775 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2776 *
2777  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2778  $ work( ie ), work( itauq ),
2779  $ work( itaup ), work( iwork ),
2780  $ lwork-iwork+1, ierr )
2781  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2782  $ ldu )
2783 *
2784 * Generate right bidiagonalizing vectors in WORK(IU)
2785 * (Workspace: need M*M + 4*M-1,
2786 * prefer M*M+3*M+(M-1)*NB)
2787 *
2788  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2789  $ work( itaup ), work( iwork ),
2790  $ lwork-iwork+1, ierr )
2791 *
2792 * Generate left bidiagonalizing vectors in U
2793 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2794 *
2795  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2796  $ work( iwork ), lwork-iwork+1, ierr )
2797  iwork = ie + m
2798 *
2799 * Perform bidiagonal QR iteration, computing left
2800 * singular vectors of L in U and computing right
2801 * singular vectors of L in WORK(IU)
2802 * (Workspace: need M*M + BDSPAC)
2803 *
2804  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2805  $ work( iu ), ldwrku, u, ldu, dum, 1,
2806  $ work( iwork ), info )
2807 *
2808 * Multiply right singular vectors of L in WORK(IU) by
2809 * Q in A, storing result in VT
2810 * (Workspace: need M*M)
2811 *
2812  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2813  $ ldwrku, a, lda, zero, vt, ldvt )
2814 *
2815  ELSE
2816 *
2817 * Insufficient workspace for a fast algorithm
2818 *
2819  itau = 1
2820  iwork = itau + m
2821 *
2822 * Compute A=L*Q, copying result to VT
2823 * (Workspace: need 2*M, prefer M + M*NB)
2824 *
2825  CALL dgelqf( m, n, a, lda, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2828 *
2829 * Generate Q in VT
2830 * (Workspace: need 2*M, prefer M + M*NB)
2831 *
2832  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2833  $ work( iwork ), lwork-iwork+1, ierr )
2834 *
2835 * Copy L to U, zeroing out above it
2836 *
2837  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2838  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2839  $ ldu )
2840  ie = itau
2841  itauq = ie + m
2842  itaup = itauq + m
2843  iwork = itaup + m
2844 *
2845 * Bidiagonalize L in U
2846 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2847 *
2848  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2849  $ work( itauq ), work( itaup ),
2850  $ work( iwork ), lwork-iwork+1, ierr )
2851 *
2852 * Multiply right bidiagonalizing vectors in U by Q
2853 * in VT
2854 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2855 *
2856  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2857  $ work( itaup ), vt, ldvt,
2858  $ work( iwork ), lwork-iwork+1, ierr )
2859 *
2860 * Generate left bidiagonalizing vectors in U
2861 * (Workspace: need 4*M, prefer 3*M + M*NB)
2862 *
2863  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2864  $ work( iwork ), lwork-iwork+1, ierr )
2865  iwork = ie + m
2866 *
2867 * Perform bidiagonal QR iteration, computing left
2868 * singular vectors of A in U and computing right
2869 * singular vectors of A in VT
2870 * (Workspace: need BDSPAC)
2871 *
2872  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2873  $ ldvt, u, ldu, dum, 1, work( iwork ),
2874  $ info )
2875 *
2876  END IF
2877 *
2878  END IF
2879 *
2880  ELSE IF( wntva ) THEN
2881 *
2882  IF( wntun ) THEN
2883 *
2884 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2885 * N right singular vectors to be computed in VT and
2886 * no left singular vectors to be computed
2887 *
2888  IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2889 *
2890 * Sufficient workspace for a fast algorithm
2891 *
2892  ir = 1
2893  IF( lwork.GE.wrkbl+lda*m ) THEN
2894 *
2895 * WORK(IR) is LDA by M
2896 *
2897  ldwrkr = lda
2898  ELSE
2899 *
2900 * WORK(IR) is M by M
2901 *
2902  ldwrkr = m
2903  END IF
2904  itau = ir + ldwrkr*m
2905  iwork = itau + m
2906 *
2907 * Compute A=L*Q, copying result to VT
2908 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2909 *
2910  CALL dgelqf( m, n, a, lda, work( itau ),
2911  $ work( iwork ), lwork-iwork+1, ierr )
2912  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2913 *
2914 * Copy L to WORK(IR), zeroing out above it
2915 *
2916  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2917  $ ldwrkr )
2918  CALL dlaset( 'U', m-1, m-1, zero, zero,
2919  $ work( ir+ldwrkr ), ldwrkr )
2920 *
2921 * Generate Q in VT
2922 * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2923 *
2924  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2925  $ work( iwork ), lwork-iwork+1, ierr )
2926  ie = itau
2927  itauq = ie + m
2928  itaup = itauq + m
2929  iwork = itaup + m
2930 *
2931 * Bidiagonalize L in WORK(IR)
2932 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2933 *
2934  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2935  $ work( ie ), work( itauq ),
2936  $ work( itaup ), work( iwork ),
2937  $ lwork-iwork+1, ierr )
2938 *
2939 * Generate right bidiagonalizing vectors in WORK(IR)
2940 * (Workspace: need M*M + 4*M-1,
2941 * prefer M*M+3*M+(M-1)*NB)
2942 *
2943  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2944  $ work( itaup ), work( iwork ),
2945  $ lwork-iwork+1, ierr )
2946  iwork = ie + m
2947 *
2948 * Perform bidiagonal QR iteration, computing right
2949 * singular vectors of L in WORK(IR)
2950 * (Workspace: need M*M + BDSPAC)
2951 *
2952  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2953  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2954  $ work( iwork ), info )
2955 *
2956 * Multiply right singular vectors of L in WORK(IR) by
2957 * Q in VT, storing result in A
2958 * (Workspace: need M*M)
2959 *
2960  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2961  $ ldwrkr, vt, ldvt, zero, a, lda )
2962 *
2963 * Copy right singular vectors of A from A to VT
2964 *
2965  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2966 *
2967  ELSE
2968 *
2969 * Insufficient workspace for a fast algorithm
2970 *
2971  itau = 1
2972  iwork = itau + m
2973 *
2974 * Compute A=L*Q, copying result to VT
2975 * (Workspace: need 2*M, prefer M + M*NB)
2976 *
2977  CALL dgelqf( m, n, a, lda, work( itau ),
2978  $ work( iwork ), lwork-iwork+1, ierr )
2979  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2980 *
2981 * Generate Q in VT
2982 * (Workspace: need M + N, prefer M + N*NB)
2983 *
2984  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2985  $ work( iwork ), lwork-iwork+1, ierr )
2986  ie = itau
2987  itauq = ie + m
2988  itaup = itauq + m
2989  iwork = itaup + m
2990 *
2991 * Zero out above L in A
2992 *
2993  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2994  $ lda )
2995 *
2996 * Bidiagonalize L in A
2997 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2998 *
2999  CALL dgebrd( m, m, a, lda, s, work( ie ),
3000  $ work( itauq ), work( itaup ),
3001  $ work( iwork ), lwork-iwork+1, ierr )
3002 *
3003 * Multiply right bidiagonalizing vectors in A by Q
3004 * in VT
3005 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3006 *
3007  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3008  $ work( itaup ), vt, ldvt,
3009  $ work( iwork ), lwork-iwork+1, ierr )
3010  iwork = ie + m
3011 *
3012 * Perform bidiagonal QR iteration, computing right
3013 * singular vectors of A in VT
3014 * (Workspace: need BDSPAC)
3015 *
3016  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3017  $ ldvt, dum, 1, dum, 1, work( iwork ),
3018  $ info )
3019 *
3020  END IF
3021 *
3022  ELSE IF( wntuo ) THEN
3023 *
3024 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3025 * N right singular vectors to be computed in VT and
3026 * M left singular vectors to be overwritten on A
3027 *
3028  IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3029 *
3030 * Sufficient workspace for a fast algorithm
3031 *
3032  iu = 1
3033  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3034 *
3035 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3036 *
3037  ldwrku = lda
3038  ir = iu + ldwrku*m
3039  ldwrkr = lda
3040  ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3041 *
3042 * WORK(IU) is LDA by M and WORK(IR) is M by M
3043 *
3044  ldwrku = lda
3045  ir = iu + ldwrku*m
3046  ldwrkr = m
3047  ELSE
3048 *
3049 * WORK(IU) is M by M and WORK(IR) is M by M
3050 *
3051  ldwrku = m
3052  ir = iu + ldwrku*m
3053  ldwrkr = m
3054  END IF
3055  itau = ir + ldwrkr*m
3056  iwork = itau + m
3057 *
3058 * Compute A=L*Q, copying result to VT
3059 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3060 *
3061  CALL dgelqf( m, n, a, lda, work( itau ),
3062  $ work( iwork ), lwork-iwork+1, ierr )
3063  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3064 *
3065 * Generate Q in VT
3066 * (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3067 *
3068  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3069  $ work( iwork ), lwork-iwork+1, ierr )
3070 *
3071 * Copy L to WORK(IU), zeroing out above it
3072 *
3073  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3074  $ ldwrku )
3075  CALL dlaset( 'U', m-1, m-1, zero, zero,
3076  $ work( iu+ldwrku ), ldwrku )
3077  ie = itau
3078  itauq = ie + m
3079  itaup = itauq + m
3080  iwork = itaup + m
3081 *
3082 * Bidiagonalize L in WORK(IU), copying result to
3083 * WORK(IR)
3084 * (Workspace: need 2*M*M + 4*M,
3085 * prefer 2*M*M+3*M+2*M*NB)
3086 *
3087  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3088  $ work( ie ), work( itauq ),
3089  $ work( itaup ), work( iwork ),
3090  $ lwork-iwork+1, ierr )
3091  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3092  $ work( ir ), ldwrkr )
3093 *
3094 * Generate right bidiagonalizing vectors in WORK(IU)
3095 * (Workspace: need 2*M*M + 4*M-1,
3096 * prefer 2*M*M+3*M+(M-1)*NB)
3097 *
3098  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3099  $ work( itaup ), work( iwork ),
3100  $ lwork-iwork+1, ierr )
3101 *
3102 * Generate left bidiagonalizing vectors in WORK(IR)
3103 * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3104 *
3105  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3106  $ work( itauq ), work( iwork ),
3107  $ lwork-iwork+1, ierr )
3108  iwork = ie + m
3109 *
3110 * Perform bidiagonal QR iteration, computing left
3111 * singular vectors of L in WORK(IR) and computing
3112 * right singular vectors of L in WORK(IU)
3113 * (Workspace: need 2*M*M + BDSPAC)
3114 *
3115  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3116  $ work( iu ), ldwrku, work( ir ),
3117  $ ldwrkr, dum, 1, work( iwork ), info )
3118 *
3119 * Multiply right singular vectors of L in WORK(IU) by
3120 * Q in VT, storing result in A
3121 * (Workspace: need M*M)
3122 *
3123  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3124  $ ldwrku, vt, ldvt, zero, a, lda )
3125 *
3126 * Copy right singular vectors of A from A to VT
3127 *
3128  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3129 *
3130 * Copy left singular vectors of A from WORK(IR) to A
3131 *
3132  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3133  $ lda )
3134 *
3135  ELSE
3136 *
3137 * Insufficient workspace for a fast algorithm
3138 *
3139  itau = 1
3140  iwork = itau + m
3141 *
3142 * Compute A=L*Q, copying result to VT
3143 * (Workspace: need 2*M, prefer M + M*NB)
3144 *
3145  CALL dgelqf( m, n, a, lda, work( itau ),
3146  $ work( iwork ), lwork-iwork+1, ierr )
3147  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3148 *
3149 * Generate Q in VT
3150 * (Workspace: need M + N, prefer M + N*NB)
3151 *
3152  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3153  $ work( iwork ), lwork-iwork+1, ierr )
3154  ie = itau
3155  itauq = ie + m
3156  itaup = itauq + m
3157  iwork = itaup + m
3158 *
3159 * Zero out above L in A
3160 *
3161  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3162  $ lda )
3163 *
3164 * Bidiagonalize L in A
3165 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3166 *
3167  CALL dgebrd( m, m, a, lda, s, work( ie ),
3168  $ work( itauq ), work( itaup ),
3169  $ work( iwork ), lwork-iwork+1, ierr )
3170 *
3171 * Multiply right bidiagonalizing vectors in A by Q
3172 * in VT
3173 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3174 *
3175  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3176  $ work( itaup ), vt, ldvt,
3177  $ work( iwork ), lwork-iwork+1, ierr )
3178 *
3179 * Generate left bidiagonalizing vectors in A
3180 * (Workspace: need 4*M, prefer 3*M + M*NB)
3181 *
3182  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3183  $ work( iwork ), lwork-iwork+1, ierr )
3184  iwork = ie + m
3185 *
3186 * Perform bidiagonal QR iteration, computing left
3187 * singular vectors of A in A and computing right
3188 * singular vectors of A in VT
3189 * (Workspace: need BDSPAC)
3190 *
3191  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3192  $ ldvt, a, lda, dum, 1, work( iwork ),
3193  $ info )
3194 *
3195  END IF
3196 *
3197  ELSE IF( wntuas ) THEN
3198 *
3199 * Path 9t(N much larger than M, JOBU='S' or 'A',
3200 * JOBVT='A')
3201 * N right singular vectors to be computed in VT and
3202 * M left singular vectors to be computed in U
3203 *
3204  IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3205 *
3206 * Sufficient workspace for a fast algorithm
3207 *
3208  iu = 1
3209  IF( lwork.GE.wrkbl+lda*m ) THEN
3210 *
3211 * WORK(IU) is LDA by M
3212 *
3213  ldwrku = lda
3214  ELSE
3215 *
3216 * WORK(IU) is M by M
3217 *
3218  ldwrku = m
3219  END IF
3220  itau = iu + ldwrku*m
3221  iwork = itau + m
3222 *
3223 * Compute A=L*Q, copying result to VT
3224 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3225 *
3226  CALL dgelqf( m, n, a, lda, work( itau ),
3227  $ work( iwork ), lwork-iwork+1, ierr )
3228  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3229 *
3230 * Generate Q in VT
3231 * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3232 *
3233  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3234  $ work( iwork ), lwork-iwork+1, ierr )
3235 *
3236 * Copy L to WORK(IU), zeroing out above it
3237 *
3238  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3239  $ ldwrku )
3240  CALL dlaset( 'U', m-1, m-1, zero, zero,
3241  $ work( iu+ldwrku ), ldwrku )
3242  ie = itau
3243  itauq = ie + m
3244  itaup = itauq + m
3245  iwork = itaup + m
3246 *
3247 * Bidiagonalize L in WORK(IU), copying result to U
3248 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3249 *
3250  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3251  $ work( ie ), work( itauq ),
3252  $ work( itaup ), work( iwork ),
3253  $ lwork-iwork+1, ierr )
3254  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3255  $ ldu )
3256 *
3257 * Generate right bidiagonalizing vectors in WORK(IU)
3258 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3259 *
3260  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3261  $ work( itaup ), work( iwork ),
3262  $ lwork-iwork+1, ierr )
3263 *
3264 * Generate left bidiagonalizing vectors in U
3265 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3266 *
3267  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3268  $ work( iwork ), lwork-iwork+1, ierr )
3269  iwork = ie + m
3270 *
3271 * Perform bidiagonal QR iteration, computing left
3272 * singular vectors of L in U and computing right
3273 * singular vectors of L in WORK(IU)
3274 * (Workspace: need M*M + BDSPAC)
3275 *
3276  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3277  $ work( iu ), ldwrku, u, ldu, dum, 1,
3278  $ work( iwork ), info )
3279 *
3280 * Multiply right singular vectors of L in WORK(IU) by
3281 * Q in VT, storing result in A
3282 * (Workspace: need M*M)
3283 *
3284  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3285  $ ldwrku, vt, ldvt, zero, a, lda )
3286 *
3287 * Copy right singular vectors of A from A to VT
3288 *
3289  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3290 *
3291  ELSE
3292 *
3293 * Insufficient workspace for a fast algorithm
3294 *
3295  itau = 1
3296  iwork = itau + m
3297 *
3298 * Compute A=L*Q, copying result to VT
3299 * (Workspace: need 2*M, prefer M + M*NB)
3300 *
3301  CALL dgelqf( m, n, a, lda, work( itau ),
3302  $ work( iwork ), lwork-iwork+1, ierr )
3303  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3304 *
3305 * Generate Q in VT
3306 * (Workspace: need M + N, prefer M + N*NB)
3307 *
3308  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3309  $ work( iwork ), lwork-iwork+1, ierr )
3310 *
3311 * Copy L to U, zeroing out above it
3312 *
3313  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3314  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3315  $ ldu )
3316  ie = itau
3317  itauq = ie + m
3318  itaup = itauq + m
3319  iwork = itaup + m
3320 *
3321 * Bidiagonalize L in U
3322 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3323 *
3324  CALL dgebrd( m, m, u, ldu, s, work( ie ),
3325  $ work( itauq ), work( itaup ),
3326  $ work( iwork ), lwork-iwork+1, ierr )
3327 *
3328 * Multiply right bidiagonalizing vectors in U by Q
3329 * in VT
3330 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3331 *
3332  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3333  $ work( itaup ), vt, ldvt,
3334  $ work( iwork ), lwork-iwork+1, ierr )
3335 *
3336 * Generate left bidiagonalizing vectors in U
3337 * (Workspace: need 4*M, prefer 3*M + M*NB)
3338 *
3339  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3340  $ work( iwork ), lwork-iwork+1, ierr )
3341  iwork = ie + m
3342 *
3343 * Perform bidiagonal QR iteration, computing left
3344 * singular vectors of A in U and computing right
3345 * singular vectors of A in VT
3346 * (Workspace: need BDSPAC)
3347 *
3348  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3349  $ ldvt, u, ldu, dum, 1, work( iwork ),
3350  $ info )
3351 *
3352  END IF
3353 *
3354  END IF
3355 *
3356  END IF
3357 *
3358  ELSE
3359 *
3360 * N .LT. MNTHR
3361 *
3362 * Path 10t(N greater than M, but not much larger)
3363 * Reduce to bidiagonal form without LQ decomposition
3364 *
3365  ie = 1
3366  itauq = ie + m
3367  itaup = itauq + m
3368  iwork = itaup + m
3369 *
3370 * Bidiagonalize A
3371 * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3372 *
3373  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3374  $ work( itaup ), work( iwork ), lwork-iwork+1,
3375  $ ierr )
3376  IF( wntuas ) THEN
3377 *
3378 * If left singular vectors desired in U, copy result to U
3379 * and generate left bidiagonalizing vectors in U
3380 * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3381 *
3382  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3383  CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3384  $ work( iwork ), lwork-iwork+1, ierr )
3385  END IF
3386  IF( wntvas ) THEN
3387 *
3388 * If right singular vectors desired in VT, copy result to
3389 * VT and generate right bidiagonalizing vectors in VT
3390 * (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3391 *
3392  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3393  IF( wntva )
3394  $ nrvt = n
3395  IF( wntvs )
3396  $ nrvt = m
3397  CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3398  $ work( iwork ), lwork-iwork+1, ierr )
3399  END IF
3400  IF( wntuo ) THEN
3401 *
3402 * If left singular vectors desired in A, generate left
3403 * bidiagonalizing vectors in A
3404 * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3405 *
3406  CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3407  $ work( iwork ), lwork-iwork+1, ierr )
3408  END IF
3409  IF( wntvo ) THEN
3410 *
3411 * If right singular vectors desired in A, generate right
3412 * bidiagonalizing vectors in A
3413 * (Workspace: need 4*M, prefer 3*M + M*NB)
3414 *
3415  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3416  $ work( iwork ), lwork-iwork+1, ierr )
3417  END IF
3418  iwork = ie + m
3419  IF( wntuas .OR. wntuo )
3420  $ nru = m
3421  IF( wntun )
3422  $ nru = 0
3423  IF( wntvas .OR. wntvo )
3424  $ ncvt = n
3425  IF( wntvn )
3426  $ ncvt = 0
3427  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3428 *
3429 * Perform bidiagonal QR iteration, if desired, computing
3430 * left singular vectors in U and computing right singular
3431 * vectors in VT
3432 * (Workspace: need BDSPAC)
3433 *
3434  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3435  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3436  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3437 *
3438 * Perform bidiagonal QR iteration, if desired, computing
3439 * left singular vectors in U and computing right singular
3440 * vectors in A
3441 * (Workspace: need BDSPAC)
3442 *
3443  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3444  $ u, ldu, dum, 1, work( iwork ), info )
3445  ELSE
3446 *
3447 * Perform bidiagonal QR iteration, if desired, computing
3448 * left singular vectors in A and computing right singular
3449 * vectors in VT
3450 * (Workspace: need BDSPAC)
3451 *
3452  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3453  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3454  END IF
3455 *
3456  END IF
3457 *
3458  END IF
3459 *
3460 * If DBDSQR failed to converge, copy unconverged superdiagonals
3461 * to WORK( 2:MINMN )
3462 *
3463  IF( info.NE.0 ) THEN
3464  IF( ie.GT.2 ) THEN
3465  DO 50 i = 1, minmn - 1
3466  work( i+1 ) = work( i+ie-1 )
3467  50 CONTINUE
3468  END IF
3469  IF( ie.LT.2 ) THEN
3470  DO 60 i = minmn - 1, 1, -1
3471  work( i+1 ) = work( i+ie-1 )
3472  60 CONTINUE
3473  END IF
3474  END IF
3475 *
3476 * Undo scaling if necessary
3477 *
3478  IF( iscl.EQ.1 ) THEN
3479  IF( anrm.GT.bignum )
3480  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3481  $ ierr )
3482  IF( info.NE.0 .AND. anrm.GT.bignum )
3483  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3484  $ minmn, ierr )
3485  IF( anrm.LT.smlnum )
3486  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3487  $ ierr )
3488  IF( info.NE.0 .AND. anrm.LT.smlnum )
3489  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3490  $ minmn, ierr )
3491  END IF
3492 *
3493 * Return optimal workspace in WORK(1)
3494 *
3495  work( 1 ) = maxwrk
3496 *
3497  RETURN
3498 *
3499 * End of DGESVD
3500 *
3501  END
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:143
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:110
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine dbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
DBDSQR
Definition: dbdsqr.f:241
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGBR
Definition: dorgbr.f:157
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:145
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:143
subroutine dgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
DGEBRD
Definition: dgebrd.f:205
subroutine dgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
DGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: dgesvd.f:211
subroutine dorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGQR
Definition: dorgqr.f:128
subroutine dorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGLQ
Definition: dorglq.f:127
subroutine dormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMBR
Definition: dormbr.f:195