LAPACK  3.6.1 LAPACK: Linear Algebra PACKage
dgesvd.f
Go to the documentation of this file.
1 *> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
30 * \$ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is DOUBLE PRECISION array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is DOUBLE PRECISION array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if DBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \date April 2012
207 *
208 *> \ingroup doubleGEsing
209 *
210 * =====================================================================
211  SUBROUTINE dgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
212  \$ vt, ldvt, work, lwork, info )
213 *
214 * -- LAPACK driver routine (version 3.6.1) --
215 * -- LAPACK is a software package provided by Univ. of Tennessee, --
216 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217 * April 2012
218 *
219 * .. Scalar Arguments ..
220  CHARACTER JOBU, JOBVT
221  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222 * ..
223 * .. Array Arguments ..
224  DOUBLE PRECISION A( lda, * ), S( * ), U( ldu, * ),
225  \$ vt( ldvt, * ), work( * )
226 * ..
227 *
228 * =====================================================================
229 *
230 * .. Parameters ..
231  DOUBLE PRECISION ZERO, ONE
232  parameter ( zero = 0.0d0, one = 1.0d0 )
233 * ..
234 * .. Local Scalars ..
235  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236  \$ wntva, wntvas, wntvn, wntvo, wntvs
237  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238  \$ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
239  \$ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
240  \$ nrvt, wrkbl
241  INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
242  \$ lwork_dgebrd, lwork_dorgbr_p, lwork_dorgbr_q,
243  \$ lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
244  DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
245 * ..
246 * .. Local Arrays ..
247  DOUBLE PRECISION DUM( 1 )
248 * ..
249 * .. External Subroutines ..
250  EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
252  \$ xerbla
253 * ..
254 * .. External Functions ..
255  LOGICAL LSAME
256  INTEGER ILAENV
257  DOUBLE PRECISION DLAMCH, DLANGE
258  EXTERNAL lsame, ilaenv, dlamch, dlange
259 * ..
260 * .. Intrinsic Functions ..
261  INTRINSIC max, min, sqrt
262 * ..
263 * .. Executable Statements ..
264 *
265 * Test the input arguments
266 *
267  info = 0
268  minmn = min( m, n )
269  wntua = lsame( jobu, 'A' )
270  wntus = lsame( jobu, 'S' )
271  wntuas = wntua .OR. wntus
272  wntuo = lsame( jobu, 'O' )
273  wntun = lsame( jobu, 'N' )
274  wntva = lsame( jobvt, 'A' )
275  wntvs = lsame( jobvt, 'S' )
276  wntvas = wntva .OR. wntvs
277  wntvo = lsame( jobvt, 'O' )
278  wntvn = lsame( jobvt, 'N' )
279  lquery = ( lwork.EQ.-1 )
280 *
281  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
282  info = -1
283  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
284  \$ ( wntvo .AND. wntuo ) ) THEN
285  info = -2
286  ELSE IF( m.LT.0 ) THEN
287  info = -3
288  ELSE IF( n.LT.0 ) THEN
289  info = -4
290  ELSE IF( lda.LT.max( 1, m ) ) THEN
291  info = -6
292  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
293  info = -9
294  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
295  \$ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
296  info = -11
297  END IF
298 *
299 * Compute workspace
300 * (Note: Comments in the code beginning "Workspace:" describe the
301 * minimal amount of workspace needed at that point in the code,
302 * as well as the preferred amount for good performance.
303 * NB refers to the optimal block size for the immediately
304 * following subroutine, as returned by ILAENV.)
305 *
306  IF( info.EQ.0 ) THEN
307  minwrk = 1
308  maxwrk = 1
309  IF( m.GE.n .AND. minmn.GT.0 ) THEN
310 *
311 * Compute space needed for DBDSQR
312 *
313  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
314  bdspac = 5*n
315 * Compute space needed for DGEQRF
316  CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_dgeqrf = int( dum(1) )
318 * Compute space needed for DORGQR
319  CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
320  lwork_dorgqr_n = int( dum(1) )
321  CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
322  lwork_dorgqr_m = int( dum(1) )
323 * Compute space needed for DGEBRD
324  CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
325  \$ dum(1), dum(1), -1, ierr )
326  lwork_dgebrd = int( dum(1) )
327 * Compute space needed for DORGBR P
328  CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
329  \$ dum(1), -1, ierr )
330  lwork_dorgbr_p = int( dum(1) )
331 * Compute space needed for DORGBR Q
332  CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
333  \$ dum(1), -1, ierr )
334  lwork_dorgbr_q = int( dum(1) )
335 *
336  IF( m.GE.mnthr ) THEN
337  IF( wntun ) THEN
338 *
339 * Path 1 (M much larger than N, JOBU='N')
340 *
341  maxwrk = n + lwork_dgeqrf
342  maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
343  IF( wntvo .OR. wntvas )
344  \$ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
345  maxwrk = max( maxwrk, bdspac )
346  minwrk = max( 4*n, bdspac )
347  ELSE IF( wntuo .AND. wntvn ) THEN
348 *
349 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350 *
351  wrkbl = n + lwork_dgeqrf
352  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
353  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
354  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
355  wrkbl = max( wrkbl, bdspac )
356  maxwrk = max( n*n + wrkbl, n*n + m*n + n )
357  minwrk = max( 3*n + m, bdspac )
358  ELSE IF( wntuo .AND. wntvas ) THEN
359 *
360 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361 * 'A')
362 *
363  wrkbl = n + lwork_dgeqrf
364  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
365  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
366  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
367  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
368  wrkbl = max( wrkbl, bdspac )
369  maxwrk = max( n*n + wrkbl, n*n + m*n + n )
370  minwrk = max( 3*n + m, bdspac )
371  ELSE IF( wntus .AND. wntvn ) THEN
372 *
373 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374 *
375  wrkbl = n + lwork_dgeqrf
376  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
377  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
378  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
379  wrkbl = max( wrkbl, bdspac )
380  maxwrk = n*n + wrkbl
381  minwrk = max( 3*n + m, bdspac )
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_dgeqrf
387  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
388  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
389  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
390  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
391  wrkbl = max( wrkbl, bdspac )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = max( 3*n + m, bdspac )
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_dgeqrf
400  wrkbl = max( wrkbl, n + lwork_dorgqr_n )
401  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
402  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
403  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
404  wrkbl = max( wrkbl, bdspac )
405  maxwrk = n*n + wrkbl
406  minwrk = max( 3*n + m, bdspac )
407  ELSE IF( wntua .AND. wntvn ) THEN
408 *
409 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410 *
411  wrkbl = n + lwork_dgeqrf
412  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
413  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
414  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
415  wrkbl = max( wrkbl, bdspac )
416  maxwrk = n*n + wrkbl
417  minwrk = max( 3*n + m, bdspac )
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_dgeqrf
423  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
424  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
425  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
426  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
427  wrkbl = max( wrkbl, bdspac )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = max( 3*n + m, bdspac )
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_dgeqrf
436  wrkbl = max( wrkbl, n + lwork_dorgqr_m )
437  wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
438  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
439  wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
440  wrkbl = max( wrkbl, bdspac )
441  maxwrk = n*n + wrkbl
442  minwrk = max( 3*n + m, bdspac )
443  END IF
444  ELSE
445 *
446 * Path 10 (M at least N, but not much larger)
447 *
448  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
449  \$ dum(1), dum(1), -1, ierr )
450  lwork_dgebrd = int( dum(1) )
451  maxwrk = 3*n + lwork_dgebrd
452  IF( wntus .OR. wntuo ) THEN
453  CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
454  \$ dum(1), -1, ierr )
455  lwork_dorgbr_q = int( dum(1) )
456  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
457  END IF
458  IF( wntua ) THEN
459  CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
460  \$ dum(1), -1, ierr )
461  lwork_dorgbr_q = int( dum(1) )
462  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
463  END IF
464  IF( .NOT.wntvn ) THEN
465  maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
466  END IF
467  maxwrk = max( maxwrk, bdspac )
468  minwrk = max( 3*n + m, bdspac )
469  END IF
470  ELSE IF( minmn.GT.0 ) THEN
471 *
472 * Compute space needed for DBDSQR
473 *
474  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
475  bdspac = 5*m
476 * Compute space needed for DGELQF
477  CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
478  lwork_dgelqf = int( dum(1) )
479 * Compute space needed for DORGLQ
480  CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
481  lwork_dorglq_n = int( dum(1) )
482  CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
483  lwork_dorglq_m = int( dum(1) )
484 * Compute space needed for DGEBRD
485  CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
486  \$ dum(1), dum(1), -1, ierr )
487  lwork_dgebrd = int( dum(1) )
488 * Compute space needed for DORGBR P
489  CALL dorgbr( 'P', m, m, m, a, n, dum(1),
490  \$ dum(1), -1, ierr )
491  lwork_dorgbr_p = int( dum(1) )
492 * Compute space needed for DORGBR Q
493  CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
494  \$ dum(1), -1, ierr )
495  lwork_dorgbr_q = int( dum(1) )
496  IF( n.GE.mnthr ) THEN
497  IF( wntvn ) THEN
498 *
499 * Path 1t(N much larger than M, JOBVT='N')
500 *
501  maxwrk = m + lwork_dgelqf
502  maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
503  IF( wntuo .OR. wntuas )
504  \$ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
505  maxwrk = max( maxwrk, bdspac )
506  minwrk = max( 4*m, bdspac )
507  ELSE IF( wntvo .AND. wntun ) THEN
508 *
509 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510 *
511  wrkbl = m + lwork_dgelqf
512  wrkbl = max( wrkbl, m + lwork_dorglq_m )
513  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
514  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
515  wrkbl = max( wrkbl, bdspac )
516  maxwrk = max( m*m + wrkbl, m*m + m*n + m )
517  minwrk = max( 3*m + n, bdspac )
518  ELSE IF( wntvo .AND. wntuas ) THEN
519 *
520 * Path 3t(N much larger than M, JOBU='S' or 'A',
521 * JOBVT='O')
522 *
523  wrkbl = m + lwork_dgelqf
524  wrkbl = max( wrkbl, m + lwork_dorglq_m )
525  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
526  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
527  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
528  wrkbl = max( wrkbl, bdspac )
529  maxwrk = max( m*m + wrkbl, m*m + m*n + m )
530  minwrk = max( 3*m + n, bdspac )
531  ELSE IF( wntvs .AND. wntun ) THEN
532 *
533 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534 *
535  wrkbl = m + lwork_dgelqf
536  wrkbl = max( wrkbl, m + lwork_dorglq_m )
537  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
538  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
539  wrkbl = max( wrkbl, bdspac )
540  maxwrk = m*m + wrkbl
541  minwrk = max( 3*m + n, bdspac )
542  ELSE IF( wntvs .AND. wntuo ) THEN
543 *
544 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545 *
546  wrkbl = m + lwork_dgelqf
547  wrkbl = max( wrkbl, m + lwork_dorglq_m )
548  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
549  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
550  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
551  wrkbl = max( wrkbl, bdspac )
552  maxwrk = 2*m*m + wrkbl
553  minwrk = max( 3*m + n, bdspac )
554  ELSE IF( wntvs .AND. wntuas ) THEN
555 *
556 * Path 6t(N much larger than M, JOBU='S' or 'A',
557 * JOBVT='S')
558 *
559  wrkbl = m + lwork_dgelqf
560  wrkbl = max( wrkbl, m + lwork_dorglq_m )
561  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
562  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
563  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
564  wrkbl = max( wrkbl, bdspac )
565  maxwrk = m*m + wrkbl
566  minwrk = max( 3*m + n, bdspac )
567  ELSE IF( wntva .AND. wntun ) THEN
568 *
569 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570 *
571  wrkbl = m + lwork_dgelqf
572  wrkbl = max( wrkbl, m + lwork_dorglq_n )
573  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
574  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
575  wrkbl = max( wrkbl, bdspac )
576  maxwrk = m*m + wrkbl
577  minwrk = max( 3*m + n, bdspac )
578  ELSE IF( wntva .AND. wntuo ) THEN
579 *
580 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581 *
582  wrkbl = m + lwork_dgelqf
583  wrkbl = max( wrkbl, m + lwork_dorglq_n )
584  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
585  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
586  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
587  wrkbl = max( wrkbl, bdspac )
588  maxwrk = 2*m*m + wrkbl
589  minwrk = max( 3*m + n, bdspac )
590  ELSE IF( wntva .AND. wntuas ) THEN
591 *
592 * Path 9t(N much larger than M, JOBU='S' or 'A',
593 * JOBVT='A')
594 *
595  wrkbl = m + lwork_dgelqf
596  wrkbl = max( wrkbl, m + lwork_dorglq_n )
597  wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
598  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
599  wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
600  wrkbl = max( wrkbl, bdspac )
601  maxwrk = m*m + wrkbl
602  minwrk = max( 3*m + n, bdspac )
603  END IF
604  ELSE
605 *
606 * Path 10t(N greater than M, but not much larger)
607 *
608  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
609  \$ dum(1), dum(1), -1, ierr )
610  lwork_dgebrd = int( dum(1) )
611  maxwrk = 3*m + lwork_dgebrd
612  IF( wntvs .OR. wntvo ) THEN
613 * Compute space needed for DORGBR P
614  CALL dorgbr( 'P', m, n, m, a, n, dum(1),
615  \$ dum(1), -1, ierr )
616  lwork_dorgbr_p = int( dum(1) )
617  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
618  END IF
619  IF( wntva ) THEN
620  CALL dorgbr( 'P', n, n, m, a, n, dum(1),
621  \$ dum(1), -1, ierr )
622  lwork_dorgbr_p = int( dum(1) )
623  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
624  END IF
625  IF( .NOT.wntun ) THEN
626  maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
627  END IF
628  maxwrk = max( maxwrk, bdspac )
629  minwrk = max( 3*m + n, bdspac )
630  END IF
631  END IF
632  maxwrk = max( maxwrk, minwrk )
633  work( 1 ) = maxwrk
634 *
635  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
636  info = -13
637  END IF
638  END IF
639 *
640  IF( info.NE.0 ) THEN
641  CALL xerbla( 'DGESVD', -info )
642  RETURN
643  ELSE IF( lquery ) THEN
644  RETURN
645  END IF
646 *
647 * Quick return if possible
648 *
649  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
650  RETURN
651  END IF
652 *
653 * Get machine constants
654 *
655  eps = dlamch( 'P' )
656  smlnum = sqrt( dlamch( 'S' ) ) / eps
657  bignum = one / smlnum
658 *
659 * Scale A if max element outside range [SMLNUM,BIGNUM]
660 *
661  anrm = dlange( 'M', m, n, a, lda, dum )
662  iscl = 0
663  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
664  iscl = 1
665  CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
666  ELSE IF( anrm.GT.bignum ) THEN
667  iscl = 1
668  CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
669  END IF
670 *
671  IF( m.GE.n ) THEN
672 *
673 * A has at least as many rows as columns. If A has sufficiently
674 * more rows than columns, first reduce using the QR
675 * decomposition (if sufficient workspace available)
676 *
677  IF( m.GE.mnthr ) THEN
678 *
679  IF( wntun ) THEN
680 *
681 * Path 1 (M much larger than N, JOBU='N')
682 * No left singular vectors to be computed
683 *
684  itau = 1
685  iwork = itau + n
686 *
687 * Compute A=Q*R
688 * (Workspace: need 2*N, prefer N + N*NB)
689 *
690  CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
691  \$ lwork-iwork+1, ierr )
692 *
693 * Zero out below R
694 *
695  IF( n .GT. 1 ) THEN
696  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
697  \$ lda )
698  END IF
699  ie = 1
700  itauq = ie + n
701  itaup = itauq + n
702  iwork = itaup + n
703 *
704 * Bidiagonalize R in A
705 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
706 *
707  CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
708  \$ work( itaup ), work( iwork ), lwork-iwork+1,
709  \$ ierr )
710  ncvt = 0
711  IF( wntvo .OR. wntvas ) THEN
712 *
713 * If right singular vectors desired, generate P'.
714 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
715 *
716  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
717  \$ work( iwork ), lwork-iwork+1, ierr )
718  ncvt = n
719  END IF
720  iwork = ie + n
721 *
722 * Perform bidiagonal QR iteration, computing right
723 * singular vectors of A in A if desired
724 * (Workspace: need BDSPAC)
725 *
726  CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
727  \$ dum, 1, dum, 1, work( iwork ), info )
728 *
729 * If right singular vectors desired in VT, copy them there
730 *
731  IF( wntvas )
732  \$ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
733 *
734  ELSE IF( wntuo .AND. wntvn ) THEN
735 *
736 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
737 * N left singular vectors to be overwritten on A and
738 * no right singular vectors to be computed
739 *
740  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
741 *
742 * Sufficient workspace for a fast algorithm
743 *
744  ir = 1
745  IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
746 *
747 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
748 *
749  ldwrku = lda
750  ldwrkr = lda
751  ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
752 *
753 * WORK(IU) is LDA by N, WORK(IR) is N by N
754 *
755  ldwrku = lda
756  ldwrkr = n
757  ELSE
758 *
759 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
760 *
761  ldwrku = ( lwork-n*n-n ) / n
762  ldwrkr = n
763  END IF
764  itau = ir + ldwrkr*n
765  iwork = itau + n
766 *
767 * Compute A=Q*R
768 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
769 *
770  CALL dgeqrf( m, n, a, lda, work( itau ),
771  \$ work( iwork ), lwork-iwork+1, ierr )
772 *
773 * Copy R to WORK(IR) and zero out below it
774 *
775  CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
776  CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
777  \$ ldwrkr )
778 *
779 * Generate Q in A
780 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
781 *
782  CALL dorgqr( m, n, n, a, lda, work( itau ),
783  \$ work( iwork ), lwork-iwork+1, ierr )
784  ie = itau
785  itauq = ie + n
786  itaup = itauq + n
787  iwork = itaup + n
788 *
789 * Bidiagonalize R in WORK(IR)
790 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
791 *
792  CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
793  \$ work( itauq ), work( itaup ),
794  \$ work( iwork ), lwork-iwork+1, ierr )
795 *
796 * Generate left vectors bidiagonalizing R
797 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
798 *
799  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
800  \$ work( itauq ), work( iwork ),
801  \$ lwork-iwork+1, ierr )
802  iwork = ie + n
803 *
804 * Perform bidiagonal QR iteration, computing left
805 * singular vectors of R in WORK(IR)
806 * (Workspace: need N*N + BDSPAC)
807 *
808  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
809  \$ work( ir ), ldwrkr, dum, 1,
810  \$ work( iwork ), info )
811  iu = ie + n
812 *
813 * Multiply Q in A by left singular vectors of R in
814 * WORK(IR), storing result in WORK(IU) and copying to A
815 * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
816 *
817  DO 10 i = 1, m, ldwrku
818  chunk = min( m-i+1, ldwrku )
819  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
820  \$ lda, work( ir ), ldwrkr, zero,
821  \$ work( iu ), ldwrku )
822  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
823  \$ a( i, 1 ), lda )
824  10 CONTINUE
825 *
826  ELSE
827 *
828 * Insufficient workspace for a fast algorithm
829 *
830  ie = 1
831  itauq = ie + n
832  itaup = itauq + n
833  iwork = itaup + n
834 *
835 * Bidiagonalize A
836 * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
837 *
838  CALL dgebrd( m, n, a, lda, s, work( ie ),
839  \$ work( itauq ), work( itaup ),
840  \$ work( iwork ), lwork-iwork+1, ierr )
841 *
842 * Generate left vectors bidiagonalizing A
843 * (Workspace: need 4*N, prefer 3*N + N*NB)
844 *
845  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
846  \$ work( iwork ), lwork-iwork+1, ierr )
847  iwork = ie + n
848 *
849 * Perform bidiagonal QR iteration, computing left
850 * singular vectors of A in A
851 * (Workspace: need BDSPAC)
852 *
853  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
854  \$ a, lda, dum, 1, work( iwork ), info )
855 *
856  END IF
857 *
858  ELSE IF( wntuo .AND. wntvas ) THEN
859 *
860 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
861 * N left singular vectors to be overwritten on A and
862 * N right singular vectors to be computed in VT
863 *
864  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
865 *
866 * Sufficient workspace for a fast algorithm
867 *
868  ir = 1
869  IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
870 *
871 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
872 *
873  ldwrku = lda
874  ldwrkr = lda
875  ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
876 *
877 * WORK(IU) is LDA by N and WORK(IR) is N by N
878 *
879  ldwrku = lda
880  ldwrkr = n
881  ELSE
882 *
883 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
884 *
885  ldwrku = ( lwork-n*n-n ) / n
886  ldwrkr = n
887  END IF
888  itau = ir + ldwrkr*n
889  iwork = itau + n
890 *
891 * Compute A=Q*R
892 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
893 *
894  CALL dgeqrf( m, n, a, lda, work( itau ),
895  \$ work( iwork ), lwork-iwork+1, ierr )
896 *
897 * Copy R to VT, zeroing out below it
898 *
899  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
900  IF( n.GT.1 )
901  \$ CALL dlaset( 'L', n-1, n-1, zero, zero,
902  \$ vt( 2, 1 ), ldvt )
903 *
904 * Generate Q in A
905 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
906 *
907  CALL dorgqr( m, n, n, a, lda, work( itau ),
908  \$ work( iwork ), lwork-iwork+1, ierr )
909  ie = itau
910  itauq = ie + n
911  itaup = itauq + n
912  iwork = itaup + n
913 *
914 * Bidiagonalize R in VT, copying result to WORK(IR)
915 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
916 *
917  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
918  \$ work( itauq ), work( itaup ),
919  \$ work( iwork ), lwork-iwork+1, ierr )
920  CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
921 *
922 * Generate left vectors bidiagonalizing R in WORK(IR)
923 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
924 *
925  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
926  \$ work( itauq ), work( iwork ),
927  \$ lwork-iwork+1, ierr )
928 *
929 * Generate right vectors bidiagonalizing R in VT
930 * (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
931 *
932  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
933  \$ work( iwork ), lwork-iwork+1, ierr )
934  iwork = ie + n
935 *
936 * Perform bidiagonal QR iteration, computing left
937 * singular vectors of R in WORK(IR) and computing right
938 * singular vectors of R in VT
939 * (Workspace: need N*N + BDSPAC)
940 *
941  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
942  \$ work( ir ), ldwrkr, dum, 1,
943  \$ work( iwork ), info )
944  iu = ie + n
945 *
946 * Multiply Q in A by left singular vectors of R in
947 * WORK(IR), storing result in WORK(IU) and copying to A
948 * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
949 *
950  DO 20 i = 1, m, ldwrku
951  chunk = min( m-i+1, ldwrku )
952  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
953  \$ lda, work( ir ), ldwrkr, zero,
954  \$ work( iu ), ldwrku )
955  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
956  \$ a( i, 1 ), lda )
957  20 CONTINUE
958 *
959  ELSE
960 *
961 * Insufficient workspace for a fast algorithm
962 *
963  itau = 1
964  iwork = itau + n
965 *
966 * Compute A=Q*R
967 * (Workspace: need 2*N, prefer N + N*NB)
968 *
969  CALL dgeqrf( m, n, a, lda, work( itau ),
970  \$ work( iwork ), lwork-iwork+1, ierr )
971 *
972 * Copy R to VT, zeroing out below it
973 *
974  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
975  IF( n.GT.1 )
976  \$ CALL dlaset( 'L', n-1, n-1, zero, zero,
977  \$ vt( 2, 1 ), ldvt )
978 *
979 * Generate Q in A
980 * (Workspace: need 2*N, prefer N + N*NB)
981 *
982  CALL dorgqr( m, n, n, a, lda, work( itau ),
983  \$ work( iwork ), lwork-iwork+1, ierr )
984  ie = itau
985  itauq = ie + n
986  itaup = itauq + n
987  iwork = itaup + n
988 *
989 * Bidiagonalize R in VT
990 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
991 *
992  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
993  \$ work( itauq ), work( itaup ),
994  \$ work( iwork ), lwork-iwork+1, ierr )
995 *
996 * Multiply Q in A by left vectors bidiagonalizing R
997 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
998 *
999  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1000  \$ work( itauq ), a, lda, work( iwork ),
1001  \$ lwork-iwork+1, ierr )
1002 *
1003 * Generate right vectors bidiagonalizing R in VT
1004 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1005 *
1006  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1007  \$ work( iwork ), lwork-iwork+1, ierr )
1008  iwork = ie + n
1009 *
1010 * Perform bidiagonal QR iteration, computing left
1011 * singular vectors of A in A and computing right
1012 * singular vectors of A in VT
1013 * (Workspace: need BDSPAC)
1014 *
1015  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1016  \$ a, lda, dum, 1, work( iwork ), info )
1017 *
1018  END IF
1019 *
1020  ELSE IF( wntus ) THEN
1021 *
1022  IF( wntvn ) THEN
1023 *
1024 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1025 * N left singular vectors to be computed in U and
1026 * no right singular vectors to be computed
1027 *
1028  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1029 *
1030 * Sufficient workspace for a fast algorithm
1031 *
1032  ir = 1
1033  IF( lwork.GE.wrkbl+lda*n ) THEN
1034 *
1035 * WORK(IR) is LDA by N
1036 *
1037  ldwrkr = lda
1038  ELSE
1039 *
1040 * WORK(IR) is N by N
1041 *
1042  ldwrkr = n
1043  END IF
1044  itau = ir + ldwrkr*n
1045  iwork = itau + n
1046 *
1047 * Compute A=Q*R
1048 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1049 *
1050  CALL dgeqrf( m, n, a, lda, work( itau ),
1051  \$ work( iwork ), lwork-iwork+1, ierr )
1052 *
1053 * Copy R to WORK(IR), zeroing out below it
1054 *
1055  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1056  \$ ldwrkr )
1057  CALL dlaset( 'L', n-1, n-1, zero, zero,
1058  \$ work( ir+1 ), ldwrkr )
1059 *
1060 * Generate Q in A
1061 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1062 *
1063  CALL dorgqr( m, n, n, a, lda, work( itau ),
1064  \$ work( iwork ), lwork-iwork+1, ierr )
1065  ie = itau
1066  itauq = ie + n
1067  itaup = itauq + n
1068  iwork = itaup + n
1069 *
1070 * Bidiagonalize R in WORK(IR)
1071 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1072 *
1073  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1074  \$ work( ie ), work( itauq ),
1075  \$ work( itaup ), work( iwork ),
1076  \$ lwork-iwork+1, ierr )
1077 *
1078 * Generate left vectors bidiagonalizing R in WORK(IR)
1079 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1080 *
1081  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1082  \$ work( itauq ), work( iwork ),
1083  \$ lwork-iwork+1, ierr )
1084  iwork = ie + n
1085 *
1086 * Perform bidiagonal QR iteration, computing left
1087 * singular vectors of R in WORK(IR)
1088 * (Workspace: need N*N + BDSPAC)
1089 *
1090  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1091  \$ 1, work( ir ), ldwrkr, dum, 1,
1092  \$ work( iwork ), info )
1093 *
1094 * Multiply Q in A by left singular vectors of R in
1095 * WORK(IR), storing result in U
1096 * (Workspace: need N*N)
1097 *
1098  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1099  \$ work( ir ), ldwrkr, zero, u, ldu )
1100 *
1101  ELSE
1102 *
1103 * Insufficient workspace for a fast algorithm
1104 *
1105  itau = 1
1106  iwork = itau + n
1107 *
1108 * Compute A=Q*R, copying result to U
1109 * (Workspace: need 2*N, prefer N + N*NB)
1110 *
1111  CALL dgeqrf( m, n, a, lda, work( itau ),
1112  \$ work( iwork ), lwork-iwork+1, ierr )
1113  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1114 *
1115 * Generate Q in U
1116 * (Workspace: need 2*N, prefer N + N*NB)
1117 *
1118  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1119  \$ work( iwork ), lwork-iwork+1, ierr )
1120  ie = itau
1121  itauq = ie + n
1122  itaup = itauq + n
1123  iwork = itaup + n
1124 *
1125 * Zero out below R in A
1126 *
1127  IF( n .GT. 1 ) THEN
1128  CALL dlaset( 'L', n-1, n-1, zero, zero,
1129  \$ a( 2, 1 ), lda )
1130  END IF
1131 *
1132 * Bidiagonalize R in A
1133 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1134 *
1135  CALL dgebrd( n, n, a, lda, s, work( ie ),
1136  \$ work( itauq ), work( itaup ),
1137  \$ work( iwork ), lwork-iwork+1, ierr )
1138 *
1139 * Multiply Q in U by left vectors bidiagonalizing R
1140 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1141 *
1142  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1143  \$ work( itauq ), u, ldu, work( iwork ),
1144  \$ lwork-iwork+1, ierr )
1145  iwork = ie + n
1146 *
1147 * Perform bidiagonal QR iteration, computing left
1148 * singular vectors of A in U
1149 * (Workspace: need BDSPAC)
1150 *
1151  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1152  \$ 1, u, ldu, dum, 1, work( iwork ),
1153  \$ info )
1154 *
1155  END IF
1156 *
1157  ELSE IF( wntvo ) THEN
1158 *
1159 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1160 * N left singular vectors to be computed in U and
1161 * N right singular vectors to be overwritten on A
1162 *
1163  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1164 *
1165 * Sufficient workspace for a fast algorithm
1166 *
1167  iu = 1
1168  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1169 *
1170 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1171 *
1172  ldwrku = lda
1173  ir = iu + ldwrku*n
1174  ldwrkr = lda
1175  ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1176 *
1177 * WORK(IU) is LDA by N and WORK(IR) is N by N
1178 *
1179  ldwrku = lda
1180  ir = iu + ldwrku*n
1181  ldwrkr = n
1182  ELSE
1183 *
1184 * WORK(IU) is N by N and WORK(IR) is N by N
1185 *
1186  ldwrku = n
1187  ir = iu + ldwrku*n
1188  ldwrkr = n
1189  END IF
1190  itau = ir + ldwrkr*n
1191  iwork = itau + n
1192 *
1193 * Compute A=Q*R
1194 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1195 *
1196  CALL dgeqrf( m, n, a, lda, work( itau ),
1197  \$ work( iwork ), lwork-iwork+1, ierr )
1198 *
1199 * Copy R to WORK(IU), zeroing out below it
1200 *
1201  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1202  \$ ldwrku )
1203  CALL dlaset( 'L', n-1, n-1, zero, zero,
1204  \$ work( iu+1 ), ldwrku )
1205 *
1206 * Generate Q in A
1207 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1208 *
1209  CALL dorgqr( m, n, n, a, lda, work( itau ),
1210  \$ work( iwork ), lwork-iwork+1, ierr )
1211  ie = itau
1212  itauq = ie + n
1213  itaup = itauq + n
1214  iwork = itaup + n
1215 *
1216 * Bidiagonalize R in WORK(IU), copying result to
1217 * WORK(IR)
1218 * (Workspace: need 2*N*N + 4*N,
1219 * prefer 2*N*N+3*N+2*N*NB)
1220 *
1221  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1222  \$ work( ie ), work( itauq ),
1223  \$ work( itaup ), work( iwork ),
1224  \$ lwork-iwork+1, ierr )
1225  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1226  \$ work( ir ), ldwrkr )
1227 *
1228 * Generate left bidiagonalizing vectors in WORK(IU)
1229 * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1230 *
1231  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1232  \$ work( itauq ), work( iwork ),
1233  \$ lwork-iwork+1, ierr )
1234 *
1235 * Generate right bidiagonalizing vectors in WORK(IR)
1236 * (Workspace: need 2*N*N + 4*N-1,
1237 * prefer 2*N*N+3*N+(N-1)*NB)
1238 *
1239  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1240  \$ work( itaup ), work( iwork ),
1241  \$ lwork-iwork+1, ierr )
1242  iwork = ie + n
1243 *
1244 * Perform bidiagonal QR iteration, computing left
1245 * singular vectors of R in WORK(IU) and computing
1246 * right singular vectors of R in WORK(IR)
1247 * (Workspace: need 2*N*N + BDSPAC)
1248 *
1249  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1250  \$ work( ir ), ldwrkr, work( iu ),
1251  \$ ldwrku, dum, 1, work( iwork ), info )
1252 *
1253 * Multiply Q in A by left singular vectors of R in
1254 * WORK(IU), storing result in U
1255 * (Workspace: need N*N)
1256 *
1257  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1258  \$ work( iu ), ldwrku, zero, u, ldu )
1259 *
1260 * Copy right singular vectors of R to A
1261 * (Workspace: need N*N)
1262 *
1263  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1264  \$ lda )
1265 *
1266  ELSE
1267 *
1268 * Insufficient workspace for a fast algorithm
1269 *
1270  itau = 1
1271  iwork = itau + n
1272 *
1273 * Compute A=Q*R, copying result to U
1274 * (Workspace: need 2*N, prefer N + N*NB)
1275 *
1276  CALL dgeqrf( m, n, a, lda, work( itau ),
1277  \$ work( iwork ), lwork-iwork+1, ierr )
1278  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1279 *
1280 * Generate Q in U
1281 * (Workspace: need 2*N, prefer N + N*NB)
1282 *
1283  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1284  \$ work( iwork ), lwork-iwork+1, ierr )
1285  ie = itau
1286  itauq = ie + n
1287  itaup = itauq + n
1288  iwork = itaup + n
1289 *
1290 * Zero out below R in A
1291 *
1292  IF( n .GT. 1 ) THEN
1293  CALL dlaset( 'L', n-1, n-1, zero, zero,
1294  \$ a( 2, 1 ), lda )
1295  END IF
1296 *
1297 * Bidiagonalize R in A
1298 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1299 *
1300  CALL dgebrd( n, n, a, lda, s, work( ie ),
1301  \$ work( itauq ), work( itaup ),
1302  \$ work( iwork ), lwork-iwork+1, ierr )
1303 *
1304 * Multiply Q in U by left vectors bidiagonalizing R
1305 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1306 *
1307  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1308  \$ work( itauq ), u, ldu, work( iwork ),
1309  \$ lwork-iwork+1, ierr )
1310 *
1311 * Generate right vectors bidiagonalizing R in A
1312 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1313 *
1314  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1315  \$ work( iwork ), lwork-iwork+1, ierr )
1316  iwork = ie + n
1317 *
1318 * Perform bidiagonal QR iteration, computing left
1319 * singular vectors of A in U and computing right
1320 * singular vectors of A in A
1321 * (Workspace: need BDSPAC)
1322 *
1323  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1324  \$ lda, u, ldu, dum, 1, work( iwork ),
1325  \$ info )
1326 *
1327  END IF
1328 *
1329  ELSE IF( wntvas ) THEN
1330 *
1331 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1332 * or 'A')
1333 * N left singular vectors to be computed in U and
1334 * N right singular vectors to be computed in VT
1335 *
1336  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1337 *
1338 * Sufficient workspace for a fast algorithm
1339 *
1340  iu = 1
1341  IF( lwork.GE.wrkbl+lda*n ) THEN
1342 *
1343 * WORK(IU) is LDA by N
1344 *
1345  ldwrku = lda
1346  ELSE
1347 *
1348 * WORK(IU) is N by N
1349 *
1350  ldwrku = n
1351  END IF
1352  itau = iu + ldwrku*n
1353  iwork = itau + n
1354 *
1355 * Compute A=Q*R
1356 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1357 *
1358  CALL dgeqrf( m, n, a, lda, work( itau ),
1359  \$ work( iwork ), lwork-iwork+1, ierr )
1360 *
1361 * Copy R to WORK(IU), zeroing out below it
1362 *
1363  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1364  \$ ldwrku )
1365  CALL dlaset( 'L', n-1, n-1, zero, zero,
1366  \$ work( iu+1 ), ldwrku )
1367 *
1368 * Generate Q in A
1369 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1370 *
1371  CALL dorgqr( m, n, n, a, lda, work( itau ),
1372  \$ work( iwork ), lwork-iwork+1, ierr )
1373  ie = itau
1374  itauq = ie + n
1375  itaup = itauq + n
1376  iwork = itaup + n
1377 *
1378 * Bidiagonalize R in WORK(IU), copying result to VT
1379 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1380 *
1381  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1382  \$ work( ie ), work( itauq ),
1383  \$ work( itaup ), work( iwork ),
1384  \$ lwork-iwork+1, ierr )
1385  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1386  \$ ldvt )
1387 *
1388 * Generate left bidiagonalizing vectors in WORK(IU)
1389 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1390 *
1391  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1392  \$ work( itauq ), work( iwork ),
1393  \$ lwork-iwork+1, ierr )
1394 *
1395 * Generate right bidiagonalizing vectors in VT
1396 * (Workspace: need N*N + 4*N-1,
1397 * prefer N*N+3*N+(N-1)*NB)
1398 *
1399  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1400  \$ work( iwork ), lwork-iwork+1, ierr )
1401  iwork = ie + n
1402 *
1403 * Perform bidiagonal QR iteration, computing left
1404 * singular vectors of R in WORK(IU) and computing
1405 * right singular vectors of R in VT
1406 * (Workspace: need N*N + BDSPAC)
1407 *
1408  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1409  \$ ldvt, work( iu ), ldwrku, dum, 1,
1410  \$ work( iwork ), info )
1411 *
1412 * Multiply Q in A by left singular vectors of R in
1413 * WORK(IU), storing result in U
1414 * (Workspace: need N*N)
1415 *
1416  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1417  \$ work( iu ), ldwrku, zero, u, ldu )
1418 *
1419  ELSE
1420 *
1421 * Insufficient workspace for a fast algorithm
1422 *
1423  itau = 1
1424  iwork = itau + n
1425 *
1426 * Compute A=Q*R, copying result to U
1427 * (Workspace: need 2*N, prefer N + N*NB)
1428 *
1429  CALL dgeqrf( m, n, a, lda, work( itau ),
1430  \$ work( iwork ), lwork-iwork+1, ierr )
1431  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1432 *
1433 * Generate Q in U
1434 * (Workspace: need 2*N, prefer N + N*NB)
1435 *
1436  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1437  \$ work( iwork ), lwork-iwork+1, ierr )
1438 *
1439 * Copy R to VT, zeroing out below it
1440 *
1441  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1442  IF( n.GT.1 )
1443  \$ CALL dlaset( 'L', n-1, n-1, zero, zero,
1444  \$ vt( 2, 1 ), ldvt )
1445  ie = itau
1446  itauq = ie + n
1447  itaup = itauq + n
1448  iwork = itaup + n
1449 *
1450 * Bidiagonalize R in VT
1451 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1452 *
1453  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1454  \$ work( itauq ), work( itaup ),
1455  \$ work( iwork ), lwork-iwork+1, ierr )
1456 *
1457 * Multiply Q in U by left bidiagonalizing vectors
1458 * in VT
1459 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1460 *
1461  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1462  \$ work( itauq ), u, ldu, work( iwork ),
1463  \$ lwork-iwork+1, ierr )
1464 *
1465 * Generate right bidiagonalizing vectors in VT
1466 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1467 *
1468  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1469  \$ work( iwork ), lwork-iwork+1, ierr )
1470  iwork = ie + n
1471 *
1472 * Perform bidiagonal QR iteration, computing left
1473 * singular vectors of A in U and computing right
1474 * singular vectors of A in VT
1475 * (Workspace: need BDSPAC)
1476 *
1477  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1478  \$ ldvt, u, ldu, dum, 1, work( iwork ),
1479  \$ info )
1480 *
1481  END IF
1482 *
1483  END IF
1484 *
1485  ELSE IF( wntua ) THEN
1486 *
1487  IF( wntvn ) THEN
1488 *
1489 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1490 * M left singular vectors to be computed in U and
1491 * no right singular vectors to be computed
1492 *
1493  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1494 *
1495 * Sufficient workspace for a fast algorithm
1496 *
1497  ir = 1
1498  IF( lwork.GE.wrkbl+lda*n ) THEN
1499 *
1500 * WORK(IR) is LDA by N
1501 *
1502  ldwrkr = lda
1503  ELSE
1504 *
1505 * WORK(IR) is N by N
1506 *
1507  ldwrkr = n
1508  END IF
1509  itau = ir + ldwrkr*n
1510  iwork = itau + n
1511 *
1512 * Compute A=Q*R, copying result to U
1513 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1514 *
1515  CALL dgeqrf( m, n, a, lda, work( itau ),
1516  \$ work( iwork ), lwork-iwork+1, ierr )
1517  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1518 *
1519 * Copy R to WORK(IR), zeroing out below it
1520 *
1521  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1522  \$ ldwrkr )
1523  CALL dlaset( 'L', n-1, n-1, zero, zero,
1524  \$ work( ir+1 ), ldwrkr )
1525 *
1526 * Generate Q in U
1527 * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1528 *
1529  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1530  \$ work( iwork ), lwork-iwork+1, ierr )
1531  ie = itau
1532  itauq = ie + n
1533  itaup = itauq + n
1534  iwork = itaup + n
1535 *
1536 * Bidiagonalize R in WORK(IR)
1537 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1538 *
1539  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1540  \$ work( ie ), work( itauq ),
1541  \$ work( itaup ), work( iwork ),
1542  \$ lwork-iwork+1, ierr )
1543 *
1544 * Generate left bidiagonalizing vectors in WORK(IR)
1545 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1546 *
1547  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1548  \$ work( itauq ), work( iwork ),
1549  \$ lwork-iwork+1, ierr )
1550  iwork = ie + n
1551 *
1552 * Perform bidiagonal QR iteration, computing left
1553 * singular vectors of R in WORK(IR)
1554 * (Workspace: need N*N + BDSPAC)
1555 *
1556  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1557  \$ 1, work( ir ), ldwrkr, dum, 1,
1558  \$ work( iwork ), info )
1559 *
1560 * Multiply Q in U by left singular vectors of R in
1561 * WORK(IR), storing result in A
1562 * (Workspace: need N*N)
1563 *
1564  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1565  \$ work( ir ), ldwrkr, zero, a, lda )
1566 *
1567 * Copy left singular vectors of A from A to U
1568 *
1569  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1570 *
1571  ELSE
1572 *
1573 * Insufficient workspace for a fast algorithm
1574 *
1575  itau = 1
1576  iwork = itau + n
1577 *
1578 * Compute A=Q*R, copying result to U
1579 * (Workspace: need 2*N, prefer N + N*NB)
1580 *
1581  CALL dgeqrf( m, n, a, lda, work( itau ),
1582  \$ work( iwork ), lwork-iwork+1, ierr )
1583  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1584 *
1585 * Generate Q in U
1586 * (Workspace: need N + M, prefer N + M*NB)
1587 *
1588  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1589  \$ work( iwork ), lwork-iwork+1, ierr )
1590  ie = itau
1591  itauq = ie + n
1592  itaup = itauq + n
1593  iwork = itaup + n
1594 *
1595 * Zero out below R in A
1596 *
1597  IF( n .GT. 1 ) THEN
1598  CALL dlaset( 'L', n-1, n-1, zero, zero,
1599  \$ a( 2, 1 ), lda )
1600  END IF
1601 *
1602 * Bidiagonalize R in A
1603 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1604 *
1605  CALL dgebrd( n, n, a, lda, s, work( ie ),
1606  \$ work( itauq ), work( itaup ),
1607  \$ work( iwork ), lwork-iwork+1, ierr )
1608 *
1609 * Multiply Q in U by left bidiagonalizing vectors
1610 * in A
1611 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1612 *
1613  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1614  \$ work( itauq ), u, ldu, work( iwork ),
1615  \$ lwork-iwork+1, ierr )
1616  iwork = ie + n
1617 *
1618 * Perform bidiagonal QR iteration, computing left
1619 * singular vectors of A in U
1620 * (Workspace: need BDSPAC)
1621 *
1622  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1623  \$ 1, u, ldu, dum, 1, work( iwork ),
1624  \$ info )
1625 *
1626  END IF
1627 *
1628  ELSE IF( wntvo ) THEN
1629 *
1630 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1631 * M left singular vectors to be computed in U and
1632 * N right singular vectors to be overwritten on A
1633 *
1634  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1635 *
1636 * Sufficient workspace for a fast algorithm
1637 *
1638  iu = 1
1639  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1640 *
1641 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1642 *
1643  ldwrku = lda
1644  ir = iu + ldwrku*n
1645  ldwrkr = lda
1646  ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1647 *
1648 * WORK(IU) is LDA by N and WORK(IR) is N by N
1649 *
1650  ldwrku = lda
1651  ir = iu + ldwrku*n
1652  ldwrkr = n
1653  ELSE
1654 *
1655 * WORK(IU) is N by N and WORK(IR) is N by N
1656 *
1657  ldwrku = n
1658  ir = iu + ldwrku*n
1659  ldwrkr = n
1660  END IF
1661  itau = ir + ldwrkr*n
1662  iwork = itau + n
1663 *
1664 * Compute A=Q*R, copying result to U
1665 * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1666 *
1667  CALL dgeqrf( m, n, a, lda, work( itau ),
1668  \$ work( iwork ), lwork-iwork+1, ierr )
1669  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1670 *
1671 * Generate Q in U
1672 * (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1673 *
1674  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1675  \$ work( iwork ), lwork-iwork+1, ierr )
1676 *
1677 * Copy R to WORK(IU), zeroing out below it
1678 *
1679  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1680  \$ ldwrku )
1681  CALL dlaset( 'L', n-1, n-1, zero, zero,
1682  \$ work( iu+1 ), ldwrku )
1683  ie = itau
1684  itauq = ie + n
1685  itaup = itauq + n
1686  iwork = itaup + n
1687 *
1688 * Bidiagonalize R in WORK(IU), copying result to
1689 * WORK(IR)
1690 * (Workspace: need 2*N*N + 4*N,
1691 * prefer 2*N*N+3*N+2*N*NB)
1692 *
1693  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1694  \$ work( ie ), work( itauq ),
1695  \$ work( itaup ), work( iwork ),
1696  \$ lwork-iwork+1, ierr )
1697  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1698  \$ work( ir ), ldwrkr )
1699 *
1700 * Generate left bidiagonalizing vectors in WORK(IU)
1701 * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1702 *
1703  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1704  \$ work( itauq ), work( iwork ),
1705  \$ lwork-iwork+1, ierr )
1706 *
1707 * Generate right bidiagonalizing vectors in WORK(IR)
1708 * (Workspace: need 2*N*N + 4*N-1,
1709 * prefer 2*N*N+3*N+(N-1)*NB)
1710 *
1711  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1712  \$ work( itaup ), work( iwork ),
1713  \$ lwork-iwork+1, ierr )
1714  iwork = ie + n
1715 *
1716 * Perform bidiagonal QR iteration, computing left
1717 * singular vectors of R in WORK(IU) and computing
1718 * right singular vectors of R in WORK(IR)
1719 * (Workspace: need 2*N*N + BDSPAC)
1720 *
1721  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1722  \$ work( ir ), ldwrkr, work( iu ),
1723  \$ ldwrku, dum, 1, work( iwork ), info )
1724 *
1725 * Multiply Q in U by left singular vectors of R in
1726 * WORK(IU), storing result in A
1727 * (Workspace: need N*N)
1728 *
1729  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1730  \$ work( iu ), ldwrku, zero, a, lda )
1731 *
1732 * Copy left singular vectors of A from A to U
1733 *
1734  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1735 *
1736 * Copy right singular vectors of R from WORK(IR) to A
1737 *
1738  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1739  \$ lda )
1740 *
1741  ELSE
1742 *
1743 * Insufficient workspace for a fast algorithm
1744 *
1745  itau = 1
1746  iwork = itau + n
1747 *
1748 * Compute A=Q*R, copying result to U
1749 * (Workspace: need 2*N, prefer N + N*NB)
1750 *
1751  CALL dgeqrf( m, n, a, lda, work( itau ),
1752  \$ work( iwork ), lwork-iwork+1, ierr )
1753  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1754 *
1755 * Generate Q in U
1756 * (Workspace: need N + M, prefer N + M*NB)
1757 *
1758  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1759  \$ work( iwork ), lwork-iwork+1, ierr )
1760  ie = itau
1761  itauq = ie + n
1762  itaup = itauq + n
1763  iwork = itaup + n
1764 *
1765 * Zero out below R in A
1766 *
1767  IF( n .GT. 1 ) THEN
1768  CALL dlaset( 'L', n-1, n-1, zero, zero,
1769  \$ a( 2, 1 ), lda )
1770  END IF
1771 *
1772 * Bidiagonalize R in A
1773 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1774 *
1775  CALL dgebrd( n, n, a, lda, s, work( ie ),
1776  \$ work( itauq ), work( itaup ),
1777  \$ work( iwork ), lwork-iwork+1, ierr )
1778 *
1779 * Multiply Q in U by left bidiagonalizing vectors
1780 * in A
1781 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1782 *
1783  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1784  \$ work( itauq ), u, ldu, work( iwork ),
1785  \$ lwork-iwork+1, ierr )
1786 *
1787 * Generate right bidiagonalizing vectors in A
1788 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1789 *
1790  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1791  \$ work( iwork ), lwork-iwork+1, ierr )
1792  iwork = ie + n
1793 *
1794 * Perform bidiagonal QR iteration, computing left
1795 * singular vectors of A in U and computing right
1796 * singular vectors of A in A
1797 * (Workspace: need BDSPAC)
1798 *
1799  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1800  \$ lda, u, ldu, dum, 1, work( iwork ),
1801  \$ info )
1802 *
1803  END IF
1804 *
1805  ELSE IF( wntvas ) THEN
1806 *
1807 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1808 * or 'A')
1809 * M left singular vectors to be computed in U and
1810 * N right singular vectors to be computed in VT
1811 *
1812  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1813 *
1814 * Sufficient workspace for a fast algorithm
1815 *
1816  iu = 1
1817  IF( lwork.GE.wrkbl+lda*n ) THEN
1818 *
1819 * WORK(IU) is LDA by N
1820 *
1821  ldwrku = lda
1822  ELSE
1823 *
1824 * WORK(IU) is N by N
1825 *
1826  ldwrku = n
1827  END IF
1828  itau = iu + ldwrku*n
1829  iwork = itau + n
1830 *
1831 * Compute A=Q*R, copying result to U
1832 * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1833 *
1834  CALL dgeqrf( m, n, a, lda, work( itau ),
1835  \$ work( iwork ), lwork-iwork+1, ierr )
1836  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1837 *
1838 * Generate Q in U
1839 * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1840 *
1841  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1842  \$ work( iwork ), lwork-iwork+1, ierr )
1843 *
1844 * Copy R to WORK(IU), zeroing out below it
1845 *
1846  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1847  \$ ldwrku )
1848  CALL dlaset( 'L', n-1, n-1, zero, zero,
1849  \$ work( iu+1 ), ldwrku )
1850  ie = itau
1851  itauq = ie + n
1852  itaup = itauq + n
1853  iwork = itaup + n
1854 *
1855 * Bidiagonalize R in WORK(IU), copying result to VT
1856 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1857 *
1858  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1859  \$ work( ie ), work( itauq ),
1860  \$ work( itaup ), work( iwork ),
1861  \$ lwork-iwork+1, ierr )
1862  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1863  \$ ldvt )
1864 *
1865 * Generate left bidiagonalizing vectors in WORK(IU)
1866 * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1867 *
1868  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1869  \$ work( itauq ), work( iwork ),
1870  \$ lwork-iwork+1, ierr )
1871 *
1872 * Generate right bidiagonalizing vectors in VT
1873 * (Workspace: need N*N + 4*N-1,
1874 * prefer N*N+3*N+(N-1)*NB)
1875 *
1876  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1877  \$ work( iwork ), lwork-iwork+1, ierr )
1878  iwork = ie + n
1879 *
1880 * Perform bidiagonal QR iteration, computing left
1881 * singular vectors of R in WORK(IU) and computing
1882 * right singular vectors of R in VT
1883 * (Workspace: need N*N + BDSPAC)
1884 *
1885  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1886  \$ ldvt, work( iu ), ldwrku, dum, 1,
1887  \$ work( iwork ), info )
1888 *
1889 * Multiply Q in U by left singular vectors of R in
1890 * WORK(IU), storing result in A
1891 * (Workspace: need N*N)
1892 *
1893  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1894  \$ work( iu ), ldwrku, zero, a, lda )
1895 *
1896 * Copy left singular vectors of A from A to U
1897 *
1898  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1899 *
1900  ELSE
1901 *
1902 * Insufficient workspace for a fast algorithm
1903 *
1904  itau = 1
1905  iwork = itau + n
1906 *
1907 * Compute A=Q*R, copying result to U
1908 * (Workspace: need 2*N, prefer N + N*NB)
1909 *
1910  CALL dgeqrf( m, n, a, lda, work( itau ),
1911  \$ work( iwork ), lwork-iwork+1, ierr )
1912  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1913 *
1914 * Generate Q in U
1915 * (Workspace: need N + M, prefer N + M*NB)
1916 *
1917  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1918  \$ work( iwork ), lwork-iwork+1, ierr )
1919 *
1920 * Copy R from A to VT, zeroing out below it
1921 *
1922  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1923  IF( n.GT.1 )
1924  \$ CALL dlaset( 'L', n-1, n-1, zero, zero,
1925  \$ vt( 2, 1 ), ldvt )
1926  ie = itau
1927  itauq = ie + n
1928  itaup = itauq + n
1929  iwork = itaup + n
1930 *
1931 * Bidiagonalize R in VT
1932 * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1933 *
1934  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1935  \$ work( itauq ), work( itaup ),
1936  \$ work( iwork ), lwork-iwork+1, ierr )
1937 *
1938 * Multiply Q in U by left bidiagonalizing vectors
1939 * in VT
1940 * (Workspace: need 3*N + M, prefer 3*N + M*NB)
1941 *
1942  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1943  \$ work( itauq ), u, ldu, work( iwork ),
1944  \$ lwork-iwork+1, ierr )
1945 *
1946 * Generate right bidiagonalizing vectors in VT
1947 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1948 *
1949  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1950  \$ work( iwork ), lwork-iwork+1, ierr )
1951  iwork = ie + n
1952 *
1953 * Perform bidiagonal QR iteration, computing left
1954 * singular vectors of A in U and computing right
1955 * singular vectors of A in VT
1956 * (Workspace: need BDSPAC)
1957 *
1958  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1959  \$ ldvt, u, ldu, dum, 1, work( iwork ),
1960  \$ info )
1961 *
1962  END IF
1963 *
1964  END IF
1965 *
1966  END IF
1967 *
1968  ELSE
1969 *
1970 * M .LT. MNTHR
1971 *
1972 * Path 10 (M at least N, but not much larger)
1973 * Reduce to bidiagonal form without QR decomposition
1974 *
1975  ie = 1
1976  itauq = ie + n
1977  itaup = itauq + n
1978  iwork = itaup + n
1979 *
1980 * Bidiagonalize A
1981 * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1982 *
1983  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1984  \$ work( itaup ), work( iwork ), lwork-iwork+1,
1985  \$ ierr )
1986  IF( wntuas ) THEN
1987 *
1988 * If left singular vectors desired in U, copy result to U
1989 * and generate left bidiagonalizing vectors in U
1990 * (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1991 *
1992  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1993  IF( wntus )
1994  \$ ncu = n
1995  IF( wntua )
1996  \$ ncu = m
1997  CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1998  \$ work( iwork ), lwork-iwork+1, ierr )
1999  END IF
2000  IF( wntvas ) THEN
2001 *
2002 * If right singular vectors desired in VT, copy result to
2003 * VT and generate right bidiagonalizing vectors in VT
2004 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2005 *
2006  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2007  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2008  \$ work( iwork ), lwork-iwork+1, ierr )
2009  END IF
2010  IF( wntuo ) THEN
2011 *
2012 * If left singular vectors desired in A, generate left
2013 * bidiagonalizing vectors in A
2014 * (Workspace: need 4*N, prefer 3*N + N*NB)
2015 *
2016  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2017  \$ work( iwork ), lwork-iwork+1, ierr )
2018  END IF
2019  IF( wntvo ) THEN
2020 *
2021 * If right singular vectors desired in A, generate right
2022 * bidiagonalizing vectors in A
2023 * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2024 *
2025  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2026  \$ work( iwork ), lwork-iwork+1, ierr )
2027  END IF
2028  iwork = ie + n
2029  IF( wntuas .OR. wntuo )
2030  \$ nru = m
2031  IF( wntun )
2032  \$ nru = 0
2033  IF( wntvas .OR. wntvo )
2034  \$ ncvt = n
2035  IF( wntvn )
2036  \$ ncvt = 0
2037  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2038 *
2039 * Perform bidiagonal QR iteration, if desired, computing
2040 * left singular vectors in U and computing right singular
2041 * vectors in VT
2042 * (Workspace: need BDSPAC)
2043 *
2044  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2045  \$ ldvt, u, ldu, dum, 1, work( iwork ), info )
2046  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2047 *
2048 * Perform bidiagonal QR iteration, if desired, computing
2049 * left singular vectors in U and computing right singular
2050 * vectors in A
2051 * (Workspace: need BDSPAC)
2052 *
2053  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2054  \$ u, ldu, dum, 1, work( iwork ), info )
2055  ELSE
2056 *
2057 * Perform bidiagonal QR iteration, if desired, computing
2058 * left singular vectors in A and computing right singular
2059 * vectors in VT
2060 * (Workspace: need BDSPAC)
2061 *
2062  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2063  \$ ldvt, a, lda, dum, 1, work( iwork ), info )
2064  END IF
2065 *
2066  END IF
2067 *
2068  ELSE
2069 *
2070 * A has more columns than rows. If A has sufficiently more
2071 * columns than rows, first reduce using the LQ decomposition (if
2072 * sufficient workspace available)
2073 *
2074  IF( n.GE.mnthr ) THEN
2075 *
2076  IF( wntvn ) THEN
2077 *
2078 * Path 1t(N much larger than M, JOBVT='N')
2079 * No right singular vectors to be computed
2080 *
2081  itau = 1
2082  iwork = itau + m
2083 *
2084 * Compute A=L*Q
2085 * (Workspace: need 2*M, prefer M + M*NB)
2086 *
2087  CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2088  \$ lwork-iwork+1, ierr )
2089 *
2090 * Zero out above L
2091 *
2092  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2093  ie = 1
2094  itauq = ie + m
2095  itaup = itauq + m
2096  iwork = itaup + m
2097 *
2098 * Bidiagonalize L in A
2099 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2100 *
2101  CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2102  \$ work( itaup ), work( iwork ), lwork-iwork+1,
2103  \$ ierr )
2104  IF( wntuo .OR. wntuas ) THEN
2105 *
2106 * If left singular vectors desired, generate Q
2107 * (Workspace: need 4*M, prefer 3*M + M*NB)
2108 *
2109  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2110  \$ work( iwork ), lwork-iwork+1, ierr )
2111  END IF
2112  iwork = ie + m
2113  nru = 0
2114  IF( wntuo .OR. wntuas )
2115  \$ nru = m
2116 *
2117 * Perform bidiagonal QR iteration, computing left singular
2118 * vectors of A in A if desired
2119 * (Workspace: need BDSPAC)
2120 *
2121  CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2122  \$ lda, dum, 1, work( iwork ), info )
2123 *
2124 * If left singular vectors desired in U, copy them there
2125 *
2126  IF( wntuas )
2127  \$ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2128 *
2129  ELSE IF( wntvo .AND. wntun ) THEN
2130 *
2131 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2132 * M right singular vectors to be overwritten on A and
2133 * no left singular vectors to be computed
2134 *
2135  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2136 *
2137 * Sufficient workspace for a fast algorithm
2138 *
2139  ir = 1
2140  IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2141 *
2142 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2143 *
2144  ldwrku = lda
2145  chunk = n
2146  ldwrkr = lda
2147  ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2148 *
2149 * WORK(IU) is LDA by N and WORK(IR) is M by M
2150 *
2151  ldwrku = lda
2152  chunk = n
2153  ldwrkr = m
2154  ELSE
2155 *
2156 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2157 *
2158  ldwrku = m
2159  chunk = ( lwork-m*m-m ) / m
2160  ldwrkr = m
2161  END IF
2162  itau = ir + ldwrkr*m
2163  iwork = itau + m
2164 *
2165 * Compute A=L*Q
2166 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2167 *
2168  CALL dgelqf( m, n, a, lda, work( itau ),
2169  \$ work( iwork ), lwork-iwork+1, ierr )
2170 *
2171 * Copy L to WORK(IR) and zero out above it
2172 *
2173  CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2174  CALL dlaset( 'U', m-1, m-1, zero, zero,
2175  \$ work( ir+ldwrkr ), ldwrkr )
2176 *
2177 * Generate Q in A
2178 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2179 *
2180  CALL dorglq( m, n, m, a, lda, work( itau ),
2181  \$ work( iwork ), lwork-iwork+1, ierr )
2182  ie = itau
2183  itauq = ie + m
2184  itaup = itauq + m
2185  iwork = itaup + m
2186 *
2187 * Bidiagonalize L in WORK(IR)
2188 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2189 *
2190  CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2191  \$ work( itauq ), work( itaup ),
2192  \$ work( iwork ), lwork-iwork+1, ierr )
2193 *
2194 * Generate right vectors bidiagonalizing L
2195 * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2196 *
2197  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2198  \$ work( itaup ), work( iwork ),
2199  \$ lwork-iwork+1, ierr )
2200  iwork = ie + m
2201 *
2202 * Perform bidiagonal QR iteration, computing right
2203 * singular vectors of L in WORK(IR)
2204 * (Workspace: need M*M + BDSPAC)
2205 *
2206  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2207  \$ work( ir ), ldwrkr, dum, 1, dum, 1,
2208  \$ work( iwork ), info )
2209  iu = ie + m
2210 *
2211 * Multiply right singular vectors of L in WORK(IR) by Q
2212 * in A, storing result in WORK(IU) and copying to A
2213 * (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2214 *
2215  DO 30 i = 1, n, chunk
2216  blk = min( n-i+1, chunk )
2217  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2218  \$ ldwrkr, a( 1, i ), lda, zero,
2219  \$ work( iu ), ldwrku )
2220  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2221  \$ a( 1, i ), lda )
2222  30 CONTINUE
2223 *
2224  ELSE
2225 *
2226 * Insufficient workspace for a fast algorithm
2227 *
2228  ie = 1
2229  itauq = ie + m
2230  itaup = itauq + m
2231  iwork = itaup + m
2232 *
2233 * Bidiagonalize A
2234 * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2235 *
2236  CALL dgebrd( m, n, a, lda, s, work( ie ),
2237  \$ work( itauq ), work( itaup ),
2238  \$ work( iwork ), lwork-iwork+1, ierr )
2239 *
2240 * Generate right vectors bidiagonalizing A
2241 * (Workspace: need 4*M, prefer 3*M + M*NB)
2242 *
2243  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2244  \$ work( iwork ), lwork-iwork+1, ierr )
2245  iwork = ie + m
2246 *
2247 * Perform bidiagonal QR iteration, computing right
2248 * singular vectors of A in A
2249 * (Workspace: need BDSPAC)
2250 *
2251  CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2252  \$ dum, 1, dum, 1, work( iwork ), info )
2253 *
2254  END IF
2255 *
2256  ELSE IF( wntvo .AND. wntuas ) THEN
2257 *
2258 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2259 * M right singular vectors to be overwritten on A and
2260 * M left singular vectors to be computed in U
2261 *
2262  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2263 *
2264 * Sufficient workspace for a fast algorithm
2265 *
2266  ir = 1
2267  IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2268 *
2269 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2270 *
2271  ldwrku = lda
2272  chunk = n
2273  ldwrkr = lda
2274  ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2275 *
2276 * WORK(IU) is LDA by N and WORK(IR) is M by M
2277 *
2278  ldwrku = lda
2279  chunk = n
2280  ldwrkr = m
2281  ELSE
2282 *
2283 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2284 *
2285  ldwrku = m
2286  chunk = ( lwork-m*m-m ) / m
2287  ldwrkr = m
2288  END IF
2289  itau = ir + ldwrkr*m
2290  iwork = itau + m
2291 *
2292 * Compute A=L*Q
2293 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2294 *
2295  CALL dgelqf( m, n, a, lda, work( itau ),
2296  \$ work( iwork ), lwork-iwork+1, ierr )
2297 *
2298 * Copy L to U, zeroing about above it
2299 *
2300  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2301  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2302  \$ ldu )
2303 *
2304 * Generate Q in A
2305 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2306 *
2307  CALL dorglq( m, n, m, a, lda, work( itau ),
2308  \$ work( iwork ), lwork-iwork+1, ierr )
2309  ie = itau
2310  itauq = ie + m
2311  itaup = itauq + m
2312  iwork = itaup + m
2313 *
2314 * Bidiagonalize L in U, copying result to WORK(IR)
2315 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2316 *
2317  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2318  \$ work( itauq ), work( itaup ),
2319  \$ work( iwork ), lwork-iwork+1, ierr )
2320  CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2321 *
2322 * Generate right vectors bidiagonalizing L in WORK(IR)
2323 * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2324 *
2325  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2326  \$ work( itaup ), work( iwork ),
2327  \$ lwork-iwork+1, ierr )
2328 *
2329 * Generate left vectors bidiagonalizing L in U
2330 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2331 *
2332  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2333  \$ work( iwork ), lwork-iwork+1, ierr )
2334  iwork = ie + m
2335 *
2336 * Perform bidiagonal QR iteration, computing left
2337 * singular vectors of L in U, and computing right
2338 * singular vectors of L in WORK(IR)
2339 * (Workspace: need M*M + BDSPAC)
2340 *
2341  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2342  \$ work( ir ), ldwrkr, u, ldu, dum, 1,
2343  \$ work( iwork ), info )
2344  iu = ie + m
2345 *
2346 * Multiply right singular vectors of L in WORK(IR) by Q
2347 * in A, storing result in WORK(IU) and copying to A
2348 * (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2349 *
2350  DO 40 i = 1, n, chunk
2351  blk = min( n-i+1, chunk )
2352  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2353  \$ ldwrkr, a( 1, i ), lda, zero,
2354  \$ work( iu ), ldwrku )
2355  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2356  \$ a( 1, i ), lda )
2357  40 CONTINUE
2358 *
2359  ELSE
2360 *
2361 * Insufficient workspace for a fast algorithm
2362 *
2363  itau = 1
2364  iwork = itau + m
2365 *
2366 * Compute A=L*Q
2367 * (Workspace: need 2*M, prefer M + M*NB)
2368 *
2369  CALL dgelqf( m, n, a, lda, work( itau ),
2370  \$ work( iwork ), lwork-iwork+1, ierr )
2371 *
2372 * Copy L to U, zeroing out above it
2373 *
2374  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2375  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2376  \$ ldu )
2377 *
2378 * Generate Q in A
2379 * (Workspace: need 2*M, prefer M + M*NB)
2380 *
2381  CALL dorglq( m, n, m, a, lda, work( itau ),
2382  \$ work( iwork ), lwork-iwork+1, ierr )
2383  ie = itau
2384  itauq = ie + m
2385  itaup = itauq + m
2386  iwork = itaup + m
2387 *
2388 * Bidiagonalize L in U
2389 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2390 *
2391  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2392  \$ work( itauq ), work( itaup ),
2393  \$ work( iwork ), lwork-iwork+1, ierr )
2394 *
2395 * Multiply right vectors bidiagonalizing L by Q in A
2396 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2397 *
2398  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2399  \$ work( itaup ), a, lda, work( iwork ),
2400  \$ lwork-iwork+1, ierr )
2401 *
2402 * Generate left vectors bidiagonalizing L in U
2403 * (Workspace: need 4*M, prefer 3*M + M*NB)
2404 *
2405  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2406  \$ work( iwork ), lwork-iwork+1, ierr )
2407  iwork = ie + m
2408 *
2409 * Perform bidiagonal QR iteration, computing left
2410 * singular vectors of A in U and computing right
2411 * singular vectors of A in A
2412 * (Workspace: need BDSPAC)
2413 *
2414  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2415  \$ u, ldu, dum, 1, work( iwork ), info )
2416 *
2417  END IF
2418 *
2419  ELSE IF( wntvs ) THEN
2420 *
2421  IF( wntun ) THEN
2422 *
2423 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2424 * M right singular vectors to be computed in VT and
2425 * no left singular vectors to be computed
2426 *
2427  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2428 *
2429 * Sufficient workspace for a fast algorithm
2430 *
2431  ir = 1
2432  IF( lwork.GE.wrkbl+lda*m ) THEN
2433 *
2434 * WORK(IR) is LDA by M
2435 *
2436  ldwrkr = lda
2437  ELSE
2438 *
2439 * WORK(IR) is M by M
2440 *
2441  ldwrkr = m
2442  END IF
2443  itau = ir + ldwrkr*m
2444  iwork = itau + m
2445 *
2446 * Compute A=L*Q
2447 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2448 *
2449  CALL dgelqf( m, n, a, lda, work( itau ),
2450  \$ work( iwork ), lwork-iwork+1, ierr )
2451 *
2452 * Copy L to WORK(IR), zeroing out above it
2453 *
2454  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2455  \$ ldwrkr )
2456  CALL dlaset( 'U', m-1, m-1, zero, zero,
2457  \$ work( ir+ldwrkr ), ldwrkr )
2458 *
2459 * Generate Q in A
2460 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2461 *
2462  CALL dorglq( m, n, m, a, lda, work( itau ),
2463  \$ work( iwork ), lwork-iwork+1, ierr )
2464  ie = itau
2465  itauq = ie + m
2466  itaup = itauq + m
2467  iwork = itaup + m
2468 *
2469 * Bidiagonalize L in WORK(IR)
2470 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2471 *
2472  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2473  \$ work( ie ), work( itauq ),
2474  \$ work( itaup ), work( iwork ),
2475  \$ lwork-iwork+1, ierr )
2476 *
2477 * Generate right vectors bidiagonalizing L in
2478 * WORK(IR)
2479 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2480 *
2481  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2482  \$ work( itaup ), work( iwork ),
2483  \$ lwork-iwork+1, ierr )
2484  iwork = ie + m
2485 *
2486 * Perform bidiagonal QR iteration, computing right
2487 * singular vectors of L in WORK(IR)
2488 * (Workspace: need M*M + BDSPAC)
2489 *
2490  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2491  \$ work( ir ), ldwrkr, dum, 1, dum, 1,
2492  \$ work( iwork ), info )
2493 *
2494 * Multiply right singular vectors of L in WORK(IR) by
2495 * Q in A, storing result in VT
2496 * (Workspace: need M*M)
2497 *
2498  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2499  \$ ldwrkr, a, lda, zero, vt, ldvt )
2500 *
2501  ELSE
2502 *
2503 * Insufficient workspace for a fast algorithm
2504 *
2505  itau = 1
2506  iwork = itau + m
2507 *
2508 * Compute A=L*Q
2509 * (Workspace: need 2*M, prefer M + M*NB)
2510 *
2511  CALL dgelqf( m, n, a, lda, work( itau ),
2512  \$ work( iwork ), lwork-iwork+1, ierr )
2513 *
2514 * Copy result to VT
2515 *
2516  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2517 *
2518 * Generate Q in VT
2519 * (Workspace: need 2*M, prefer M + M*NB)
2520 *
2521  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2522  \$ work( iwork ), lwork-iwork+1, ierr )
2523  ie = itau
2524  itauq = ie + m
2525  itaup = itauq + m
2526  iwork = itaup + m
2527 *
2528 * Zero out above L in A
2529 *
2530  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2531  \$ lda )
2532 *
2533 * Bidiagonalize L in A
2534 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2535 *
2536  CALL dgebrd( m, m, a, lda, s, work( ie ),
2537  \$ work( itauq ), work( itaup ),
2538  \$ work( iwork ), lwork-iwork+1, ierr )
2539 *
2540 * Multiply right vectors bidiagonalizing L by Q in VT
2541 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2542 *
2543  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2544  \$ work( itaup ), vt, ldvt,
2545  \$ work( iwork ), lwork-iwork+1, ierr )
2546  iwork = ie + m
2547 *
2548 * Perform bidiagonal QR iteration, computing right
2549 * singular vectors of A in VT
2550 * (Workspace: need BDSPAC)
2551 *
2552  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2553  \$ ldvt, dum, 1, dum, 1, work( iwork ),
2554  \$ info )
2555 *
2556  END IF
2557 *
2558  ELSE IF( wntuo ) THEN
2559 *
2560 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2561 * M right singular vectors to be computed in VT and
2562 * M left singular vectors to be overwritten on A
2563 *
2564  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2565 *
2566 * Sufficient workspace for a fast algorithm
2567 *
2568  iu = 1
2569  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2570 *
2571 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2572 *
2573  ldwrku = lda
2574  ir = iu + ldwrku*m
2575  ldwrkr = lda
2576  ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2577 *
2578 * WORK(IU) is LDA by M and WORK(IR) is M by M
2579 *
2580  ldwrku = lda
2581  ir = iu + ldwrku*m
2582  ldwrkr = m
2583  ELSE
2584 *
2585 * WORK(IU) is M by M and WORK(IR) is M by M
2586 *
2587  ldwrku = m
2588  ir = iu + ldwrku*m
2589  ldwrkr = m
2590  END IF
2591  itau = ir + ldwrkr*m
2592  iwork = itau + m
2593 *
2594 * Compute A=L*Q
2595 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2596 *
2597  CALL dgelqf( m, n, a, lda, work( itau ),
2598  \$ work( iwork ), lwork-iwork+1, ierr )
2599 *
2600 * Copy L to WORK(IU), zeroing out below it
2601 *
2602  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2603  \$ ldwrku )
2604  CALL dlaset( 'U', m-1, m-1, zero, zero,
2605  \$ work( iu+ldwrku ), ldwrku )
2606 *
2607 * Generate Q in A
2608 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2609 *
2610  CALL dorglq( m, n, m, a, lda, work( itau ),
2611  \$ work( iwork ), lwork-iwork+1, ierr )
2612  ie = itau
2613  itauq = ie + m
2614  itaup = itauq + m
2615  iwork = itaup + m
2616 *
2617 * Bidiagonalize L in WORK(IU), copying result to
2618 * WORK(IR)
2619 * (Workspace: need 2*M*M + 4*M,
2620 * prefer 2*M*M+3*M+2*M*NB)
2621 *
2622  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2623  \$ work( ie ), work( itauq ),
2624  \$ work( itaup ), work( iwork ),
2625  \$ lwork-iwork+1, ierr )
2626  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2627  \$ work( ir ), ldwrkr )
2628 *
2629 * Generate right bidiagonalizing vectors in WORK(IU)
2630 * (Workspace: need 2*M*M + 4*M-1,
2631 * prefer 2*M*M+3*M+(M-1)*NB)
2632 *
2633  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2634  \$ work( itaup ), work( iwork ),
2635  \$ lwork-iwork+1, ierr )
2636 *
2637 * Generate left bidiagonalizing vectors in WORK(IR)
2638 * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2639 *
2640  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2641  \$ work( itauq ), work( iwork ),
2642  \$ lwork-iwork+1, ierr )
2643  iwork = ie + m
2644 *
2645 * Perform bidiagonal QR iteration, computing left
2646 * singular vectors of L in WORK(IR) and computing
2647 * right singular vectors of L in WORK(IU)
2648 * (Workspace: need 2*M*M + BDSPAC)
2649 *
2650  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2651  \$ work( iu ), ldwrku, work( ir ),
2652  \$ ldwrkr, dum, 1, work( iwork ), info )
2653 *
2654 * Multiply right singular vectors of L in WORK(IU) by
2655 * Q in A, storing result in VT
2656 * (Workspace: need M*M)
2657 *
2658  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2659  \$ ldwrku, a, lda, zero, vt, ldvt )
2660 *
2661 * Copy left singular vectors of L to A
2662 * (Workspace: need M*M)
2663 *
2664  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2665  \$ lda )
2666 *
2667  ELSE
2668 *
2669 * Insufficient workspace for a fast algorithm
2670 *
2671  itau = 1
2672  iwork = itau + m
2673 *
2674 * Compute A=L*Q, copying result to VT
2675 * (Workspace: need 2*M, prefer M + M*NB)
2676 *
2677  CALL dgelqf( m, n, a, lda, work( itau ),
2678  \$ work( iwork ), lwork-iwork+1, ierr )
2679  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2680 *
2681 * Generate Q in VT
2682 * (Workspace: need 2*M, prefer M + M*NB)
2683 *
2684  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2685  \$ work( iwork ), lwork-iwork+1, ierr )
2686  ie = itau
2687  itauq = ie + m
2688  itaup = itauq + m
2689  iwork = itaup + m
2690 *
2691 * Zero out above L in A
2692 *
2693  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2694  \$ lda )
2695 *
2696 * Bidiagonalize L in A
2697 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2698 *
2699  CALL dgebrd( m, m, a, lda, s, work( ie ),
2700  \$ work( itauq ), work( itaup ),
2701  \$ work( iwork ), lwork-iwork+1, ierr )
2702 *
2703 * Multiply right vectors bidiagonalizing L by Q in VT
2704 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2705 *
2706  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2707  \$ work( itaup ), vt, ldvt,
2708  \$ work( iwork ), lwork-iwork+1, ierr )
2709 *
2710 * Generate left bidiagonalizing vectors of L in A
2711 * (Workspace: need 4*M, prefer 3*M + M*NB)
2712 *
2713  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2714  \$ work( iwork ), lwork-iwork+1, ierr )
2715  iwork = ie + m
2716 *
2717 * Perform bidiagonal QR iteration, compute left
2718 * singular vectors of A in A and compute right
2719 * singular vectors of A in VT
2720 * (Workspace: need BDSPAC)
2721 *
2722  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2723  \$ ldvt, a, lda, dum, 1, work( iwork ),
2724  \$ info )
2725 *
2726  END IF
2727 *
2728  ELSE IF( wntuas ) THEN
2729 *
2730 * Path 6t(N much larger than M, JOBU='S' or 'A',
2731 * JOBVT='S')
2732 * M right singular vectors to be computed in VT and
2733 * M left singular vectors to be computed in U
2734 *
2735  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2736 *
2737 * Sufficient workspace for a fast algorithm
2738 *
2739  iu = 1
2740  IF( lwork.GE.wrkbl+lda*m ) THEN
2741 *
2742 * WORK(IU) is LDA by N
2743 *
2744  ldwrku = lda
2745  ELSE
2746 *
2747 * WORK(IU) is LDA by M
2748 *
2749  ldwrku = m
2750  END IF
2751  itau = iu + ldwrku*m
2752  iwork = itau + m
2753 *
2754 * Compute A=L*Q
2755 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2756 *
2757  CALL dgelqf( m, n, a, lda, work( itau ),
2758  \$ work( iwork ), lwork-iwork+1, ierr )
2759 *
2760 * Copy L to WORK(IU), zeroing out above it
2761 *
2762  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2763  \$ ldwrku )
2764  CALL dlaset( 'U', m-1, m-1, zero, zero,
2765  \$ work( iu+ldwrku ), ldwrku )
2766 *
2767 * Generate Q in A
2768 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2769 *
2770  CALL dorglq( m, n, m, a, lda, work( itau ),
2771  \$ work( iwork ), lwork-iwork+1, ierr )
2772  ie = itau
2773  itauq = ie + m
2774  itaup = itauq + m
2775  iwork = itaup + m
2776 *
2777 * Bidiagonalize L in WORK(IU), copying result to U
2778 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2779 *
2780  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2781  \$ work( ie ), work( itauq ),
2782  \$ work( itaup ), work( iwork ),
2783  \$ lwork-iwork+1, ierr )
2784  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2785  \$ ldu )
2786 *
2787 * Generate right bidiagonalizing vectors in WORK(IU)
2788 * (Workspace: need M*M + 4*M-1,
2789 * prefer M*M+3*M+(M-1)*NB)
2790 *
2791  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2792  \$ work( itaup ), work( iwork ),
2793  \$ lwork-iwork+1, ierr )
2794 *
2795 * Generate left bidiagonalizing vectors in U
2796 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2797 *
2798  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2799  \$ work( iwork ), lwork-iwork+1, ierr )
2800  iwork = ie + m
2801 *
2802 * Perform bidiagonal QR iteration, computing left
2803 * singular vectors of L in U and computing right
2804 * singular vectors of L in WORK(IU)
2805 * (Workspace: need M*M + BDSPAC)
2806 *
2807  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2808  \$ work( iu ), ldwrku, u, ldu, dum, 1,
2809  \$ work( iwork ), info )
2810 *
2811 * Multiply right singular vectors of L in WORK(IU) by
2812 * Q in A, storing result in VT
2813 * (Workspace: need M*M)
2814 *
2815  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2816  \$ ldwrku, a, lda, zero, vt, ldvt )
2817 *
2818  ELSE
2819 *
2820 * Insufficient workspace for a fast algorithm
2821 *
2822  itau = 1
2823  iwork = itau + m
2824 *
2825 * Compute A=L*Q, copying result to VT
2826 * (Workspace: need 2*M, prefer M + M*NB)
2827 *
2828  CALL dgelqf( m, n, a, lda, work( itau ),
2829  \$ work( iwork ), lwork-iwork+1, ierr )
2830  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2831 *
2832 * Generate Q in VT
2833 * (Workspace: need 2*M, prefer M + M*NB)
2834 *
2835  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2836  \$ work( iwork ), lwork-iwork+1, ierr )
2837 *
2838 * Copy L to U, zeroing out above it
2839 *
2840  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2841  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2842  \$ ldu )
2843  ie = itau
2844  itauq = ie + m
2845  itaup = itauq + m
2846  iwork = itaup + m
2847 *
2848 * Bidiagonalize L in U
2849 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2850 *
2851  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2852  \$ work( itauq ), work( itaup ),
2853  \$ work( iwork ), lwork-iwork+1, ierr )
2854 *
2855 * Multiply right bidiagonalizing vectors in U by Q
2856 * in VT
2857 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
2858 *
2859  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2860  \$ work( itaup ), vt, ldvt,
2861  \$ work( iwork ), lwork-iwork+1, ierr )
2862 *
2863 * Generate left bidiagonalizing vectors in U
2864 * (Workspace: need 4*M, prefer 3*M + M*NB)
2865 *
2866  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2867  \$ work( iwork ), lwork-iwork+1, ierr )
2868  iwork = ie + m
2869 *
2870 * Perform bidiagonal QR iteration, computing left
2871 * singular vectors of A in U and computing right
2872 * singular vectors of A in VT
2873 * (Workspace: need BDSPAC)
2874 *
2875  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2876  \$ ldvt, u, ldu, dum, 1, work( iwork ),
2877  \$ info )
2878 *
2879  END IF
2880 *
2881  END IF
2882 *
2883  ELSE IF( wntva ) THEN
2884 *
2885  IF( wntun ) THEN
2886 *
2887 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2888 * N right singular vectors to be computed in VT and
2889 * no left singular vectors to be computed
2890 *
2891  IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2892 *
2893 * Sufficient workspace for a fast algorithm
2894 *
2895  ir = 1
2896  IF( lwork.GE.wrkbl+lda*m ) THEN
2897 *
2898 * WORK(IR) is LDA by M
2899 *
2900  ldwrkr = lda
2901  ELSE
2902 *
2903 * WORK(IR) is M by M
2904 *
2905  ldwrkr = m
2906  END IF
2907  itau = ir + ldwrkr*m
2908  iwork = itau + m
2909 *
2910 * Compute A=L*Q, copying result to VT
2911 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2912 *
2913  CALL dgelqf( m, n, a, lda, work( itau ),
2914  \$ work( iwork ), lwork-iwork+1, ierr )
2915  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2916 *
2917 * Copy L to WORK(IR), zeroing out above it
2918 *
2919  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2920  \$ ldwrkr )
2921  CALL dlaset( 'U', m-1, m-1, zero, zero,
2922  \$ work( ir+ldwrkr ), ldwrkr )
2923 *
2924 * Generate Q in VT
2925 * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2926 *
2927  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2928  \$ work( iwork ), lwork-iwork+1, ierr )
2929  ie = itau
2930  itauq = ie + m
2931  itaup = itauq + m
2932  iwork = itaup + m
2933 *
2934 * Bidiagonalize L in WORK(IR)
2935 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2936 *
2937  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2938  \$ work( ie ), work( itauq ),
2939  \$ work( itaup ), work( iwork ),
2940  \$ lwork-iwork+1, ierr )
2941 *
2942 * Generate right bidiagonalizing vectors in WORK(IR)
2943 * (Workspace: need M*M + 4*M-1,
2944 * prefer M*M+3*M+(M-1)*NB)
2945 *
2946  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2947  \$ work( itaup ), work( iwork ),
2948  \$ lwork-iwork+1, ierr )
2949  iwork = ie + m
2950 *
2951 * Perform bidiagonal QR iteration, computing right
2952 * singular vectors of L in WORK(IR)
2953 * (Workspace: need M*M + BDSPAC)
2954 *
2955  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2956  \$ work( ir ), ldwrkr, dum, 1, dum, 1,
2957  \$ work( iwork ), info )
2958 *
2959 * Multiply right singular vectors of L in WORK(IR) by
2960 * Q in VT, storing result in A
2961 * (Workspace: need M*M)
2962 *
2963  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2964  \$ ldwrkr, vt, ldvt, zero, a, lda )
2965 *
2966 * Copy right singular vectors of A from A to VT
2967 *
2968  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2969 *
2970  ELSE
2971 *
2972 * Insufficient workspace for a fast algorithm
2973 *
2974  itau = 1
2975  iwork = itau + m
2976 *
2977 * Compute A=L*Q, copying result to VT
2978 * (Workspace: need 2*M, prefer M + M*NB)
2979 *
2980  CALL dgelqf( m, n, a, lda, work( itau ),
2981  \$ work( iwork ), lwork-iwork+1, ierr )
2982  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2983 *
2984 * Generate Q in VT
2985 * (Workspace: need M + N, prefer M + N*NB)
2986 *
2987  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2988  \$ work( iwork ), lwork-iwork+1, ierr )
2989  ie = itau
2990  itauq = ie + m
2991  itaup = itauq + m
2992  iwork = itaup + m
2993 *
2994 * Zero out above L in A
2995 *
2996  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2997  \$ lda )
2998 *
2999 * Bidiagonalize L in A
3000 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3001 *
3002  CALL dgebrd( m, m, a, lda, s, work( ie ),
3003  \$ work( itauq ), work( itaup ),
3004  \$ work( iwork ), lwork-iwork+1, ierr )
3005 *
3006 * Multiply right bidiagonalizing vectors in A by Q
3007 * in VT
3008 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3009 *
3010  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3011  \$ work( itaup ), vt, ldvt,
3012  \$ work( iwork ), lwork-iwork+1, ierr )
3013  iwork = ie + m
3014 *
3015 * Perform bidiagonal QR iteration, computing right
3016 * singular vectors of A in VT
3017 * (Workspace: need BDSPAC)
3018 *
3019  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3020  \$ ldvt, dum, 1, dum, 1, work( iwork ),
3021  \$ info )
3022 *
3023  END IF
3024 *
3025  ELSE IF( wntuo ) THEN
3026 *
3027 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3028 * N right singular vectors to be computed in VT and
3029 * M left singular vectors to be overwritten on A
3030 *
3031  IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3032 *
3033 * Sufficient workspace for a fast algorithm
3034 *
3035  iu = 1
3036  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3037 *
3038 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3039 *
3040  ldwrku = lda
3041  ir = iu + ldwrku*m
3042  ldwrkr = lda
3043  ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3044 *
3045 * WORK(IU) is LDA by M and WORK(IR) is M by M
3046 *
3047  ldwrku = lda
3048  ir = iu + ldwrku*m
3049  ldwrkr = m
3050  ELSE
3051 *
3052 * WORK(IU) is M by M and WORK(IR) is M by M
3053 *
3054  ldwrku = m
3055  ir = iu + ldwrku*m
3056  ldwrkr = m
3057  END IF
3058  itau = ir + ldwrkr*m
3059  iwork = itau + m
3060 *
3061 * Compute A=L*Q, copying result to VT
3062 * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3063 *
3064  CALL dgelqf( m, n, a, lda, work( itau ),
3065  \$ work( iwork ), lwork-iwork+1, ierr )
3066  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3067 *
3068 * Generate Q in VT
3069 * (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3070 *
3071  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3072  \$ work( iwork ), lwork-iwork+1, ierr )
3073 *
3074 * Copy L to WORK(IU), zeroing out above it
3075 *
3076  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3077  \$ ldwrku )
3078  CALL dlaset( 'U', m-1, m-1, zero, zero,
3079  \$ work( iu+ldwrku ), ldwrku )
3080  ie = itau
3081  itauq = ie + m
3082  itaup = itauq + m
3083  iwork = itaup + m
3084 *
3085 * Bidiagonalize L in WORK(IU), copying result to
3086 * WORK(IR)
3087 * (Workspace: need 2*M*M + 4*M,
3088 * prefer 2*M*M+3*M+2*M*NB)
3089 *
3090  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3091  \$ work( ie ), work( itauq ),
3092  \$ work( itaup ), work( iwork ),
3093  \$ lwork-iwork+1, ierr )
3094  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3095  \$ work( ir ), ldwrkr )
3096 *
3097 * Generate right bidiagonalizing vectors in WORK(IU)
3098 * (Workspace: need 2*M*M + 4*M-1,
3099 * prefer 2*M*M+3*M+(M-1)*NB)
3100 *
3101  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3102  \$ work( itaup ), work( iwork ),
3103  \$ lwork-iwork+1, ierr )
3104 *
3105 * Generate left bidiagonalizing vectors in WORK(IR)
3106 * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3107 *
3108  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3109  \$ work( itauq ), work( iwork ),
3110  \$ lwork-iwork+1, ierr )
3111  iwork = ie + m
3112 *
3113 * Perform bidiagonal QR iteration, computing left
3114 * singular vectors of L in WORK(IR) and computing
3115 * right singular vectors of L in WORK(IU)
3116 * (Workspace: need 2*M*M + BDSPAC)
3117 *
3118  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3119  \$ work( iu ), ldwrku, work( ir ),
3120  \$ ldwrkr, dum, 1, work( iwork ), info )
3121 *
3122 * Multiply right singular vectors of L in WORK(IU) by
3123 * Q in VT, storing result in A
3124 * (Workspace: need M*M)
3125 *
3126  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3127  \$ ldwrku, vt, ldvt, zero, a, lda )
3128 *
3129 * Copy right singular vectors of A from A to VT
3130 *
3131  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3132 *
3133 * Copy left singular vectors of A from WORK(IR) to A
3134 *
3135  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3136  \$ lda )
3137 *
3138  ELSE
3139 *
3140 * Insufficient workspace for a fast algorithm
3141 *
3142  itau = 1
3143  iwork = itau + m
3144 *
3145 * Compute A=L*Q, copying result to VT
3146 * (Workspace: need 2*M, prefer M + M*NB)
3147 *
3148  CALL dgelqf( m, n, a, lda, work( itau ),
3149  \$ work( iwork ), lwork-iwork+1, ierr )
3150  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3151 *
3152 * Generate Q in VT
3153 * (Workspace: need M + N, prefer M + N*NB)
3154 *
3155  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3156  \$ work( iwork ), lwork-iwork+1, ierr )
3157  ie = itau
3158  itauq = ie + m
3159  itaup = itauq + m
3160  iwork = itaup + m
3161 *
3162 * Zero out above L in A
3163 *
3164  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3165  \$ lda )
3166 *
3167 * Bidiagonalize L in A
3168 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3169 *
3170  CALL dgebrd( m, m, a, lda, s, work( ie ),
3171  \$ work( itauq ), work( itaup ),
3172  \$ work( iwork ), lwork-iwork+1, ierr )
3173 *
3174 * Multiply right bidiagonalizing vectors in A by Q
3175 * in VT
3176 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3177 *
3178  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3179  \$ work( itaup ), vt, ldvt,
3180  \$ work( iwork ), lwork-iwork+1, ierr )
3181 *
3182 * Generate left bidiagonalizing vectors in A
3183 * (Workspace: need 4*M, prefer 3*M + M*NB)
3184 *
3185  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3186  \$ work( iwork ), lwork-iwork+1, ierr )
3187  iwork = ie + m
3188 *
3189 * Perform bidiagonal QR iteration, computing left
3190 * singular vectors of A in A and computing right
3191 * singular vectors of A in VT
3192 * (Workspace: need BDSPAC)
3193 *
3194  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3195  \$ ldvt, a, lda, dum, 1, work( iwork ),
3196  \$ info )
3197 *
3198  END IF
3199 *
3200  ELSE IF( wntuas ) THEN
3201 *
3202 * Path 9t(N much larger than M, JOBU='S' or 'A',
3203 * JOBVT='A')
3204 * N right singular vectors to be computed in VT and
3205 * M left singular vectors to be computed in U
3206 *
3207  IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3208 *
3209 * Sufficient workspace for a fast algorithm
3210 *
3211  iu = 1
3212  IF( lwork.GE.wrkbl+lda*m ) THEN
3213 *
3214 * WORK(IU) is LDA by M
3215 *
3216  ldwrku = lda
3217  ELSE
3218 *
3219 * WORK(IU) is M by M
3220 *
3221  ldwrku = m
3222  END IF
3223  itau = iu + ldwrku*m
3224  iwork = itau + m
3225 *
3226 * Compute A=L*Q, copying result to VT
3227 * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3228 *
3229  CALL dgelqf( m, n, a, lda, work( itau ),
3230  \$ work( iwork ), lwork-iwork+1, ierr )
3231  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3232 *
3233 * Generate Q in VT
3234 * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3235 *
3236  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3237  \$ work( iwork ), lwork-iwork+1, ierr )
3238 *
3239 * Copy L to WORK(IU), zeroing out above it
3240 *
3241  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3242  \$ ldwrku )
3243  CALL dlaset( 'U', m-1, m-1, zero, zero,
3244  \$ work( iu+ldwrku ), ldwrku )
3245  ie = itau
3246  itauq = ie + m
3247  itaup = itauq + m
3248  iwork = itaup + m
3249 *
3250 * Bidiagonalize L in WORK(IU), copying result to U
3251 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3252 *
3253  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3254  \$ work( ie ), work( itauq ),
3255  \$ work( itaup ), work( iwork ),
3256  \$ lwork-iwork+1, ierr )
3257  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3258  \$ ldu )
3259 *
3260 * Generate right bidiagonalizing vectors in WORK(IU)
3261 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3262 *
3263  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3264  \$ work( itaup ), work( iwork ),
3265  \$ lwork-iwork+1, ierr )
3266 *
3267 * Generate left bidiagonalizing vectors in U
3268 * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3269 *
3270  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3271  \$ work( iwork ), lwork-iwork+1, ierr )
3272  iwork = ie + m
3273 *
3274 * Perform bidiagonal QR iteration, computing left
3275 * singular vectors of L in U and computing right
3276 * singular vectors of L in WORK(IU)
3277 * (Workspace: need M*M + BDSPAC)
3278 *
3279  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3280  \$ work( iu ), ldwrku, u, ldu, dum, 1,
3281  \$ work( iwork ), info )
3282 *
3283 * Multiply right singular vectors of L in WORK(IU) by
3284 * Q in VT, storing result in A
3285 * (Workspace: need M*M)
3286 *
3287  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3288  \$ ldwrku, vt, ldvt, zero, a, lda )
3289 *
3290 * Copy right singular vectors of A from A to VT
3291 *
3292  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3293 *
3294  ELSE
3295 *
3296 * Insufficient workspace for a fast algorithm
3297 *
3298  itau = 1
3299  iwork = itau + m
3300 *
3301 * Compute A=L*Q, copying result to VT
3302 * (Workspace: need 2*M, prefer M + M*NB)
3303 *
3304  CALL dgelqf( m, n, a, lda, work( itau ),
3305  \$ work( iwork ), lwork-iwork+1, ierr )
3306  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3307 *
3308 * Generate Q in VT
3309 * (Workspace: need M + N, prefer M + N*NB)
3310 *
3311  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3312  \$ work( iwork ), lwork-iwork+1, ierr )
3313 *
3314 * Copy L to U, zeroing out above it
3315 *
3316  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3317  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3318  \$ ldu )
3319  ie = itau
3320  itauq = ie + m
3321  itaup = itauq + m
3322  iwork = itaup + m
3323 *
3324 * Bidiagonalize L in U
3325 * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3326 *
3327  CALL dgebrd( m, m, u, ldu, s, work( ie ),
3328  \$ work( itauq ), work( itaup ),
3329  \$ work( iwork ), lwork-iwork+1, ierr )
3330 *
3331 * Multiply right bidiagonalizing vectors in U by Q
3332 * in VT
3333 * (Workspace: need 3*M + N, prefer 3*M + N*NB)
3334 *
3335  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3336  \$ work( itaup ), vt, ldvt,
3337  \$ work( iwork ), lwork-iwork+1, ierr )
3338 *
3339 * Generate left bidiagonalizing vectors in U
3340 * (Workspace: need 4*M, prefer 3*M + M*NB)
3341 *
3342  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3343  \$ work( iwork ), lwork-iwork+1, ierr )
3344  iwork = ie + m
3345 *
3346 * Perform bidiagonal QR iteration, computing left
3347 * singular vectors of A in U and computing right
3348 * singular vectors of A in VT
3349 * (Workspace: need BDSPAC)
3350 *
3351  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3352  \$ ldvt, u, ldu, dum, 1, work( iwork ),
3353  \$ info )
3354 *
3355  END IF
3356 *
3357  END IF
3358 *
3359  END IF
3360 *
3361  ELSE
3362 *
3363 * N .LT. MNTHR
3364 *
3365 * Path 10t(N greater than M, but not much larger)
3366 * Reduce to bidiagonal form without LQ decomposition
3367 *
3368  ie = 1
3369  itauq = ie + m
3370  itaup = itauq + m
3371  iwork = itaup + m
3372 *
3373 * Bidiagonalize A
3374 * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3375 *
3376  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3377  \$ work( itaup ), work( iwork ), lwork-iwork+1,
3378  \$ ierr )
3379  IF( wntuas ) THEN
3380 *
3381 * If left singular vectors desired in U, copy result to U
3382 * and generate left bidiagonalizing vectors in U
3383 * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3384 *
3385  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3386  CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3387  \$ work( iwork ), lwork-iwork+1, ierr )
3388  END IF
3389  IF( wntvas ) THEN
3390 *
3391 * If right singular vectors desired in VT, copy result to
3392 * VT and generate right bidiagonalizing vectors in VT
3393 * (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3394 *
3395  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3396  IF( wntva )
3397  \$ nrvt = n
3398  IF( wntvs )
3399  \$ nrvt = m
3400  CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3401  \$ work( iwork ), lwork-iwork+1, ierr )
3402  END IF
3403  IF( wntuo ) THEN
3404 *
3405 * If left singular vectors desired in A, generate left
3406 * bidiagonalizing vectors in A
3407 * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3408 *
3409  CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3410  \$ work( iwork ), lwork-iwork+1, ierr )
3411  END IF
3412  IF( wntvo ) THEN
3413 *
3414 * If right singular vectors desired in A, generate right
3415 * bidiagonalizing vectors in A
3416 * (Workspace: need 4*M, prefer 3*M + M*NB)
3417 *
3418  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3419  \$ work( iwork ), lwork-iwork+1, ierr )
3420  END IF
3421  iwork = ie + m
3422  IF( wntuas .OR. wntuo )
3423  \$ nru = m
3424  IF( wntun )
3425  \$ nru = 0
3426  IF( wntvas .OR. wntvo )
3427  \$ ncvt = n
3428  IF( wntvn )
3429  \$ ncvt = 0
3430  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3431 *
3432 * Perform bidiagonal QR iteration, if desired, computing
3433 * left singular vectors in U and computing right singular
3434 * vectors in VT
3435 * (Workspace: need BDSPAC)
3436 *
3437  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3438  \$ ldvt, u, ldu, dum, 1, work( iwork ), info )
3439  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3440 *
3441 * Perform bidiagonal QR iteration, if desired, computing
3442 * left singular vectors in U and computing right singular
3443 * vectors in A
3444 * (Workspace: need BDSPAC)
3445 *
3446  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3447  \$ u, ldu, dum, 1, work( iwork ), info )
3448  ELSE
3449 *
3450 * Perform bidiagonal QR iteration, if desired, computing
3451 * left singular vectors in A and computing right singular
3452 * vectors in VT
3453 * (Workspace: need BDSPAC)
3454 *
3455  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3456  \$ ldvt, a, lda, dum, 1, work( iwork ), info )
3457  END IF
3458 *
3459  END IF
3460 *
3461  END IF
3462 *
3463 * If DBDSQR failed to converge, copy unconverged superdiagonals
3464 * to WORK( 2:MINMN )
3465 *
3466  IF( info.NE.0 ) THEN
3467  IF( ie.GT.2 ) THEN
3468  DO 50 i = 1, minmn - 1
3469  work( i+1 ) = work( i+ie-1 )
3470  50 CONTINUE
3471  END IF
3472  IF( ie.LT.2 ) THEN
3473  DO 60 i = minmn - 1, 1, -1
3474  work( i+1 ) = work( i+ie-1 )
3475  60 CONTINUE
3476  END IF
3477  END IF
3478 *
3479 * Undo scaling if necessary
3480 *
3481  IF( iscl.EQ.1 ) THEN
3482  IF( anrm.GT.bignum )
3483  \$ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3484  \$ ierr )
3485  IF( info.NE.0 .AND. anrm.GT.bignum )
3486  \$ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3487  \$ minmn, ierr )
3488  IF( anrm.LT.smlnum )
3489  \$ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3490  \$ ierr )
3491  IF( info.NE.0 .AND. anrm.LT.smlnum )
3492  \$ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3493  \$ minmn, ierr )
3494  END IF
3495 *
3496 * Return optimal workspace in WORK(1)
3497 *
3498  work( 1 ) = maxwrk
3499 *
3500  RETURN
3501 *
3502 * End of DGESVD
3503 *
3504  END
subroutine dgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
DGEBRD
Definition: dgebrd.f:207
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:112
subroutine dormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMBR
Definition: dormbr.f:197
subroutine dorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGLQ
Definition: dorglq.f:129
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:145
subroutine dbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
DBDSQR
Definition: dbdsqr.f:232
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:137
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGBR
Definition: dorgbr.f:159
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:138
subroutine dgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
DGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: dgesvd.f:213
subroutine dorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGQR
Definition: dorgqr.f:130