LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
dgesdd.f
Go to the documentation of this file.
1 *> \brief \b DGESDD
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGESDD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, IWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBZ
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * INTEGER IWORK( * )
30 * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
31 * $ VT( LDVT, * ), WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> DGESDD computes the singular value decomposition (SVD) of a real
41 *> M-by-N matrix A, optionally computing the left and right singular
42 *> vectors. If singular vectors are desired, it uses a
43 *> divide-and-conquer algorithm.
44 *>
45 *> The SVD is written
46 *>
47 *> A = U * SIGMA * transpose(V)
48 *>
49 *> where SIGMA is an M-by-N matrix which is zero except for its
50 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
51 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
52 *> are the singular values of A; they are real and non-negative, and
53 *> are returned in descending order. The first min(m,n) columns of
54 *> U and V are the left and right singular vectors of A.
55 *>
56 *> Note that the routine returns VT = V**T, not V.
57 *>
58 *> The divide and conquer algorithm makes very mild assumptions about
59 *> floating point arithmetic. It will work on machines with a guard
60 *> digit in add/subtract, or on those binary machines without guard
61 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
62 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
63 *> without guard digits, but we know of none.
64 *> \endverbatim
65 *
66 * Arguments:
67 * ==========
68 *
69 *> \param[in] JOBZ
70 *> \verbatim
71 *> JOBZ is CHARACTER*1
72 *> Specifies options for computing all or part of the matrix U:
73 *> = 'A': all M columns of U and all N rows of V**T are
74 *> returned in the arrays U and VT;
75 *> = 'S': the first min(M,N) columns of U and the first
76 *> min(M,N) rows of V**T are returned in the arrays U
77 *> and VT;
78 *> = 'O': If M >= N, the first N columns of U are overwritten
79 *> on the array A and all rows of V**T are returned in
80 *> the array VT;
81 *> otherwise, all columns of U are returned in the
82 *> array U and the first M rows of V**T are overwritten
83 *> in the array A;
84 *> = 'N': no columns of U or rows of V**T are computed.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is DOUBLE PRECISION array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBZ = 'O', A is overwritten with the first N columns
105 *> of U (the left singular vectors, stored
106 *> columnwise) if M >= N;
107 *> A is overwritten with the first M rows
108 *> of V**T (the right singular vectors, stored
109 *> rowwise) otherwise.
110 *> if JOBZ .ne. 'O', the contents of A are destroyed.
111 *> \endverbatim
112 *>
113 *> \param[in] LDA
114 *> \verbatim
115 *> LDA is INTEGER
116 *> The leading dimension of the array A. LDA >= max(1,M).
117 *> \endverbatim
118 *>
119 *> \param[out] S
120 *> \verbatim
121 *> S is DOUBLE PRECISION array, dimension (min(M,N))
122 *> The singular values of A, sorted so that S(i) >= S(i+1).
123 *> \endverbatim
124 *>
125 *> \param[out] U
126 *> \verbatim
127 *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
128 *> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
129 *> UCOL = min(M,N) if JOBZ = 'S'.
130 *> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
131 *> orthogonal matrix U;
132 *> if JOBZ = 'S', U contains the first min(M,N) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
147 *> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
148 *> N-by-N orthogonal matrix V**T;
149 *> if JOBZ = 'S', VT contains the first min(M,N) rows of
150 *> V**T (the right singular vectors, stored rowwise);
151 *> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1;
158 *> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
159 *> if JOBZ = 'S', LDVT >= min(M,N).
160 *> \endverbatim
161 *>
162 *> \param[out] WORK
163 *> \verbatim
164 *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
165 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
166 *> \endverbatim
167 *>
168 *> \param[in] LWORK
169 *> \verbatim
170 *> LWORK is INTEGER
171 *> The dimension of the array WORK. LWORK >= 1.
172 *> If LWORK = -1, a workspace query is assumed. The optimal
173 *> size for the WORK array is calculated and stored in WORK(1),
174 *> and no other work except argument checking is performed.
175 *>
176 *> Let mx = max(M,N) and mn = min(M,N).
177 *> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
178 *> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
179 *> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
180 *> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
181 *> These are not tight minimums in all cases; see comments inside code.
182 *> For good performance, LWORK should generally be larger;
183 *> a query is recommended.
184 *> \endverbatim
185 *>
186 *> \param[out] IWORK
187 *> \verbatim
188 *> IWORK is INTEGER array, dimension (8*min(M,N))
189 *> \endverbatim
190 *>
191 *> \param[out] INFO
192 *> \verbatim
193 *> INFO is INTEGER
194 *> = 0: successful exit.
195 *> < 0: if INFO = -i, the i-th argument had an illegal value.
196 *> > 0: DBDSDC did not converge, updating process failed.
197 *> \endverbatim
198 *
199 * Authors:
200 * ========
201 *
202 *> \author Univ. of Tennessee
203 *> \author Univ. of California Berkeley
204 *> \author Univ. of Colorado Denver
205 *> \author NAG Ltd.
206 *
207 *> \date June 2016
208 *
209 *> \ingroup doubleGEsing
210 *
211 *> \par Contributors:
212 * ==================
213 *>
214 *> Ming Gu and Huan Ren, Computer Science Division, University of
215 *> California at Berkeley, USA
216 *>
217 *> @precisions fortran d -> s
218 * =====================================================================
219  SUBROUTINE dgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
220  $ work, lwork, iwork, info )
221  implicit none
222 *
223 * -- LAPACK driver routine (version 3.6.1) --
224 * -- LAPACK is a software package provided by Univ. of Tennessee, --
225 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
226 * June 2016
227 *
228 * .. Scalar Arguments ..
229  CHARACTER JOBZ
230  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
231 * ..
232 * .. Array Arguments ..
233  INTEGER IWORK( * )
234  DOUBLE PRECISION A( lda, * ), S( * ), U( ldu, * ),
235  $ vt( ldvt, * ), work( * )
236 * ..
237 *
238 * =====================================================================
239 *
240 * .. Parameters ..
241  DOUBLE PRECISION ZERO, ONE
242  parameter ( zero = 0.0d0, one = 1.0d0 )
243 * ..
244 * .. Local Scalars ..
245  LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
246  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
247  $ ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,
248  $ ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,
249  $ mnthr, nwork, wrkbl
250  INTEGER LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,
251  $ lwork_dgebrd_nn, lwork_dgelqf_mn,
252  $ lwork_dgeqrf_mn,
253  $ lwork_dorgbr_p_mm, lwork_dorgbr_q_nn,
254  $ lwork_dorglq_mn, lwork_dorglq_nn,
255  $ lwork_dorgqr_mm, lwork_dorgqr_mn,
256  $ lwork_dormbr_prt_mm, lwork_dormbr_qln_mm,
257  $ lwork_dormbr_prt_mn, lwork_dormbr_qln_mn,
258  $ lwork_dormbr_prt_nn, lwork_dormbr_qln_nn
259  DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
260 * ..
261 * .. Local Arrays ..
262  INTEGER IDUM( 1 )
263  DOUBLE PRECISION DUM( 1 )
264 * ..
265 * .. External Subroutines ..
266  EXTERNAL dbdsdc, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
268  $ xerbla
269 * ..
270 * .. External Functions ..
271  LOGICAL LSAME
272  DOUBLE PRECISION DLAMCH, DLANGE
273  EXTERNAL dlamch, dlange, lsame
274 * ..
275 * .. Intrinsic Functions ..
276  INTRINSIC int, max, min, sqrt
277 * ..
278 * .. Executable Statements ..
279 *
280 * Test the input arguments
281 *
282  info = 0
283  minmn = min( m, n )
284  wntqa = lsame( jobz, 'A' )
285  wntqs = lsame( jobz, 'S' )
286  wntqas = wntqa .OR. wntqs
287  wntqo = lsame( jobz, 'O' )
288  wntqn = lsame( jobz, 'N' )
289  lquery = ( lwork.EQ.-1 )
290 *
291  IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
292  info = -1
293  ELSE IF( m.LT.0 ) THEN
294  info = -2
295  ELSE IF( n.LT.0 ) THEN
296  info = -3
297  ELSE IF( lda.LT.max( 1, m ) ) THEN
298  info = -5
299  ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
300  $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
301  info = -8
302  ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
303  $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
304  $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
305  info = -10
306  END IF
307 *
308 * Compute workspace
309 * Note: Comments in the code beginning "Workspace:" describe the
310 * minimal amount of workspace allocated at that point in the code,
311 * as well as the preferred amount for good performance.
312 * NB refers to the optimal block size for the immediately
313 * following subroutine, as returned by ILAENV.
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  bdspac = 0
319  mnthr = int( minmn*11.0d0 / 6.0d0 )
320  IF( m.GE.n .AND. minmn.GT.0 ) THEN
321 *
322 * Compute space needed for DBDSDC
323 *
324  IF( wntqn ) THEN
325 * dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
326 * keep 7*N for backwards compatability.
327  bdspac = 7*n
328  ELSE
329  bdspac = 3*n*n + 4*n
330  END IF
331 *
332 * Compute space preferred for each routine
333  CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
334  $ dum(1), dum(1), -1, ierr )
335  lwork_dgebrd_mn = int( dum(1) )
336 *
337  CALL dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
338  $ dum(1), dum(1), -1, ierr )
339  lwork_dgebrd_nn = int( dum(1) )
340 *
341  CALL dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
342  lwork_dgeqrf_mn = int( dum(1) )
343 *
344  CALL dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
345  $ ierr )
346  lwork_dorgbr_q_nn = int( dum(1) )
347 *
348  CALL dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
349  lwork_dorgqr_mm = int( dum(1) )
350 *
351  CALL dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
352  lwork_dorgqr_mn = int( dum(1) )
353 *
354  CALL dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
355  $ dum(1), dum(1), n, dum(1), -1, ierr )
356  lwork_dormbr_prt_nn = int( dum(1) )
357 *
358  CALL dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
359  $ dum(1), dum(1), n, dum(1), -1, ierr )
360  lwork_dormbr_qln_nn = int( dum(1) )
361 *
362  CALL dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
363  $ dum(1), dum(1), m, dum(1), -1, ierr )
364  lwork_dormbr_qln_mn = int( dum(1) )
365 *
366  CALL dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
367  $ dum(1), dum(1), m, dum(1), -1, ierr )
368  lwork_dormbr_qln_mm = int( dum(1) )
369 *
370  IF( m.GE.mnthr ) THEN
371  IF( wntqn ) THEN
372 *
373 * Path 1 (M >> N, JOBZ='N')
374 *
375  wrkbl = n + lwork_dgeqrf_mn
376  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
377  maxwrk = max( wrkbl, bdspac + n )
378  minwrk = bdspac + n
379  ELSE IF( wntqo ) THEN
380 *
381 * Path 2 (M >> N, JOBZ='O')
382 *
383  wrkbl = n + lwork_dgeqrf_mn
384  wrkbl = max( wrkbl, n + lwork_dorgqr_mn )
385  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
386  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
387  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
388  wrkbl = max( wrkbl, 3*n + bdspac )
389  maxwrk = wrkbl + 2*n*n
390  minwrk = bdspac + 2*n*n + 3*n
391  ELSE IF( wntqs ) THEN
392 *
393 * Path 3 (M >> N, JOBZ='S')
394 *
395  wrkbl = n + lwork_dgeqrf_mn
396  wrkbl = max( wrkbl, n + lwork_dorgqr_mn )
397  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
398  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
399  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
400  wrkbl = max( wrkbl, 3*n + bdspac )
401  maxwrk = wrkbl + n*n
402  minwrk = bdspac + n*n + 3*n
403  ELSE IF( wntqa ) THEN
404 *
405 * Path 4 (M >> N, JOBZ='A')
406 *
407  wrkbl = n + lwork_dgeqrf_mn
408  wrkbl = max( wrkbl, n + lwork_dorgqr_mm )
409  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
410  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
411  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
412  wrkbl = max( wrkbl, 3*n + bdspac )
413  maxwrk = wrkbl + n*n
414  minwrk = n*n + max( 3*n + bdspac, n + m )
415  END IF
416  ELSE
417 *
418 * Path 5 (M >= N, but not much larger)
419 *
420  wrkbl = 3*n + lwork_dgebrd_mn
421  IF( wntqn ) THEN
422 * Path 5n (M >= N, jobz='N')
423  maxwrk = max( wrkbl, 3*n + bdspac )
424  minwrk = 3*n + max( m, bdspac )
425  ELSE IF( wntqo ) THEN
426 * Path 5o (M >= N, jobz='O')
427  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
428  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
429  wrkbl = max( wrkbl, 3*n + bdspac )
430  maxwrk = wrkbl + m*n
431  minwrk = 3*n + max( m, n*n + bdspac )
432  ELSE IF( wntqs ) THEN
433 * Path 5s (M >= N, jobz='S')
434  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
435  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
436  maxwrk = max( wrkbl, 3*n + bdspac )
437  minwrk = 3*n + max( m, bdspac )
438  ELSE IF( wntqa ) THEN
439 * Path 5a (M >= N, jobz='A')
440  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm )
441  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
442  maxwrk = max( wrkbl, 3*n + bdspac )
443  minwrk = 3*n + max( m, bdspac )
444  END IF
445  END IF
446  ELSE IF( minmn.GT.0 ) THEN
447 *
448 * Compute space needed for DBDSDC
449 *
450  IF( wntqn ) THEN
451 * dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
452 * keep 7*N for backwards compatability.
453  bdspac = 7*m
454  ELSE
455  bdspac = 3*m*m + 4*m
456  END IF
457 *
458 * Compute space preferred for each routine
459  CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
460  $ dum(1), dum(1), -1, ierr )
461  lwork_dgebrd_mn = int( dum(1) )
462 *
463  CALL dgebrd( m, m, a, m, s, dum(1), dum(1),
464  $ dum(1), dum(1), -1, ierr )
465  lwork_dgebrd_mm = int( dum(1) )
466 *
467  CALL dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
468  lwork_dgelqf_mn = int( dum(1) )
469 *
470  CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
471  lwork_dorglq_nn = int( dum(1) )
472 *
473  CALL dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
474  lwork_dorglq_mn = int( dum(1) )
475 *
476  CALL dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
477  lwork_dorgbr_p_mm = int( dum(1) )
478 *
479  CALL dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
480  $ dum(1), dum(1), m, dum(1), -1, ierr )
481  lwork_dormbr_prt_mm = int( dum(1) )
482 *
483  CALL dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
484  $ dum(1), dum(1), m, dum(1), -1, ierr )
485  lwork_dormbr_prt_mn = int( dum(1) )
486 *
487  CALL dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
488  $ dum(1), dum(1), n, dum(1), -1, ierr )
489  lwork_dormbr_prt_nn = int( dum(1) )
490 *
491  CALL dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
492  $ dum(1), dum(1), m, dum(1), -1, ierr )
493  lwork_dormbr_qln_mm = int( dum(1) )
494 *
495  IF( n.GE.mnthr ) THEN
496  IF( wntqn ) THEN
497 *
498 * Path 1t (N >> M, JOBZ='N')
499 *
500  wrkbl = m + lwork_dgelqf_mn
501  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
502  maxwrk = max( wrkbl, bdspac + m )
503  minwrk = bdspac + m
504  ELSE IF( wntqo ) THEN
505 *
506 * Path 2t (N >> M, JOBZ='O')
507 *
508  wrkbl = m + lwork_dgelqf_mn
509  wrkbl = max( wrkbl, m + lwork_dorglq_mn )
510  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
511  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
512  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
513  wrkbl = max( wrkbl, 3*m + bdspac )
514  maxwrk = wrkbl + 2*m*m
515  minwrk = bdspac + 2*m*m + 3*m
516  ELSE IF( wntqs ) THEN
517 *
518 * Path 3t (N >> M, JOBZ='S')
519 *
520  wrkbl = m + lwork_dgelqf_mn
521  wrkbl = max( wrkbl, m + lwork_dorglq_mn )
522  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
523  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
524  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
525  wrkbl = max( wrkbl, 3*m + bdspac )
526  maxwrk = wrkbl + m*m
527  minwrk = bdspac + m*m + 3*m
528  ELSE IF( wntqa ) THEN
529 *
530 * Path 4t (N >> M, JOBZ='A')
531 *
532  wrkbl = m + lwork_dgelqf_mn
533  wrkbl = max( wrkbl, m + lwork_dorglq_nn )
534  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
535  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
536  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
537  wrkbl = max( wrkbl, 3*m + bdspac )
538  maxwrk = wrkbl + m*m
539  minwrk = m*m + max( 3*m + bdspac, m + n )
540  END IF
541  ELSE
542 *
543 * Path 5t (N > M, but not much larger)
544 *
545  wrkbl = 3*m + lwork_dgebrd_mn
546  IF( wntqn ) THEN
547 * Path 5tn (N > M, jobz='N')
548  maxwrk = max( wrkbl, 3*m + bdspac )
549  minwrk = 3*m + max( n, bdspac )
550  ELSE IF( wntqo ) THEN
551 * Path 5to (N > M, jobz='O')
552  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
553  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
554  wrkbl = max( wrkbl, 3*m + bdspac )
555  maxwrk = wrkbl + m*n
556  minwrk = 3*m + max( n, m*m + bdspac )
557  ELSE IF( wntqs ) THEN
558 * Path 5ts (N > M, jobz='S')
559  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
560  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
561  maxwrk = max( wrkbl, 3*m + bdspac )
562  minwrk = 3*m + max( n, bdspac )
563  ELSE IF( wntqa ) THEN
564 * Path 5ta (N > M, jobz='A')
565  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
566  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn )
567  maxwrk = max( wrkbl, 3*m + bdspac )
568  minwrk = 3*m + max( n, bdspac )
569  END IF
570  END IF
571  END IF
572 
573  maxwrk = max( maxwrk, minwrk )
574  work( 1 ) = maxwrk
575 *
576  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
577  info = -12
578  END IF
579  END IF
580 *
581  IF( info.NE.0 ) THEN
582  CALL xerbla( 'DGESDD', -info )
583  RETURN
584  ELSE IF( lquery ) THEN
585  RETURN
586  END IF
587 *
588 * Quick return if possible
589 *
590  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
591  RETURN
592  END IF
593 *
594 * Get machine constants
595 *
596  eps = dlamch( 'P' )
597  smlnum = sqrt( dlamch( 'S' ) ) / eps
598  bignum = one / smlnum
599 *
600 * Scale A if max element outside range [SMLNUM,BIGNUM]
601 *
602  anrm = dlange( 'M', m, n, a, lda, dum )
603  iscl = 0
604  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
605  iscl = 1
606  CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
607  ELSE IF( anrm.GT.bignum ) THEN
608  iscl = 1
609  CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
610  END IF
611 *
612  IF( m.GE.n ) THEN
613 *
614 * A has at least as many rows as columns. If A has sufficiently
615 * more rows than columns, first reduce using the QR
616 * decomposition (if sufficient workspace available)
617 *
618  IF( m.GE.mnthr ) THEN
619 *
620  IF( wntqn ) THEN
621 *
622 * Path 1 (M >> N, JOBZ='N')
623 * No singular vectors to be computed
624 *
625  itau = 1
626  nwork = itau + n
627 *
628 * Compute A=Q*R
629 * Workspace: need N [tau] + N [work]
630 * Workspace: prefer N [tau] + N*NB [work]
631 *
632  CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
633  $ lwork - nwork + 1, ierr )
634 *
635 * Zero out below R
636 *
637  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
638  ie = 1
639  itauq = ie + n
640  itaup = itauq + n
641  nwork = itaup + n
642 *
643 * Bidiagonalize R in A
644 * Workspace: need 3*N [e, tauq, taup] + N [work]
645 * Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
646 *
647  CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
648  $ work( itaup ), work( nwork ), lwork-nwork+1,
649  $ ierr )
650  nwork = ie + n
651 *
652 * Perform bidiagonal SVD, computing singular values only
653 * Workspace: need N [e] + BDSPAC
654 *
655  CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
656  $ dum, idum, work( nwork ), iwork, info )
657 *
658  ELSE IF( wntqo ) THEN
659 *
660 * Path 2 (M >> N, JOBZ = 'O')
661 * N left singular vectors to be overwritten on A and
662 * N right singular vectors to be computed in VT
663 *
664  ir = 1
665 *
666 * WORK(IR) is LDWRKR by N
667 *
668  IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
669  ldwrkr = lda
670  ELSE
671  ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
672  END IF
673  itau = ir + ldwrkr*n
674  nwork = itau + n
675 *
676 * Compute A=Q*R
677 * Workspace: need N*N [R] + N [tau] + N [work]
678 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
679 *
680  CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
681  $ lwork - nwork + 1, ierr )
682 *
683 * Copy R to WORK(IR), zeroing out below it
684 *
685  CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
686  CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
687  $ ldwrkr )
688 *
689 * Generate Q in A
690 * Workspace: need N*N [R] + N [tau] + N [work]
691 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
692 *
693  CALL dorgqr( m, n, n, a, lda, work( itau ),
694  $ work( nwork ), lwork - nwork + 1, ierr )
695  ie = itau
696  itauq = ie + n
697  itaup = itauq + n
698  nwork = itaup + n
699 *
700 * Bidiagonalize R in WORK(IR)
701 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
702 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
703 *
704  CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
705  $ work( itauq ), work( itaup ), work( nwork ),
706  $ lwork - nwork + 1, ierr )
707 *
708 * WORK(IU) is N by N
709 *
710  iu = nwork
711  nwork = iu + n*n
712 *
713 * Perform bidiagonal SVD, computing left singular vectors
714 * of bidiagonal matrix in WORK(IU) and computing right
715 * singular vectors of bidiagonal matrix in VT
716 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
717 *
718  CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
719  $ vt, ldvt, dum, idum, work( nwork ), iwork,
720  $ info )
721 *
722 * Overwrite WORK(IU) by left singular vectors of R
723 * and VT by right singular vectors of R
724 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
725 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
726 *
727  CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
728  $ work( itauq ), work( iu ), n, work( nwork ),
729  $ lwork - nwork + 1, ierr )
730  CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
731  $ work( itaup ), vt, ldvt, work( nwork ),
732  $ lwork - nwork + 1, ierr )
733 *
734 * Multiply Q in A by left singular vectors of R in
735 * WORK(IU), storing result in WORK(IR) and copying to A
736 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
737 * Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
738 *
739  DO 10 i = 1, m, ldwrkr
740  chunk = min( m - i + 1, ldwrkr )
741  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
742  $ lda, work( iu ), n, zero, work( ir ),
743  $ ldwrkr )
744  CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
745  $ a( i, 1 ), lda )
746  10 CONTINUE
747 *
748  ELSE IF( wntqs ) THEN
749 *
750 * Path 3 (M >> N, JOBZ='S')
751 * N left singular vectors to be computed in U and
752 * N right singular vectors to be computed in VT
753 *
754  ir = 1
755 *
756 * WORK(IR) is N by N
757 *
758  ldwrkr = n
759  itau = ir + ldwrkr*n
760  nwork = itau + n
761 *
762 * Compute A=Q*R
763 * Workspace: need N*N [R] + N [tau] + N [work]
764 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
765 *
766  CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
767  $ lwork - nwork + 1, ierr )
768 *
769 * Copy R to WORK(IR), zeroing out below it
770 *
771  CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
772  CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
773  $ ldwrkr )
774 *
775 * Generate Q in A
776 * Workspace: need N*N [R] + N [tau] + N [work]
777 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
778 *
779  CALL dorgqr( m, n, n, a, lda, work( itau ),
780  $ work( nwork ), lwork - nwork + 1, ierr )
781  ie = itau
782  itauq = ie + n
783  itaup = itauq + n
784  nwork = itaup + n
785 *
786 * Bidiagonalize R in WORK(IR)
787 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
788 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
789 *
790  CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791  $ work( itauq ), work( itaup ), work( nwork ),
792  $ lwork - nwork + 1, ierr )
793 *
794 * Perform bidiagonal SVD, computing left singular vectors
795 * of bidiagoal matrix in U and computing right singular
796 * vectors of bidiagonal matrix in VT
797 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
798 *
799  CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
800  $ ldvt, dum, idum, work( nwork ), iwork,
801  $ info )
802 *
803 * Overwrite U by left singular vectors of R and VT
804 * by right singular vectors of R
805 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
806 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
807 *
808  CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
809  $ work( itauq ), u, ldu, work( nwork ),
810  $ lwork - nwork + 1, ierr )
811 *
812  CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
813  $ work( itaup ), vt, ldvt, work( nwork ),
814  $ lwork - nwork + 1, ierr )
815 *
816 * Multiply Q in A by left singular vectors of R in
817 * WORK(IR), storing result in U
818 * Workspace: need N*N [R]
819 *
820  CALL dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
821  CALL dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
822  $ ldwrkr, zero, u, ldu )
823 *
824  ELSE IF( wntqa ) THEN
825 *
826 * Path 4 (M >> N, JOBZ='A')
827 * M left singular vectors to be computed in U and
828 * N right singular vectors to be computed in VT
829 *
830  iu = 1
831 *
832 * WORK(IU) is N by N
833 *
834  ldwrku = n
835  itau = iu + ldwrku*n
836  nwork = itau + n
837 *
838 * Compute A=Q*R, copying result to U
839 * Workspace: need N*N [U] + N [tau] + N [work]
840 * Workspace: prefer N*N [U] + N [tau] + N*NB [work]
841 *
842  CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
843  $ lwork - nwork + 1, ierr )
844  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
845 *
846 * Generate Q in U
847 * Workspace: need N*N [U] + N [tau] + M [work]
848 * Workspace: prefer N*N [U] + N [tau] + M*NB [work]
849  CALL dorgqr( m, m, n, u, ldu, work( itau ),
850  $ work( nwork ), lwork - nwork + 1, ierr )
851 *
852 * Produce R in A, zeroing out other entries
853 *
854  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
855  ie = itau
856  itauq = ie + n
857  itaup = itauq + n
858  nwork = itaup + n
859 *
860 * Bidiagonalize R in A
861 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
862 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
863 *
864  CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
865  $ work( itaup ), work( nwork ), lwork-nwork+1,
866  $ ierr )
867 *
868 * Perform bidiagonal SVD, computing left singular vectors
869 * of bidiagonal matrix in WORK(IU) and computing right
870 * singular vectors of bidiagonal matrix in VT
871 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
872 *
873  CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
874  $ vt, ldvt, dum, idum, work( nwork ), iwork,
875  $ info )
876 *
877 * Overwrite WORK(IU) by left singular vectors of R and VT
878 * by right singular vectors of R
879 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
880 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
881 *
882  CALL dormbr( 'Q', 'L', 'N', n, n, n, a, lda,
883  $ work( itauq ), work( iu ), ldwrku,
884  $ work( nwork ), lwork - nwork + 1, ierr )
885  CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
886  $ work( itaup ), vt, ldvt, work( nwork ),
887  $ lwork - nwork + 1, ierr )
888 *
889 * Multiply Q in U by left singular vectors of R in
890 * WORK(IU), storing result in A
891 * Workspace: need N*N [U]
892 *
893  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
894  $ ldwrku, zero, a, lda )
895 *
896 * Copy left singular vectors of A from A to U
897 *
898  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
899 *
900  END IF
901 *
902  ELSE
903 *
904 * M .LT. MNTHR
905 *
906 * Path 5 (M >= N, but not much larger)
907 * Reduce to bidiagonal form without QR decomposition
908 *
909  ie = 1
910  itauq = ie + n
911  itaup = itauq + n
912  nwork = itaup + n
913 *
914 * Bidiagonalize A
915 * Workspace: need 3*N [e, tauq, taup] + M [work]
916 * Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
917 *
918  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
919  $ work( itaup ), work( nwork ), lwork-nwork+1,
920  $ ierr )
921  IF( wntqn ) THEN
922 *
923 * Path 5n (M >= N, JOBZ='N')
924 * Perform bidiagonal SVD, only computing singular values
925 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
926 *
927  CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
928  $ dum, idum, work( nwork ), iwork, info )
929  ELSE IF( wntqo ) THEN
930 * Path 5o (M >= N, JOBZ='O')
931  iu = nwork
932  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
933 *
934 * WORK( IU ) is M by N
935 *
936  ldwrku = m
937  nwork = iu + ldwrku*n
938  CALL dlaset( 'F', m, n, zero, zero, work( iu ),
939  $ ldwrku )
940 * IR is unused; silence compile warnings
941  ir = -1
942  ELSE
943 *
944 * WORK( IU ) is N by N
945 *
946  ldwrku = n
947  nwork = iu + ldwrku*n
948 *
949 * WORK(IR) is LDWRKR by N
950 *
951  ir = nwork
952  ldwrkr = ( lwork - n*n - 3*n ) / n
953  END IF
954  nwork = iu + ldwrku*n
955 *
956 * Perform bidiagonal SVD, computing left singular vectors
957 * of bidiagonal matrix in WORK(IU) and computing right
958 * singular vectors of bidiagonal matrix in VT
959 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
960 *
961  CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
962  $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
963  $ iwork, info )
964 *
965 * Overwrite VT by right singular vectors of A
966 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
967 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
968 *
969  CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
970  $ work( itaup ), vt, ldvt, work( nwork ),
971  $ lwork - nwork + 1, ierr )
972 *
973  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
974 *
975 * Path 5o-fast
976 * Overwrite WORK(IU) by left singular vectors of A
977 * Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
978 * Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
979 *
980  CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
981  $ work( itauq ), work( iu ), ldwrku,
982  $ work( nwork ), lwork - nwork + 1, ierr )
983 *
984 * Copy left singular vectors of A from WORK(IU) to A
985 *
986  CALL dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
987  ELSE
988 *
989 * Path 5o-slow
990 * Generate Q in A
991 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
992 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
993 *
994  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
995  $ work( nwork ), lwork - nwork + 1, ierr )
996 *
997 * Multiply Q in A by left singular vectors of
998 * bidiagonal matrix in WORK(IU), storing result in
999 * WORK(IR) and copying to A
1000 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1001 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1002 *
1003  DO 20 i = 1, m, ldwrkr
1004  chunk = min( m - i + 1, ldwrkr )
1005  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1006  $ lda, work( iu ), ldwrku, zero,
1007  $ work( ir ), ldwrkr )
1008  CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
1009  $ a( i, 1 ), lda )
1010  20 CONTINUE
1011  END IF
1012 *
1013  ELSE IF( wntqs ) THEN
1014 *
1015 * Path 5s (M >= N, JOBZ='S')
1016 * Perform bidiagonal SVD, computing left singular vectors
1017 * of bidiagonal matrix in U and computing right singular
1018 * vectors of bidiagonal matrix in VT
1019 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1020 *
1021  CALL dlaset( 'F', m, n, zero, zero, u, ldu )
1022  CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1023  $ ldvt, dum, idum, work( nwork ), iwork,
1024  $ info )
1025 *
1026 * Overwrite U by left singular vectors of A and VT
1027 * by right singular vectors of A
1028 * Workspace: need 3*N [e, tauq, taup] + N [work]
1029 * Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1030 *
1031  CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1032  $ work( itauq ), u, ldu, work( nwork ),
1033  $ lwork - nwork + 1, ierr )
1034  CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
1035  $ work( itaup ), vt, ldvt, work( nwork ),
1036  $ lwork - nwork + 1, ierr )
1037  ELSE IF( wntqa ) THEN
1038 *
1039 * Path 5a (M >= N, JOBZ='A')
1040 * Perform bidiagonal SVD, computing left singular vectors
1041 * of bidiagonal matrix in U and computing right singular
1042 * vectors of bidiagonal matrix in VT
1043 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1044 *
1045  CALL dlaset( 'F', m, m, zero, zero, u, ldu )
1046  CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1047  $ ldvt, dum, idum, work( nwork ), iwork,
1048  $ info )
1049 *
1050 * Set the right corner of U to identity matrix
1051 *
1052  IF( m.GT.n ) THEN
1053  CALL dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1054  $ ldu )
1055  END IF
1056 *
1057 * Overwrite U by left singular vectors of A and VT
1058 * by right singular vectors of A
1059 * Workspace: need 3*N [e, tauq, taup] + M [work]
1060 * Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1061 *
1062  CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1063  $ work( itauq ), u, ldu, work( nwork ),
1064  $ lwork - nwork + 1, ierr )
1065  CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
1066  $ work( itaup ), vt, ldvt, work( nwork ),
1067  $ lwork - nwork + 1, ierr )
1068  END IF
1069 *
1070  END IF
1071 *
1072  ELSE
1073 *
1074 * A has more columns than rows. If A has sufficiently more
1075 * columns than rows, first reduce using the LQ decomposition (if
1076 * sufficient workspace available)
1077 *
1078  IF( n.GE.mnthr ) THEN
1079 *
1080  IF( wntqn ) THEN
1081 *
1082 * Path 1t (N >> M, JOBZ='N')
1083 * No singular vectors to be computed
1084 *
1085  itau = 1
1086  nwork = itau + m
1087 *
1088 * Compute A=L*Q
1089 * Workspace: need M [tau] + M [work]
1090 * Workspace: prefer M [tau] + M*NB [work]
1091 *
1092  CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1093  $ lwork - nwork + 1, ierr )
1094 *
1095 * Zero out above L
1096 *
1097  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1098  ie = 1
1099  itauq = ie + m
1100  itaup = itauq + m
1101  nwork = itaup + m
1102 *
1103 * Bidiagonalize L in A
1104 * Workspace: need 3*M [e, tauq, taup] + M [work]
1105 * Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1106 *
1107  CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1108  $ work( itaup ), work( nwork ), lwork-nwork+1,
1109  $ ierr )
1110  nwork = ie + m
1111 *
1112 * Perform bidiagonal SVD, computing singular values only
1113 * Workspace: need M [e] + BDSPAC
1114 *
1115  CALL dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1116  $ dum, idum, work( nwork ), iwork, info )
1117 *
1118  ELSE IF( wntqo ) THEN
1119 *
1120 * Path 2t (N >> M, JOBZ='O')
1121 * M right singular vectors to be overwritten on A and
1122 * M left singular vectors to be computed in U
1123 *
1124  ivt = 1
1125 *
1126 * WORK(IVT) is M by M
1127 * WORK(IL) is M by M; it is later resized to M by chunk for gemm
1128 *
1129  il = ivt + m*m
1130  IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1131  ldwrkl = m
1132  chunk = n
1133  ELSE
1134  ldwrkl = m
1135  chunk = ( lwork - m*m ) / m
1136  END IF
1137  itau = il + ldwrkl*m
1138  nwork = itau + m
1139 *
1140 * Compute A=L*Q
1141 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1142 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1143 *
1144  CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1145  $ lwork - nwork + 1, ierr )
1146 *
1147 * Copy L to WORK(IL), zeroing about above it
1148 *
1149  CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1150  CALL dlaset( 'U', m - 1, m - 1, zero, zero,
1151  $ work( il + ldwrkl ), ldwrkl )
1152 *
1153 * Generate Q in A
1154 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1155 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1156 *
1157  CALL dorglq( m, n, m, a, lda, work( itau ),
1158  $ work( nwork ), lwork - nwork + 1, ierr )
1159  ie = itau
1160  itauq = ie + m
1161  itaup = itauq + m
1162  nwork = itaup + m
1163 *
1164 * Bidiagonalize L in WORK(IL)
1165 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1166 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1167 *
1168  CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1169  $ work( itauq ), work( itaup ), work( nwork ),
1170  $ lwork - nwork + 1, ierr )
1171 *
1172 * Perform bidiagonal SVD, computing left singular vectors
1173 * of bidiagonal matrix in U, and computing right singular
1174 * vectors of bidiagonal matrix in WORK(IVT)
1175 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1176 *
1177  CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1178  $ work( ivt ), m, dum, idum, work( nwork ),
1179  $ iwork, info )
1180 *
1181 * Overwrite U by left singular vectors of L and WORK(IVT)
1182 * by right singular vectors of L
1183 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1184 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1185 *
1186  CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1187  $ work( itauq ), u, ldu, work( nwork ),
1188  $ lwork - nwork + 1, ierr )
1189  CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1190  $ work( itaup ), work( ivt ), m,
1191  $ work( nwork ), lwork - nwork + 1, ierr )
1192 *
1193 * Multiply right singular vectors of L in WORK(IVT) by Q
1194 * in A, storing result in WORK(IL) and copying to A
1195 * Workspace: need M*M [VT] + M*M [L]
1196 * Workspace: prefer M*M [VT] + M*N [L]
1197 * At this point, L is resized as M by chunk.
1198 *
1199  DO 30 i = 1, n, chunk
1200  blk = min( n - i + 1, chunk )
1201  CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1202  $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1203  CALL dlacpy( 'F', m, blk, work( il ), ldwrkl,
1204  $ a( 1, i ), lda )
1205  30 CONTINUE
1206 *
1207  ELSE IF( wntqs ) THEN
1208 *
1209 * Path 3t (N >> M, JOBZ='S')
1210 * M right singular vectors to be computed in VT and
1211 * M left singular vectors to be computed in U
1212 *
1213  il = 1
1214 *
1215 * WORK(IL) is M by M
1216 *
1217  ldwrkl = m
1218  itau = il + ldwrkl*m
1219  nwork = itau + m
1220 *
1221 * Compute A=L*Q
1222 * Workspace: need M*M [L] + M [tau] + M [work]
1223 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1224 *
1225  CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1226  $ lwork - nwork + 1, ierr )
1227 *
1228 * Copy L to WORK(IL), zeroing out above it
1229 *
1230  CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1231  CALL dlaset( 'U', m - 1, m - 1, zero, zero,
1232  $ work( il + ldwrkl ), ldwrkl )
1233 *
1234 * Generate Q in A
1235 * Workspace: need M*M [L] + M [tau] + M [work]
1236 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1237 *
1238  CALL dorglq( m, n, m, a, lda, work( itau ),
1239  $ work( nwork ), lwork - nwork + 1, ierr )
1240  ie = itau
1241  itauq = ie + m
1242  itaup = itauq + m
1243  nwork = itaup + m
1244 *
1245 * Bidiagonalize L in WORK(IU).
1246 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1247 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1248 *
1249  CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1250  $ work( itauq ), work( itaup ), work( nwork ),
1251  $ lwork - nwork + 1, ierr )
1252 *
1253 * Perform bidiagonal SVD, computing left singular vectors
1254 * of bidiagonal matrix in U and computing right singular
1255 * vectors of bidiagonal matrix in VT
1256 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1257 *
1258  CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1259  $ ldvt, dum, idum, work( nwork ), iwork,
1260  $ info )
1261 *
1262 * Overwrite U by left singular vectors of L and VT
1263 * by right singular vectors of L
1264 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1265 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1266 *
1267  CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1268  $ work( itauq ), u, ldu, work( nwork ),
1269  $ lwork - nwork + 1, ierr )
1270  CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1271  $ work( itaup ), vt, ldvt, work( nwork ),
1272  $ lwork - nwork + 1, ierr )
1273 *
1274 * Multiply right singular vectors of L in WORK(IL) by
1275 * Q in A, storing result in VT
1276 * Workspace: need M*M [L]
1277 *
1278  CALL dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1279  CALL dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1280  $ a, lda, zero, vt, ldvt )
1281 *
1282  ELSE IF( wntqa ) THEN
1283 *
1284 * Path 4t (N >> M, JOBZ='A')
1285 * N right singular vectors to be computed in VT and
1286 * M left singular vectors to be computed in U
1287 *
1288  ivt = 1
1289 *
1290 * WORK(IVT) is M by M
1291 *
1292  ldwkvt = m
1293  itau = ivt + ldwkvt*m
1294  nwork = itau + m
1295 *
1296 * Compute A=L*Q, copying result to VT
1297 * Workspace: need M*M [VT] + M [tau] + M [work]
1298 * Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1299 *
1300  CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1301  $ lwork - nwork + 1, ierr )
1302  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
1303 *
1304 * Generate Q in VT
1305 * Workspace: need M*M [VT] + M [tau] + N [work]
1306 * Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1307 *
1308  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
1309  $ work( nwork ), lwork - nwork + 1, ierr )
1310 *
1311 * Produce L in A, zeroing out other entries
1312 *
1313  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1314  ie = itau
1315  itauq = ie + m
1316  itaup = itauq + m
1317  nwork = itaup + m
1318 *
1319 * Bidiagonalize L in A
1320 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1321 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1322 *
1323  CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1324  $ work( itaup ), work( nwork ), lwork-nwork+1,
1325  $ ierr )
1326 *
1327 * Perform bidiagonal SVD, computing left singular vectors
1328 * of bidiagonal matrix in U and computing right singular
1329 * vectors of bidiagonal matrix in WORK(IVT)
1330 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1331 *
1332  CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1333  $ work( ivt ), ldwkvt, dum, idum,
1334  $ work( nwork ), iwork, info )
1335 *
1336 * Overwrite U by left singular vectors of L and WORK(IVT)
1337 * by right singular vectors of L
1338 * Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1339 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1340 *
1341  CALL dormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1342  $ work( itauq ), u, ldu, work( nwork ),
1343  $ lwork - nwork + 1, ierr )
1344  CALL dormbr( 'P', 'R', 'T', m, m, m, a, lda,
1345  $ work( itaup ), work( ivt ), ldwkvt,
1346  $ work( nwork ), lwork - nwork + 1, ierr )
1347 *
1348 * Multiply right singular vectors of L in WORK(IVT) by
1349 * Q in VT, storing result in A
1350 * Workspace: need M*M [VT]
1351 *
1352  CALL dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1353  $ vt, ldvt, zero, a, lda )
1354 *
1355 * Copy right singular vectors of A from A to VT
1356 *
1357  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
1358 *
1359  END IF
1360 *
1361  ELSE
1362 *
1363 * N .LT. MNTHR
1364 *
1365 * Path 5t (N > M, but not much larger)
1366 * Reduce to bidiagonal form without LQ decomposition
1367 *
1368  ie = 1
1369  itauq = ie + m
1370  itaup = itauq + m
1371  nwork = itaup + m
1372 *
1373 * Bidiagonalize A
1374 * Workspace: need 3*M [e, tauq, taup] + N [work]
1375 * Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1376 *
1377  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1378  $ work( itaup ), work( nwork ), lwork-nwork+1,
1379  $ ierr )
1380  IF( wntqn ) THEN
1381 *
1382 * Path 5tn (N > M, JOBZ='N')
1383 * Perform bidiagonal SVD, only computing singular values
1384 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1385 *
1386  CALL dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1387  $ dum, idum, work( nwork ), iwork, info )
1388  ELSE IF( wntqo ) THEN
1389 * Path 5to (N > M, JOBZ='O')
1390  ldwkvt = m
1391  ivt = nwork
1392  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1393 *
1394 * WORK( IVT ) is M by N
1395 *
1396  CALL dlaset( 'F', m, n, zero, zero, work( ivt ),
1397  $ ldwkvt )
1398  nwork = ivt + ldwkvt*n
1399 * IL is unused; silence compile warnings
1400  il = -1
1401  ELSE
1402 *
1403 * WORK( IVT ) is M by M
1404 *
1405  nwork = ivt + ldwkvt*m
1406  il = nwork
1407 *
1408 * WORK(IL) is M by CHUNK
1409 *
1410  chunk = ( lwork - m*m - 3*m ) / m
1411  END IF
1412 *
1413 * Perform bidiagonal SVD, computing left singular vectors
1414 * of bidiagonal matrix in U and computing right singular
1415 * vectors of bidiagonal matrix in WORK(IVT)
1416 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1417 *
1418  CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1419  $ work( ivt ), ldwkvt, dum, idum,
1420  $ work( nwork ), iwork, info )
1421 *
1422 * Overwrite U by left singular vectors of A
1423 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1424 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1425 *
1426  CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1427  $ work( itauq ), u, ldu, work( nwork ),
1428  $ lwork - nwork + 1, ierr )
1429 *
1430  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1431 *
1432 * Path 5to-fast
1433 * Overwrite WORK(IVT) by left singular vectors of A
1434 * Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1435 * Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1436 *
1437  CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
1438  $ work( itaup ), work( ivt ), ldwkvt,
1439  $ work( nwork ), lwork - nwork + 1, ierr )
1440 *
1441 * Copy right singular vectors of A from WORK(IVT) to A
1442 *
1443  CALL dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1444  ELSE
1445 *
1446 * Path 5to-slow
1447 * Generate P**T in A
1448 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1449 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1450 *
1451  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
1452  $ work( nwork ), lwork - nwork + 1, ierr )
1453 *
1454 * Multiply Q in A by right singular vectors of
1455 * bidiagonal matrix in WORK(IVT), storing result in
1456 * WORK(IL) and copying to A
1457 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1458 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1459 *
1460  DO 40 i = 1, n, chunk
1461  blk = min( n - i + 1, chunk )
1462  CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1463  $ ldwkvt, a( 1, i ), lda, zero,
1464  $ work( il ), m )
1465  CALL dlacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1466  $ lda )
1467  40 CONTINUE
1468  END IF
1469  ELSE IF( wntqs ) THEN
1470 *
1471 * Path 5ts (N > M, JOBZ='S')
1472 * Perform bidiagonal SVD, computing left singular vectors
1473 * of bidiagonal matrix in U and computing right singular
1474 * vectors of bidiagonal matrix in VT
1475 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1476 *
1477  CALL dlaset( 'F', m, n, zero, zero, vt, ldvt )
1478  CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1479  $ ldvt, dum, idum, work( nwork ), iwork,
1480  $ info )
1481 *
1482 * Overwrite U by left singular vectors of A and VT
1483 * by right singular vectors of A
1484 * Workspace: need 3*M [e, tauq, taup] + M [work]
1485 * Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1486 *
1487  CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1488  $ work( itauq ), u, ldu, work( nwork ),
1489  $ lwork - nwork + 1, ierr )
1490  CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
1491  $ work( itaup ), vt, ldvt, work( nwork ),
1492  $ lwork - nwork + 1, ierr )
1493  ELSE IF( wntqa ) THEN
1494 *
1495 * Path 5ta (N > M, JOBZ='A')
1496 * Perform bidiagonal SVD, computing left singular vectors
1497 * of bidiagonal matrix in U and computing right singular
1498 * vectors of bidiagonal matrix in VT
1499 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1500 *
1501  CALL dlaset( 'F', n, n, zero, zero, vt, ldvt )
1502  CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1503  $ ldvt, dum, idum, work( nwork ), iwork,
1504  $ info )
1505 *
1506 * Set the right corner of VT to identity matrix
1507 *
1508  IF( n.GT.m ) THEN
1509  CALL dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1510  $ ldvt )
1511  END IF
1512 *
1513 * Overwrite U by left singular vectors of A and VT
1514 * by right singular vectors of A
1515 * Workspace: need 3*M [e, tauq, taup] + N [work]
1516 * Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1517 *
1518  CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1519  $ work( itauq ), u, ldu, work( nwork ),
1520  $ lwork - nwork + 1, ierr )
1521  CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
1522  $ work( itaup ), vt, ldvt, work( nwork ),
1523  $ lwork - nwork + 1, ierr )
1524  END IF
1525 *
1526  END IF
1527 *
1528  END IF
1529 *
1530 * Undo scaling if necessary
1531 *
1532  IF( iscl.EQ.1 ) THEN
1533  IF( anrm.GT.bignum )
1534  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1535  $ ierr )
1536  IF( anrm.LT.smlnum )
1537  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1538  $ ierr )
1539  END IF
1540 *
1541 * Return optimal workspace in WORK(1)
1542 *
1543  work( 1 ) = maxwrk
1544 *
1545  RETURN
1546 *
1547 * End of DGESDD
1548 *
1549  END
subroutine dgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
DGEBRD
Definition: dgebrd.f:207
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:112
subroutine dormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMBR
Definition: dormbr.f:197
subroutine dorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGLQ
Definition: dorglq.f:129
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:145
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:137
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dgesdd(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
DGESDD
Definition: dgesdd.f:221
subroutine dorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGBR
Definition: dorgbr.f:159
subroutine dbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
DBDSDC
Definition: dbdsdc.f:207
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:138
subroutine dorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGQR
Definition: dorgqr.f:130