LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
sgesdd.f
Go to the documentation of this file.
1 *> \brief \b SGESDD
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESDD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesdd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesdd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesdd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, IWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBZ
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * INTEGER IWORK( * )
30 * REAL A( LDA, * ), S( * ), U( LDU, * ),
31 * $ VT( LDVT, * ), WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> SGESDD computes the singular value decomposition (SVD) of a real
41 *> M-by-N matrix A, optionally computing the left and right singular
42 *> vectors. If singular vectors are desired, it uses a
43 *> divide-and-conquer algorithm.
44 *>
45 *> The SVD is written
46 *>
47 *> A = U * SIGMA * transpose(V)
48 *>
49 *> where SIGMA is an M-by-N matrix which is zero except for its
50 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
51 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
52 *> are the singular values of A; they are real and non-negative, and
53 *> are returned in descending order. The first min(m,n) columns of
54 *> U and V are the left and right singular vectors of A.
55 *>
56 *> Note that the routine returns VT = V**T, not V.
57 *>
58 *> The divide and conquer algorithm makes very mild assumptions about
59 *> floating point arithmetic. It will work on machines with a guard
60 *> digit in add/subtract, or on those binary machines without guard
61 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
62 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
63 *> without guard digits, but we know of none.
64 *> \endverbatim
65 *
66 * Arguments:
67 * ==========
68 *
69 *> \param[in] JOBZ
70 *> \verbatim
71 *> JOBZ is CHARACTER*1
72 *> Specifies options for computing all or part of the matrix U:
73 *> = 'A': all M columns of U and all N rows of V**T are
74 *> returned in the arrays U and VT;
75 *> = 'S': the first min(M,N) columns of U and the first
76 *> min(M,N) rows of V**T are returned in the arrays U
77 *> and VT;
78 *> = 'O': If M >= N, the first N columns of U are overwritten
79 *> on the array A and all rows of V**T are returned in
80 *> the array VT;
81 *> otherwise, all columns of U are returned in the
82 *> array U and the first M rows of V**T are overwritten
83 *> in the array A;
84 *> = 'N': no columns of U or rows of V**T are computed.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is REAL array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBZ = 'O', A is overwritten with the first N columns
105 *> of U (the left singular vectors, stored
106 *> columnwise) if M >= N;
107 *> A is overwritten with the first M rows
108 *> of V**T (the right singular vectors, stored
109 *> rowwise) otherwise.
110 *> if JOBZ .ne. 'O', the contents of A are destroyed.
111 *> \endverbatim
112 *>
113 *> \param[in] LDA
114 *> \verbatim
115 *> LDA is INTEGER
116 *> The leading dimension of the array A. LDA >= max(1,M).
117 *> \endverbatim
118 *>
119 *> \param[out] S
120 *> \verbatim
121 *> S is REAL array, dimension (min(M,N))
122 *> The singular values of A, sorted so that S(i) >= S(i+1).
123 *> \endverbatim
124 *>
125 *> \param[out] U
126 *> \verbatim
127 *> U is REAL array, dimension (LDU,UCOL)
128 *> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
129 *> UCOL = min(M,N) if JOBZ = 'S'.
130 *> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
131 *> orthogonal matrix U;
132 *> if JOBZ = 'S', U contains the first min(M,N) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is REAL array, dimension (LDVT,N)
147 *> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
148 *> N-by-N orthogonal matrix V**T;
149 *> if JOBZ = 'S', VT contains the first min(M,N) rows of
150 *> V**T (the right singular vectors, stored rowwise);
151 *> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1;
158 *> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
159 *> if JOBZ = 'S', LDVT >= min(M,N).
160 *> \endverbatim
161 *>
162 *> \param[out] WORK
163 *> \verbatim
164 *> WORK is REAL array, dimension (MAX(1,LWORK))
165 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
166 *> \endverbatim
167 *>
168 *> \param[in] LWORK
169 *> \verbatim
170 *> LWORK is INTEGER
171 *> The dimension of the array WORK. LWORK >= 1.
172 *> If LWORK = -1, a workspace query is assumed. The optimal
173 *> size for the WORK array is calculated and stored in WORK(1),
174 *> and no other work except argument checking is performed.
175 *>
176 *> Let mx = max(M,N) and mn = min(M,N).
177 *> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
178 *> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
179 *> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
180 *> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
181 *> These are not tight minimums in all cases; see comments inside code.
182 *> For good performance, LWORK should generally be larger;
183 *> a query is recommended.
184 *> \endverbatim
185 *>
186 *> \param[out] IWORK
187 *> \verbatim
188 *> IWORK is INTEGER array, dimension (8*min(M,N))
189 *> \endverbatim
190 *>
191 *> \param[out] INFO
192 *> \verbatim
193 *> INFO is INTEGER
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> = -4: if A had a NAN entry.
196 *> > 0: SBDSDC did not converge, updating process failed.
197 *> = 0: successful exit.
198 *> \endverbatim
199 *
200 * Authors:
201 * ========
202 *
203 *> \author Univ. of Tennessee
204 *> \author Univ. of California Berkeley
205 *> \author Univ. of Colorado Denver
206 *> \author NAG Ltd.
207 *
208 *> \ingroup realGEsing
209 *
210 *> \par Contributors:
211 * ==================
212 *>
213 *> Ming Gu and Huan Ren, Computer Science Division, University of
214 *> California at Berkeley, USA
215 *>
216 * =====================================================================
217  SUBROUTINE sgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
218  $ WORK, LWORK, IWORK, INFO )
219  implicit none
220 *
221 * -- LAPACK driver routine --
222 * -- LAPACK is a software package provided by Univ. of Tennessee, --
223 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224 *
225 * .. Scalar Arguments ..
226  CHARACTER JOBZ
227  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
228 * ..
229 * .. Array Arguments ..
230  INTEGER IWORK( * )
231  REAL A( LDA, * ), S( * ), U( LDU, * ),
232  $ vt( ldvt, * ), work( * )
233 * ..
234 *
235 * =====================================================================
236 *
237 * .. Parameters ..
238  REAL ZERO, ONE
239  parameter( zero = 0.0e0, one = 1.0e0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
243  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
244  $ ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,
245  $ ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,
246  $ mnthr, nwork, wrkbl
247  INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM,
248  $ lwork_sgebrd_nn, lwork_sgelqf_mn,
249  $ lwork_sgeqrf_mn,
250  $ lwork_sorgbr_p_mm, lwork_sorgbr_q_nn,
251  $ lwork_sorglq_mn, lwork_sorglq_nn,
252  $ lwork_sorgqr_mm, lwork_sorgqr_mn,
253  $ lwork_sormbr_prt_mm, lwork_sormbr_qln_mm,
254  $ lwork_sormbr_prt_mn, lwork_sormbr_qln_mn,
255  $ lwork_sormbr_prt_nn, lwork_sormbr_qln_nn
256  REAL ANRM, BIGNUM, EPS, SMLNUM
257 * ..
258 * .. Local Arrays ..
259  INTEGER IDUM( 1 )
260  REAL DUM( 1 )
261 * ..
262 * .. External Subroutines ..
263  EXTERNAL sbdsdc, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
265  $ xerbla
266 * ..
267 * .. External Functions ..
268  LOGICAL LSAME, SISNAN
269  REAL SLAMCH, SLANGE
270  EXTERNAL slamch, slange, lsame, sisnan
271 * ..
272 * .. Intrinsic Functions ..
273  INTRINSIC int, max, min, sqrt
274 * ..
275 * .. Executable Statements ..
276 *
277 * Test the input arguments
278 *
279  info = 0
280  minmn = min( m, n )
281  wntqa = lsame( jobz, 'A' )
282  wntqs = lsame( jobz, 'S' )
283  wntqas = wntqa .OR. wntqs
284  wntqo = lsame( jobz, 'O' )
285  wntqn = lsame( jobz, 'N' )
286  lquery = ( lwork.EQ.-1 )
287 *
288  IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
289  info = -1
290  ELSE IF( m.LT.0 ) THEN
291  info = -2
292  ELSE IF( n.LT.0 ) THEN
293  info = -3
294  ELSE IF( lda.LT.max( 1, m ) ) THEN
295  info = -5
296  ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
297  $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
298  info = -8
299  ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
300  $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
301  $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
302  info = -10
303  END IF
304 *
305 * Compute workspace
306 * Note: Comments in the code beginning "Workspace:" describe the
307 * minimal amount of workspace allocated at that point in the code,
308 * as well as the preferred amount for good performance.
309 * NB refers to the optimal block size for the immediately
310 * following subroutine, as returned by ILAENV.
311 *
312  IF( info.EQ.0 ) THEN
313  minwrk = 1
314  maxwrk = 1
315  bdspac = 0
316  mnthr = int( minmn*11.0e0 / 6.0e0 )
317  IF( m.GE.n .AND. minmn.GT.0 ) THEN
318 *
319 * Compute space needed for SBDSDC
320 *
321  IF( wntqn ) THEN
322 * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
323 * keep 7*N for backwards compatibility.
324  bdspac = 7*n
325  ELSE
326  bdspac = 3*n*n + 4*n
327  END IF
328 *
329 * Compute space preferred for each routine
330  CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
331  $ dum(1), dum(1), -1, ierr )
332  lwork_sgebrd_mn = int( dum(1) )
333 *
334  CALL sgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
335  $ dum(1), dum(1), -1, ierr )
336  lwork_sgebrd_nn = int( dum(1) )
337 *
338  CALL sgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
339  lwork_sgeqrf_mn = int( dum(1) )
340 *
341  CALL sorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
342  $ ierr )
343  lwork_sorgbr_q_nn = int( dum(1) )
344 *
345  CALL sorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
346  lwork_sorgqr_mm = int( dum(1) )
347 *
348  CALL sorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
349  lwork_sorgqr_mn = int( dum(1) )
350 *
351  CALL sormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
352  $ dum(1), dum(1), n, dum(1), -1, ierr )
353  lwork_sormbr_prt_nn = int( dum(1) )
354 *
355  CALL sormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
356  $ dum(1), dum(1), n, dum(1), -1, ierr )
357  lwork_sormbr_qln_nn = int( dum(1) )
358 *
359  CALL sormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
360  $ dum(1), dum(1), m, dum(1), -1, ierr )
361  lwork_sormbr_qln_mn = int( dum(1) )
362 *
363  CALL sormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
364  $ dum(1), dum(1), m, dum(1), -1, ierr )
365  lwork_sormbr_qln_mm = int( dum(1) )
366 *
367  IF( m.GE.mnthr ) THEN
368  IF( wntqn ) THEN
369 *
370 * Path 1 (M >> N, JOBZ='N')
371 *
372  wrkbl = n + lwork_sgeqrf_mn
373  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
374  maxwrk = max( wrkbl, bdspac + n )
375  minwrk = bdspac + n
376  ELSE IF( wntqo ) THEN
377 *
378 * Path 2 (M >> N, JOBZ='O')
379 *
380  wrkbl = n + lwork_sgeqrf_mn
381  wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
382  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
383  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
384  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
385  wrkbl = max( wrkbl, 3*n + bdspac )
386  maxwrk = wrkbl + 2*n*n
387  minwrk = bdspac + 2*n*n + 3*n
388  ELSE IF( wntqs ) THEN
389 *
390 * Path 3 (M >> N, JOBZ='S')
391 *
392  wrkbl = n + lwork_sgeqrf_mn
393  wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
394  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
395  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
396  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
397  wrkbl = max( wrkbl, 3*n + bdspac )
398  maxwrk = wrkbl + n*n
399  minwrk = bdspac + n*n + 3*n
400  ELSE IF( wntqa ) THEN
401 *
402 * Path 4 (M >> N, JOBZ='A')
403 *
404  wrkbl = n + lwork_sgeqrf_mn
405  wrkbl = max( wrkbl, n + lwork_sorgqr_mm )
406  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
407  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
408  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
409  wrkbl = max( wrkbl, 3*n + bdspac )
410  maxwrk = wrkbl + n*n
411  minwrk = n*n + max( 3*n + bdspac, n + m )
412  END IF
413  ELSE
414 *
415 * Path 5 (M >= N, but not much larger)
416 *
417  wrkbl = 3*n + lwork_sgebrd_mn
418  IF( wntqn ) THEN
419 * Path 5n (M >= N, jobz='N')
420  maxwrk = max( wrkbl, 3*n + bdspac )
421  minwrk = 3*n + max( m, bdspac )
422  ELSE IF( wntqo ) THEN
423 * Path 5o (M >= N, jobz='O')
424  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
425  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
426  wrkbl = max( wrkbl, 3*n + bdspac )
427  maxwrk = wrkbl + m*n
428  minwrk = 3*n + max( m, n*n + bdspac )
429  ELSE IF( wntqs ) THEN
430 * Path 5s (M >= N, jobz='S')
431  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
432  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
433  maxwrk = max( wrkbl, 3*n + bdspac )
434  minwrk = 3*n + max( m, bdspac )
435  ELSE IF( wntqa ) THEN
436 * Path 5a (M >= N, jobz='A')
437  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mm )
438  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
439  maxwrk = max( wrkbl, 3*n + bdspac )
440  minwrk = 3*n + max( m, bdspac )
441  END IF
442  END IF
443  ELSE IF( minmn.GT.0 ) THEN
444 *
445 * Compute space needed for SBDSDC
446 *
447  IF( wntqn ) THEN
448 * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
449 * keep 7*N for backwards compatibility.
450  bdspac = 7*m
451  ELSE
452  bdspac = 3*m*m + 4*m
453  END IF
454 *
455 * Compute space preferred for each routine
456  CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
457  $ dum(1), dum(1), -1, ierr )
458  lwork_sgebrd_mn = int( dum(1) )
459 *
460  CALL sgebrd( m, m, a, m, s, dum(1), dum(1),
461  $ dum(1), dum(1), -1, ierr )
462  lwork_sgebrd_mm = int( dum(1) )
463 *
464  CALL sgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
465  lwork_sgelqf_mn = int( dum(1) )
466 *
467  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
468  lwork_sorglq_nn = int( dum(1) )
469 *
470  CALL sorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
471  lwork_sorglq_mn = int( dum(1) )
472 *
473  CALL sorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
474  lwork_sorgbr_p_mm = int( dum(1) )
475 *
476  CALL sormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
477  $ dum(1), dum(1), m, dum(1), -1, ierr )
478  lwork_sormbr_prt_mm = int( dum(1) )
479 *
480  CALL sormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
481  $ dum(1), dum(1), m, dum(1), -1, ierr )
482  lwork_sormbr_prt_mn = int( dum(1) )
483 *
484  CALL sormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
485  $ dum(1), dum(1), n, dum(1), -1, ierr )
486  lwork_sormbr_prt_nn = int( dum(1) )
487 *
488  CALL sormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
489  $ dum(1), dum(1), m, dum(1), -1, ierr )
490  lwork_sormbr_qln_mm = int( dum(1) )
491 *
492  IF( n.GE.mnthr ) THEN
493  IF( wntqn ) THEN
494 *
495 * Path 1t (N >> M, JOBZ='N')
496 *
497  wrkbl = m + lwork_sgelqf_mn
498  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
499  maxwrk = max( wrkbl, bdspac + m )
500  minwrk = bdspac + m
501  ELSE IF( wntqo ) THEN
502 *
503 * Path 2t (N >> M, JOBZ='O')
504 *
505  wrkbl = m + lwork_sgelqf_mn
506  wrkbl = max( wrkbl, m + lwork_sorglq_mn )
507  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
508  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
509  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
510  wrkbl = max( wrkbl, 3*m + bdspac )
511  maxwrk = wrkbl + 2*m*m
512  minwrk = bdspac + 2*m*m + 3*m
513  ELSE IF( wntqs ) THEN
514 *
515 * Path 3t (N >> M, JOBZ='S')
516 *
517  wrkbl = m + lwork_sgelqf_mn
518  wrkbl = max( wrkbl, m + lwork_sorglq_mn )
519  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
520  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
521  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
522  wrkbl = max( wrkbl, 3*m + bdspac )
523  maxwrk = wrkbl + m*m
524  minwrk = bdspac + m*m + 3*m
525  ELSE IF( wntqa ) THEN
526 *
527 * Path 4t (N >> M, JOBZ='A')
528 *
529  wrkbl = m + lwork_sgelqf_mn
530  wrkbl = max( wrkbl, m + lwork_sorglq_nn )
531  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
532  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
533  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
534  wrkbl = max( wrkbl, 3*m + bdspac )
535  maxwrk = wrkbl + m*m
536  minwrk = m*m + max( 3*m + bdspac, m + n )
537  END IF
538  ELSE
539 *
540 * Path 5t (N > M, but not much larger)
541 *
542  wrkbl = 3*m + lwork_sgebrd_mn
543  IF( wntqn ) THEN
544 * Path 5tn (N > M, jobz='N')
545  maxwrk = max( wrkbl, 3*m + bdspac )
546  minwrk = 3*m + max( n, bdspac )
547  ELSE IF( wntqo ) THEN
548 * Path 5to (N > M, jobz='O')
549  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
550  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
551  wrkbl = max( wrkbl, 3*m + bdspac )
552  maxwrk = wrkbl + m*n
553  minwrk = 3*m + max( n, m*m + bdspac )
554  ELSE IF( wntqs ) THEN
555 * Path 5ts (N > M, jobz='S')
556  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
557  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
558  maxwrk = max( wrkbl, 3*m + bdspac )
559  minwrk = 3*m + max( n, bdspac )
560  ELSE IF( wntqa ) THEN
561 * Path 5ta (N > M, jobz='A')
562  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
563  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_nn )
564  maxwrk = max( wrkbl, 3*m + bdspac )
565  minwrk = 3*m + max( n, bdspac )
566  END IF
567  END IF
568  END IF
569 
570  maxwrk = max( maxwrk, minwrk )
571  work( 1 ) = maxwrk
572 *
573  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
574  info = -12
575  END IF
576  END IF
577 *
578  IF( info.NE.0 ) THEN
579  CALL xerbla( 'SGESDD', -info )
580  RETURN
581  ELSE IF( lquery ) THEN
582  RETURN
583  END IF
584 *
585 * Quick return if possible
586 *
587  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
588  RETURN
589  END IF
590 *
591 * Get machine constants
592 *
593  eps = slamch( 'P' )
594  smlnum = sqrt( slamch( 'S' ) ) / eps
595  bignum = one / smlnum
596 *
597 * Scale A if max element outside range [SMLNUM,BIGNUM]
598 *
599  anrm = slange( 'M', m, n, a, lda, dum )
600  IF( sisnan( anrm ) ) THEN
601  info = -4
602  RETURN
603  END IF
604  iscl = 0
605  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
606  iscl = 1
607  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
608  ELSE IF( anrm.GT.bignum ) THEN
609  iscl = 1
610  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
611  END IF
612 *
613  IF( m.GE.n ) THEN
614 *
615 * A has at least as many rows as columns. If A has sufficiently
616 * more rows than columns, first reduce using the QR
617 * decomposition (if sufficient workspace available)
618 *
619  IF( m.GE.mnthr ) THEN
620 *
621  IF( wntqn ) THEN
622 *
623 * Path 1 (M >> N, JOBZ='N')
624 * No singular vectors to be computed
625 *
626  itau = 1
627  nwork = itau + n
628 *
629 * Compute A=Q*R
630 * Workspace: need N [tau] + N [work]
631 * Workspace: prefer N [tau] + N*NB [work]
632 *
633  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
634  $ lwork - nwork + 1, ierr )
635 *
636 * Zero out below R
637 *
638  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
639  ie = 1
640  itauq = ie + n
641  itaup = itauq + n
642  nwork = itaup + n
643 *
644 * Bidiagonalize R in A
645 * Workspace: need 3*N [e, tauq, taup] + N [work]
646 * Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
647 *
648  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
649  $ work( itaup ), work( nwork ), lwork-nwork+1,
650  $ ierr )
651  nwork = ie + n
652 *
653 * Perform bidiagonal SVD, computing singular values only
654 * Workspace: need N [e] + BDSPAC
655 *
656  CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
657  $ dum, idum, work( nwork ), iwork, info )
658 *
659  ELSE IF( wntqo ) THEN
660 *
661 * Path 2 (M >> N, JOBZ = 'O')
662 * N left singular vectors to be overwritten on A and
663 * N right singular vectors to be computed in VT
664 *
665  ir = 1
666 *
667 * WORK(IR) is LDWRKR by N
668 *
669  IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
670  ldwrkr = lda
671  ELSE
672  ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
673  END IF
674  itau = ir + ldwrkr*n
675  nwork = itau + n
676 *
677 * Compute A=Q*R
678 * Workspace: need N*N [R] + N [tau] + N [work]
679 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
680 *
681  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
682  $ lwork - nwork + 1, ierr )
683 *
684 * Copy R to WORK(IR), zeroing out below it
685 *
686  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
687  CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
688  $ ldwrkr )
689 *
690 * Generate Q in A
691 * Workspace: need N*N [R] + N [tau] + N [work]
692 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
693 *
694  CALL sorgqr( m, n, n, a, lda, work( itau ),
695  $ work( nwork ), lwork - nwork + 1, ierr )
696  ie = itau
697  itauq = ie + n
698  itaup = itauq + n
699  nwork = itaup + n
700 *
701 * Bidiagonalize R in WORK(IR)
702 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
703 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
704 *
705  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
706  $ work( itauq ), work( itaup ), work( nwork ),
707  $ lwork - nwork + 1, ierr )
708 *
709 * WORK(IU) is N by N
710 *
711  iu = nwork
712  nwork = iu + n*n
713 *
714 * Perform bidiagonal SVD, computing left singular vectors
715 * of bidiagonal matrix in WORK(IU) and computing right
716 * singular vectors of bidiagonal matrix in VT
717 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
718 *
719  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
720  $ vt, ldvt, dum, idum, work( nwork ), iwork,
721  $ info )
722 *
723 * Overwrite WORK(IU) by left singular vectors of R
724 * and VT by right singular vectors of R
725 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
726 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
727 *
728  CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
729  $ work( itauq ), work( iu ), n, work( nwork ),
730  $ lwork - nwork + 1, ierr )
731  CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
732  $ work( itaup ), vt, ldvt, work( nwork ),
733  $ lwork - nwork + 1, ierr )
734 *
735 * Multiply Q in A by left singular vectors of R in
736 * WORK(IU), storing result in WORK(IR) and copying to A
737 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
738 * Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
739 *
740  DO 10 i = 1, m, ldwrkr
741  chunk = min( m - i + 1, ldwrkr )
742  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
743  $ lda, work( iu ), n, zero, work( ir ),
744  $ ldwrkr )
745  CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
746  $ a( i, 1 ), lda )
747  10 CONTINUE
748 *
749  ELSE IF( wntqs ) THEN
750 *
751 * Path 3 (M >> N, JOBZ='S')
752 * N left singular vectors to be computed in U and
753 * N right singular vectors to be computed in VT
754 *
755  ir = 1
756 *
757 * WORK(IR) is N by N
758 *
759  ldwrkr = n
760  itau = ir + ldwrkr*n
761  nwork = itau + n
762 *
763 * Compute A=Q*R
764 * Workspace: need N*N [R] + N [tau] + N [work]
765 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
766 *
767  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
768  $ lwork - nwork + 1, ierr )
769 *
770 * Copy R to WORK(IR), zeroing out below it
771 *
772  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773  CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
774  $ ldwrkr )
775 *
776 * Generate Q in A
777 * Workspace: need N*N [R] + N [tau] + N [work]
778 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
779 *
780  CALL sorgqr( m, n, n, a, lda, work( itau ),
781  $ work( nwork ), lwork - nwork + 1, ierr )
782  ie = itau
783  itauq = ie + n
784  itaup = itauq + n
785  nwork = itaup + n
786 *
787 * Bidiagonalize R in WORK(IR)
788 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
789 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
790 *
791  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
792  $ work( itauq ), work( itaup ), work( nwork ),
793  $ lwork - nwork + 1, ierr )
794 *
795 * Perform bidiagonal SVD, computing left singular vectors
796 * of bidiagoal matrix in U and computing right singular
797 * vectors of bidiagonal matrix in VT
798 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
799 *
800  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
801  $ ldvt, dum, idum, work( nwork ), iwork,
802  $ info )
803 *
804 * Overwrite U by left singular vectors of R and VT
805 * by right singular vectors of R
806 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
807 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
808 *
809  CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
810  $ work( itauq ), u, ldu, work( nwork ),
811  $ lwork - nwork + 1, ierr )
812 *
813  CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
814  $ work( itaup ), vt, ldvt, work( nwork ),
815  $ lwork - nwork + 1, ierr )
816 *
817 * Multiply Q in A by left singular vectors of R in
818 * WORK(IR), storing result in U
819 * Workspace: need N*N [R]
820 *
821  CALL slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
822  CALL sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
823  $ ldwrkr, zero, u, ldu )
824 *
825  ELSE IF( wntqa ) THEN
826 *
827 * Path 4 (M >> N, JOBZ='A')
828 * M left singular vectors to be computed in U and
829 * N right singular vectors to be computed in VT
830 *
831  iu = 1
832 *
833 * WORK(IU) is N by N
834 *
835  ldwrku = n
836  itau = iu + ldwrku*n
837  nwork = itau + n
838 *
839 * Compute A=Q*R, copying result to U
840 * Workspace: need N*N [U] + N [tau] + N [work]
841 * Workspace: prefer N*N [U] + N [tau] + N*NB [work]
842 *
843  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
844  $ lwork - nwork + 1, ierr )
845  CALL slacpy( 'L', m, n, a, lda, u, ldu )
846 *
847 * Generate Q in U
848 * Workspace: need N*N [U] + N [tau] + M [work]
849 * Workspace: prefer N*N [U] + N [tau] + M*NB [work]
850  CALL sorgqr( m, m, n, u, ldu, work( itau ),
851  $ work( nwork ), lwork - nwork + 1, ierr )
852 *
853 * Produce R in A, zeroing out other entries
854 *
855  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
856  ie = itau
857  itauq = ie + n
858  itaup = itauq + n
859  nwork = itaup + n
860 *
861 * Bidiagonalize R in A
862 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
863 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
864 *
865  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
866  $ work( itaup ), work( nwork ), lwork-nwork+1,
867  $ ierr )
868 *
869 * Perform bidiagonal SVD, computing left singular vectors
870 * of bidiagonal matrix in WORK(IU) and computing right
871 * singular vectors of bidiagonal matrix in VT
872 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
873 *
874  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
875  $ vt, ldvt, dum, idum, work( nwork ), iwork,
876  $ info )
877 *
878 * Overwrite WORK(IU) by left singular vectors of R and VT
879 * by right singular vectors of R
880 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
881 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
882 *
883  CALL sormbr( 'Q', 'L', 'N', n, n, n, a, lda,
884  $ work( itauq ), work( iu ), ldwrku,
885  $ work( nwork ), lwork - nwork + 1, ierr )
886  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
887  $ work( itaup ), vt, ldvt, work( nwork ),
888  $ lwork - nwork + 1, ierr )
889 *
890 * Multiply Q in U by left singular vectors of R in
891 * WORK(IU), storing result in A
892 * Workspace: need N*N [U]
893 *
894  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
895  $ ldwrku, zero, a, lda )
896 *
897 * Copy left singular vectors of A from A to U
898 *
899  CALL slacpy( 'F', m, n, a, lda, u, ldu )
900 *
901  END IF
902 *
903  ELSE
904 *
905 * M .LT. MNTHR
906 *
907 * Path 5 (M >= N, but not much larger)
908 * Reduce to bidiagonal form without QR decomposition
909 *
910  ie = 1
911  itauq = ie + n
912  itaup = itauq + n
913  nwork = itaup + n
914 *
915 * Bidiagonalize A
916 * Workspace: need 3*N [e, tauq, taup] + M [work]
917 * Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
918 *
919  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
920  $ work( itaup ), work( nwork ), lwork-nwork+1,
921  $ ierr )
922  IF( wntqn ) THEN
923 *
924 * Path 5n (M >= N, JOBZ='N')
925 * Perform bidiagonal SVD, only computing singular values
926 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
927 *
928  CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
929  $ dum, idum, work( nwork ), iwork, info )
930  ELSE IF( wntqo ) THEN
931 * Path 5o (M >= N, JOBZ='O')
932  iu = nwork
933  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
934 *
935 * WORK( IU ) is M by N
936 *
937  ldwrku = m
938  nwork = iu + ldwrku*n
939  CALL slaset( 'F', m, n, zero, zero, work( iu ),
940  $ ldwrku )
941 * IR is unused; silence compile warnings
942  ir = -1
943  ELSE
944 *
945 * WORK( IU ) is N by N
946 *
947  ldwrku = n
948  nwork = iu + ldwrku*n
949 *
950 * WORK(IR) is LDWRKR by N
951 *
952  ir = nwork
953  ldwrkr = ( lwork - n*n - 3*n ) / n
954  END IF
955  nwork = iu + ldwrku*n
956 *
957 * Perform bidiagonal SVD, computing left singular vectors
958 * of bidiagonal matrix in WORK(IU) and computing right
959 * singular vectors of bidiagonal matrix in VT
960 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
961 *
962  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
963  $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
964  $ iwork, info )
965 *
966 * Overwrite VT by right singular vectors of A
967 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
968 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
969 *
970  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
971  $ work( itaup ), vt, ldvt, work( nwork ),
972  $ lwork - nwork + 1, ierr )
973 *
974  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
975 *
976 * Path 5o-fast
977 * Overwrite WORK(IU) by left singular vectors of A
978 * Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
979 * Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
980 *
981  CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
982  $ work( itauq ), work( iu ), ldwrku,
983  $ work( nwork ), lwork - nwork + 1, ierr )
984 *
985 * Copy left singular vectors of A from WORK(IU) to A
986 *
987  CALL slacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
988  ELSE
989 *
990 * Path 5o-slow
991 * Generate Q in A
992 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
993 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
994 *
995  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
996  $ work( nwork ), lwork - nwork + 1, ierr )
997 *
998 * Multiply Q in A by left singular vectors of
999 * bidiagonal matrix in WORK(IU), storing result in
1000 * WORK(IR) and copying to A
1001 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1002 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1003 *
1004  DO 20 i = 1, m, ldwrkr
1005  chunk = min( m - i + 1, ldwrkr )
1006  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1007  $ lda, work( iu ), ldwrku, zero,
1008  $ work( ir ), ldwrkr )
1009  CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
1010  $ a( i, 1 ), lda )
1011  20 CONTINUE
1012  END IF
1013 *
1014  ELSE IF( wntqs ) THEN
1015 *
1016 * Path 5s (M >= N, JOBZ='S')
1017 * Perform bidiagonal SVD, computing left singular vectors
1018 * of bidiagonal matrix in U and computing right singular
1019 * vectors of bidiagonal matrix in VT
1020 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1021 *
1022  CALL slaset( 'F', m, n, zero, zero, u, ldu )
1023  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1024  $ ldvt, dum, idum, work( nwork ), iwork,
1025  $ info )
1026 *
1027 * Overwrite U by left singular vectors of A and VT
1028 * by right singular vectors of A
1029 * Workspace: need 3*N [e, tauq, taup] + N [work]
1030 * Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1031 *
1032  CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1033  $ work( itauq ), u, ldu, work( nwork ),
1034  $ lwork - nwork + 1, ierr )
1035  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
1036  $ work( itaup ), vt, ldvt, work( nwork ),
1037  $ lwork - nwork + 1, ierr )
1038  ELSE IF( wntqa ) THEN
1039 *
1040 * Path 5a (M >= N, JOBZ='A')
1041 * Perform bidiagonal SVD, computing left singular vectors
1042 * of bidiagonal matrix in U and computing right singular
1043 * vectors of bidiagonal matrix in VT
1044 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1045 *
1046  CALL slaset( 'F', m, m, zero, zero, u, ldu )
1047  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1048  $ ldvt, dum, idum, work( nwork ), iwork,
1049  $ info )
1050 *
1051 * Set the right corner of U to identity matrix
1052 *
1053  IF( m.GT.n ) THEN
1054  CALL slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1055  $ ldu )
1056  END IF
1057 *
1058 * Overwrite U by left singular vectors of A and VT
1059 * by right singular vectors of A
1060 * Workspace: need 3*N [e, tauq, taup] + M [work]
1061 * Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1062 *
1063  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1064  $ work( itauq ), u, ldu, work( nwork ),
1065  $ lwork - nwork + 1, ierr )
1066  CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1067  $ work( itaup ), vt, ldvt, work( nwork ),
1068  $ lwork - nwork + 1, ierr )
1069  END IF
1070 *
1071  END IF
1072 *
1073  ELSE
1074 *
1075 * A has more columns than rows. If A has sufficiently more
1076 * columns than rows, first reduce using the LQ decomposition (if
1077 * sufficient workspace available)
1078 *
1079  IF( n.GE.mnthr ) THEN
1080 *
1081  IF( wntqn ) THEN
1082 *
1083 * Path 1t (N >> M, JOBZ='N')
1084 * No singular vectors to be computed
1085 *
1086  itau = 1
1087  nwork = itau + m
1088 *
1089 * Compute A=L*Q
1090 * Workspace: need M [tau] + M [work]
1091 * Workspace: prefer M [tau] + M*NB [work]
1092 *
1093  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1094  $ lwork - nwork + 1, ierr )
1095 *
1096 * Zero out above L
1097 *
1098  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1099  ie = 1
1100  itauq = ie + m
1101  itaup = itauq + m
1102  nwork = itaup + m
1103 *
1104 * Bidiagonalize L in A
1105 * Workspace: need 3*M [e, tauq, taup] + M [work]
1106 * Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1107 *
1108  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1109  $ work( itaup ), work( nwork ), lwork-nwork+1,
1110  $ ierr )
1111  nwork = ie + m
1112 *
1113 * Perform bidiagonal SVD, computing singular values only
1114 * Workspace: need M [e] + BDSPAC
1115 *
1116  CALL sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1117  $ dum, idum, work( nwork ), iwork, info )
1118 *
1119  ELSE IF( wntqo ) THEN
1120 *
1121 * Path 2t (N >> M, JOBZ='O')
1122 * M right singular vectors to be overwritten on A and
1123 * M left singular vectors to be computed in U
1124 *
1125  ivt = 1
1126 *
1127 * WORK(IVT) is M by M
1128 * WORK(IL) is M by M; it is later resized to M by chunk for gemm
1129 *
1130  il = ivt + m*m
1131  IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1132  ldwrkl = m
1133  chunk = n
1134  ELSE
1135  ldwrkl = m
1136  chunk = ( lwork - m*m ) / m
1137  END IF
1138  itau = il + ldwrkl*m
1139  nwork = itau + m
1140 *
1141 * Compute A=L*Q
1142 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1143 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1144 *
1145  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1146  $ lwork - nwork + 1, ierr )
1147 *
1148 * Copy L to WORK(IL), zeroing about above it
1149 *
1150  CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1151  CALL slaset( 'U', m - 1, m - 1, zero, zero,
1152  $ work( il + ldwrkl ), ldwrkl )
1153 *
1154 * Generate Q in A
1155 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1156 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1157 *
1158  CALL sorglq( m, n, m, a, lda, work( itau ),
1159  $ work( nwork ), lwork - nwork + 1, ierr )
1160  ie = itau
1161  itauq = ie + m
1162  itaup = itauq + m
1163  nwork = itaup + m
1164 *
1165 * Bidiagonalize L in WORK(IL)
1166 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1167 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1168 *
1169  CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1170  $ work( itauq ), work( itaup ), work( nwork ),
1171  $ lwork - nwork + 1, ierr )
1172 *
1173 * Perform bidiagonal SVD, computing left singular vectors
1174 * of bidiagonal matrix in U, and computing right singular
1175 * vectors of bidiagonal matrix in WORK(IVT)
1176 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1177 *
1178  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1179  $ work( ivt ), m, dum, idum, work( nwork ),
1180  $ iwork, info )
1181 *
1182 * Overwrite U by left singular vectors of L and WORK(IVT)
1183 * by right singular vectors of L
1184 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1185 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1186 *
1187  CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1188  $ work( itauq ), u, ldu, work( nwork ),
1189  $ lwork - nwork + 1, ierr )
1190  CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1191  $ work( itaup ), work( ivt ), m,
1192  $ work( nwork ), lwork - nwork + 1, ierr )
1193 *
1194 * Multiply right singular vectors of L in WORK(IVT) by Q
1195 * in A, storing result in WORK(IL) and copying to A
1196 * Workspace: need M*M [VT] + M*M [L]
1197 * Workspace: prefer M*M [VT] + M*N [L]
1198 * At this point, L is resized as M by chunk.
1199 *
1200  DO 30 i = 1, n, chunk
1201  blk = min( n - i + 1, chunk )
1202  CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1203  $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1204  CALL slacpy( 'F', m, blk, work( il ), ldwrkl,
1205  $ a( 1, i ), lda )
1206  30 CONTINUE
1207 *
1208  ELSE IF( wntqs ) THEN
1209 *
1210 * Path 3t (N >> M, JOBZ='S')
1211 * M right singular vectors to be computed in VT and
1212 * M left singular vectors to be computed in U
1213 *
1214  il = 1
1215 *
1216 * WORK(IL) is M by M
1217 *
1218  ldwrkl = m
1219  itau = il + ldwrkl*m
1220  nwork = itau + m
1221 *
1222 * Compute A=L*Q
1223 * Workspace: need M*M [L] + M [tau] + M [work]
1224 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1225 *
1226  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1227  $ lwork - nwork + 1, ierr )
1228 *
1229 * Copy L to WORK(IL), zeroing out above it
1230 *
1231  CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1232  CALL slaset( 'U', m - 1, m - 1, zero, zero,
1233  $ work( il + ldwrkl ), ldwrkl )
1234 *
1235 * Generate Q in A
1236 * Workspace: need M*M [L] + M [tau] + M [work]
1237 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1238 *
1239  CALL sorglq( m, n, m, a, lda, work( itau ),
1240  $ work( nwork ), lwork - nwork + 1, ierr )
1241  ie = itau
1242  itauq = ie + m
1243  itaup = itauq + m
1244  nwork = itaup + m
1245 *
1246 * Bidiagonalize L in WORK(IU).
1247 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1248 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1249 *
1250  CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1251  $ work( itauq ), work( itaup ), work( nwork ),
1252  $ lwork - nwork + 1, ierr )
1253 *
1254 * Perform bidiagonal SVD, computing left singular vectors
1255 * of bidiagonal matrix in U and computing right singular
1256 * vectors of bidiagonal matrix in VT
1257 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1258 *
1259  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1260  $ ldvt, dum, idum, work( nwork ), iwork,
1261  $ info )
1262 *
1263 * Overwrite U by left singular vectors of L and VT
1264 * by right singular vectors of L
1265 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1266 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1267 *
1268  CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1269  $ work( itauq ), u, ldu, work( nwork ),
1270  $ lwork - nwork + 1, ierr )
1271  CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1272  $ work( itaup ), vt, ldvt, work( nwork ),
1273  $ lwork - nwork + 1, ierr )
1274 *
1275 * Multiply right singular vectors of L in WORK(IL) by
1276 * Q in A, storing result in VT
1277 * Workspace: need M*M [L]
1278 *
1279  CALL slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1280  CALL sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1281  $ a, lda, zero, vt, ldvt )
1282 *
1283  ELSE IF( wntqa ) THEN
1284 *
1285 * Path 4t (N >> M, JOBZ='A')
1286 * N right singular vectors to be computed in VT and
1287 * M left singular vectors to be computed in U
1288 *
1289  ivt = 1
1290 *
1291 * WORK(IVT) is M by M
1292 *
1293  ldwkvt = m
1294  itau = ivt + ldwkvt*m
1295  nwork = itau + m
1296 *
1297 * Compute A=L*Q, copying result to VT
1298 * Workspace: need M*M [VT] + M [tau] + M [work]
1299 * Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1300 *
1301  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1302  $ lwork - nwork + 1, ierr )
1303  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
1304 *
1305 * Generate Q in VT
1306 * Workspace: need M*M [VT] + M [tau] + N [work]
1307 * Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1308 *
1309  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
1310  $ work( nwork ), lwork - nwork + 1, ierr )
1311 *
1312 * Produce L in A, zeroing out other entries
1313 *
1314  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1315  ie = itau
1316  itauq = ie + m
1317  itaup = itauq + m
1318  nwork = itaup + m
1319 *
1320 * Bidiagonalize L in A
1321 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1322 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1323 *
1324  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1325  $ work( itaup ), work( nwork ), lwork-nwork+1,
1326  $ ierr )
1327 *
1328 * Perform bidiagonal SVD, computing left singular vectors
1329 * of bidiagonal matrix in U and computing right singular
1330 * vectors of bidiagonal matrix in WORK(IVT)
1331 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1332 *
1333  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1334  $ work( ivt ), ldwkvt, dum, idum,
1335  $ work( nwork ), iwork, info )
1336 *
1337 * Overwrite U by left singular vectors of L and WORK(IVT)
1338 * by right singular vectors of L
1339 * Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1340 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1341 *
1342  CALL sormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1343  $ work( itauq ), u, ldu, work( nwork ),
1344  $ lwork - nwork + 1, ierr )
1345  CALL sormbr( 'P', 'R', 'T', m, m, m, a, lda,
1346  $ work( itaup ), work( ivt ), ldwkvt,
1347  $ work( nwork ), lwork - nwork + 1, ierr )
1348 *
1349 * Multiply right singular vectors of L in WORK(IVT) by
1350 * Q in VT, storing result in A
1351 * Workspace: need M*M [VT]
1352 *
1353  CALL sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1354  $ vt, ldvt, zero, a, lda )
1355 *
1356 * Copy right singular vectors of A from A to VT
1357 *
1358  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
1359 *
1360  END IF
1361 *
1362  ELSE
1363 *
1364 * N .LT. MNTHR
1365 *
1366 * Path 5t (N > M, but not much larger)
1367 * Reduce to bidiagonal form without LQ decomposition
1368 *
1369  ie = 1
1370  itauq = ie + m
1371  itaup = itauq + m
1372  nwork = itaup + m
1373 *
1374 * Bidiagonalize A
1375 * Workspace: need 3*M [e, tauq, taup] + N [work]
1376 * Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1377 *
1378  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1379  $ work( itaup ), work( nwork ), lwork-nwork+1,
1380  $ ierr )
1381  IF( wntqn ) THEN
1382 *
1383 * Path 5tn (N > M, JOBZ='N')
1384 * Perform bidiagonal SVD, only computing singular values
1385 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1386 *
1387  CALL sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1388  $ dum, idum, work( nwork ), iwork, info )
1389  ELSE IF( wntqo ) THEN
1390 * Path 5to (N > M, JOBZ='O')
1391  ldwkvt = m
1392  ivt = nwork
1393  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1394 *
1395 * WORK( IVT ) is M by N
1396 *
1397  CALL slaset( 'F', m, n, zero, zero, work( ivt ),
1398  $ ldwkvt )
1399  nwork = ivt + ldwkvt*n
1400 * IL is unused; silence compile warnings
1401  il = -1
1402  ELSE
1403 *
1404 * WORK( IVT ) is M by M
1405 *
1406  nwork = ivt + ldwkvt*m
1407  il = nwork
1408 *
1409 * WORK(IL) is M by CHUNK
1410 *
1411  chunk = ( lwork - m*m - 3*m ) / m
1412  END IF
1413 *
1414 * Perform bidiagonal SVD, computing left singular vectors
1415 * of bidiagonal matrix in U and computing right singular
1416 * vectors of bidiagonal matrix in WORK(IVT)
1417 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1418 *
1419  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1420  $ work( ivt ), ldwkvt, dum, idum,
1421  $ work( nwork ), iwork, info )
1422 *
1423 * Overwrite U by left singular vectors of A
1424 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1425 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1426 *
1427  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1428  $ work( itauq ), u, ldu, work( nwork ),
1429  $ lwork - nwork + 1, ierr )
1430 *
1431  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1432 *
1433 * Path 5to-fast
1434 * Overwrite WORK(IVT) by left singular vectors of A
1435 * Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1436 * Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1437 *
1438  CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1439  $ work( itaup ), work( ivt ), ldwkvt,
1440  $ work( nwork ), lwork - nwork + 1, ierr )
1441 *
1442 * Copy right singular vectors of A from WORK(IVT) to A
1443 *
1444  CALL slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1445  ELSE
1446 *
1447 * Path 5to-slow
1448 * Generate P**T in A
1449 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1450 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1451 *
1452  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
1453  $ work( nwork ), lwork - nwork + 1, ierr )
1454 *
1455 * Multiply Q in A by right singular vectors of
1456 * bidiagonal matrix in WORK(IVT), storing result in
1457 * WORK(IL) and copying to A
1458 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1459 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1460 *
1461  DO 40 i = 1, n, chunk
1462  blk = min( n - i + 1, chunk )
1463  CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1464  $ ldwkvt, a( 1, i ), lda, zero,
1465  $ work( il ), m )
1466  CALL slacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1467  $ lda )
1468  40 CONTINUE
1469  END IF
1470  ELSE IF( wntqs ) THEN
1471 *
1472 * Path 5ts (N > M, JOBZ='S')
1473 * Perform bidiagonal SVD, computing left singular vectors
1474 * of bidiagonal matrix in U and computing right singular
1475 * vectors of bidiagonal matrix in VT
1476 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1477 *
1478  CALL slaset( 'F', m, n, zero, zero, vt, ldvt )
1479  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1480  $ ldvt, dum, idum, work( nwork ), iwork,
1481  $ info )
1482 *
1483 * Overwrite U by left singular vectors of A and VT
1484 * by right singular vectors of A
1485 * Workspace: need 3*M [e, tauq, taup] + M [work]
1486 * Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1487 *
1488  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1489  $ work( itauq ), u, ldu, work( nwork ),
1490  $ lwork - nwork + 1, ierr )
1491  CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1492  $ work( itaup ), vt, ldvt, work( nwork ),
1493  $ lwork - nwork + 1, ierr )
1494  ELSE IF( wntqa ) THEN
1495 *
1496 * Path 5ta (N > M, JOBZ='A')
1497 * Perform bidiagonal SVD, computing left singular vectors
1498 * of bidiagonal matrix in U and computing right singular
1499 * vectors of bidiagonal matrix in VT
1500 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1501 *
1502  CALL slaset( 'F', n, n, zero, zero, vt, ldvt )
1503  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1504  $ ldvt, dum, idum, work( nwork ), iwork,
1505  $ info )
1506 *
1507 * Set the right corner of VT to identity matrix
1508 *
1509  IF( n.GT.m ) THEN
1510  CALL slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1511  $ ldvt )
1512  END IF
1513 *
1514 * Overwrite U by left singular vectors of A and VT
1515 * by right singular vectors of A
1516 * Workspace: need 3*M [e, tauq, taup] + N [work]
1517 * Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1518 *
1519  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1520  $ work( itauq ), u, ldu, work( nwork ),
1521  $ lwork - nwork + 1, ierr )
1522  CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1523  $ work( itaup ), vt, ldvt, work( nwork ),
1524  $ lwork - nwork + 1, ierr )
1525  END IF
1526 *
1527  END IF
1528 *
1529  END IF
1530 *
1531 * Undo scaling if necessary
1532 *
1533  IF( iscl.EQ.1 ) THEN
1534  IF( anrm.GT.bignum )
1535  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1536  $ ierr )
1537  IF( anrm.LT.smlnum )
1538  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1539  $ ierr )
1540  END IF
1541 *
1542 * Return optimal workspace in WORK(1)
1543 *
1544  work( 1 ) = maxwrk
1545 *
1546  RETURN
1547 *
1548 * End of SGESDD
1549 *
1550  END
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine sbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
SBDSDC
Definition: sbdsdc.f:205
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:157
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:145
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:205
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:143
subroutine sgesdd(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
SGESDD
Definition: sgesdd.f:219
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:196
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:127
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:128
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187