LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
sgesdd.f
Go to the documentation of this file.
1 *> \brief \b SGESDD
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESDD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesdd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesdd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesdd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, IWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBZ
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * INTEGER IWORK( * )
30 * REAL A( LDA, * ), S( * ), U( LDU, * ),
31 * $ VT( LDVT, * ), WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> SGESDD computes the singular value decomposition (SVD) of a real
41 *> M-by-N matrix A, optionally computing the left and right singular
42 *> vectors. If singular vectors are desired, it uses a
43 *> divide-and-conquer algorithm.
44 *>
45 *> The SVD is written
46 *>
47 *> A = U * SIGMA * transpose(V)
48 *>
49 *> where SIGMA is an M-by-N matrix which is zero except for its
50 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
51 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
52 *> are the singular values of A; they are real and non-negative, and
53 *> are returned in descending order. The first min(m,n) columns of
54 *> U and V are the left and right singular vectors of A.
55 *>
56 *> Note that the routine returns VT = V**T, not V.
57 *>
58 *> The divide and conquer algorithm makes very mild assumptions about
59 *> floating point arithmetic. It will work on machines with a guard
60 *> digit in add/subtract, or on those binary machines without guard
61 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
62 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
63 *> without guard digits, but we know of none.
64 *> \endverbatim
65 *
66 * Arguments:
67 * ==========
68 *
69 *> \param[in] JOBZ
70 *> \verbatim
71 *> JOBZ is CHARACTER*1
72 *> Specifies options for computing all or part of the matrix U:
73 *> = 'A': all M columns of U and all N rows of V**T are
74 *> returned in the arrays U and VT;
75 *> = 'S': the first min(M,N) columns of U and the first
76 *> min(M,N) rows of V**T are returned in the arrays U
77 *> and VT;
78 *> = 'O': If M >= N, the first N columns of U are overwritten
79 *> on the array A and all rows of V**T are returned in
80 *> the array VT;
81 *> otherwise, all columns of U are returned in the
82 *> array U and the first M rows of V**T are overwritten
83 *> in the array A;
84 *> = 'N': no columns of U or rows of V**T are computed.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is REAL array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBZ = 'O', A is overwritten with the first N columns
105 *> of U (the left singular vectors, stored
106 *> columnwise) if M >= N;
107 *> A is overwritten with the first M rows
108 *> of V**T (the right singular vectors, stored
109 *> rowwise) otherwise.
110 *> if JOBZ .ne. 'O', the contents of A are destroyed.
111 *> \endverbatim
112 *>
113 *> \param[in] LDA
114 *> \verbatim
115 *> LDA is INTEGER
116 *> The leading dimension of the array A. LDA >= max(1,M).
117 *> \endverbatim
118 *>
119 *> \param[out] S
120 *> \verbatim
121 *> S is REAL array, dimension (min(M,N))
122 *> The singular values of A, sorted so that S(i) >= S(i+1).
123 *> \endverbatim
124 *>
125 *> \param[out] U
126 *> \verbatim
127 *> U is REAL array, dimension (LDU,UCOL)
128 *> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
129 *> UCOL = min(M,N) if JOBZ = 'S'.
130 *> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
131 *> orthogonal matrix U;
132 *> if JOBZ = 'S', U contains the first min(M,N) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is REAL array, dimension (LDVT,N)
147 *> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
148 *> N-by-N orthogonal matrix V**T;
149 *> if JOBZ = 'S', VT contains the first min(M,N) rows of
150 *> V**T (the right singular vectors, stored rowwise);
151 *> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1;
158 *> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
159 *> if JOBZ = 'S', LDVT >= min(M,N).
160 *> \endverbatim
161 *>
162 *> \param[out] WORK
163 *> \verbatim
164 *> WORK is REAL array, dimension (MAX(1,LWORK))
165 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
166 *> \endverbatim
167 *>
168 *> \param[in] LWORK
169 *> \verbatim
170 *> LWORK is INTEGER
171 *> The dimension of the array WORK. LWORK >= 1.
172 *> If LWORK = -1, a workspace query is assumed. The optimal
173 *> size for the WORK array is calculated and stored in WORK(1),
174 *> and no other work except argument checking is performed.
175 *>
176 *> Let mx = max(M,N) and mn = min(M,N).
177 *> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
178 *> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
179 *> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
180 *> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
181 *> These are not tight minimums in all cases; see comments inside code.
182 *> For good performance, LWORK should generally be larger;
183 *> a query is recommended.
184 *> \endverbatim
185 *>
186 *> \param[out] IWORK
187 *> \verbatim
188 *> IWORK is INTEGER array, dimension (8*min(M,N))
189 *> \endverbatim
190 *>
191 *> \param[out] INFO
192 *> \verbatim
193 *> INFO is INTEGER
194 *> = 0: successful exit.
195 *> < 0: if INFO = -i, the i-th argument had an illegal value.
196 *> > 0: SBDSDC did not converge, updating process failed.
197 *> \endverbatim
198 *
199 * Authors:
200 * ========
201 *
202 *> \author Univ. of Tennessee
203 *> \author Univ. of California Berkeley
204 *> \author Univ. of Colorado Denver
205 *> \author NAG Ltd.
206 *
207 *> \date June 2016
208 *
209 *> \ingroup realGEsing
210 *
211 *> \par Contributors:
212 * ==================
213 *>
214 *> Ming Gu and Huan Ren, Computer Science Division, University of
215 *> California at Berkeley, USA
216 *>
217 * =====================================================================
218  SUBROUTINE sgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
219  $ work, lwork, iwork, info )
220  implicit none
221 *
222 * -- LAPACK driver routine (version 3.6.1) --
223 * -- LAPACK is a software package provided by Univ. of Tennessee, --
224 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
225 * June 2016
226 *
227 * .. Scalar Arguments ..
228  CHARACTER JOBZ
229  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
230 * ..
231 * .. Array Arguments ..
232  INTEGER IWORK( * )
233  REAL A( lda, * ), S( * ), U( ldu, * ),
234  $ vt( ldvt, * ), work( * )
235 * ..
236 *
237 * =====================================================================
238 *
239 * .. Parameters ..
240  REAL ZERO, ONE
241  parameter ( zero = 0.0e0, one = 1.0e0 )
242 * ..
243 * .. Local Scalars ..
244  LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
245  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
246  $ ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,
247  $ ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,
248  $ mnthr, nwork, wrkbl
249  INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM,
250  $ lwork_sgebrd_nn, lwork_sgelqf_mn,
251  $ lwork_sgeqrf_mn,
252  $ lwork_sorgbr_p_mm, lwork_sorgbr_q_nn,
253  $ lwork_sorglq_mn, lwork_sorglq_nn,
254  $ lwork_sorgqr_mm, lwork_sorgqr_mn,
255  $ lwork_sormbr_prt_mm, lwork_sormbr_qln_mm,
256  $ lwork_sormbr_prt_mn, lwork_sormbr_qln_mn,
257  $ lwork_sormbr_prt_nn, lwork_sormbr_qln_nn
258  REAL ANRM, BIGNUM, EPS, SMLNUM
259 * ..
260 * .. Local Arrays ..
261  INTEGER IDUM( 1 )
262  REAL DUM( 1 )
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL sbdsdc, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
267  $ xerbla
268 * ..
269 * .. External Functions ..
270  LOGICAL LSAME
271  REAL SLAMCH, SLANGE
272  EXTERNAL slamch, slange, lsame
273 * ..
274 * .. Intrinsic Functions ..
275  INTRINSIC int, max, min, sqrt
276 * ..
277 * .. Executable Statements ..
278 *
279 * Test the input arguments
280 *
281  info = 0
282  minmn = min( m, n )
283  wntqa = lsame( jobz, 'A' )
284  wntqs = lsame( jobz, 'S' )
285  wntqas = wntqa .OR. wntqs
286  wntqo = lsame( jobz, 'O' )
287  wntqn = lsame( jobz, 'N' )
288  lquery = ( lwork.EQ.-1 )
289 *
290  IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
291  info = -1
292  ELSE IF( m.LT.0 ) THEN
293  info = -2
294  ELSE IF( n.LT.0 ) THEN
295  info = -3
296  ELSE IF( lda.LT.max( 1, m ) ) THEN
297  info = -5
298  ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
299  $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
300  info = -8
301  ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
302  $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
303  $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
304  info = -10
305  END IF
306 *
307 * Compute workspace
308 * Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace allocated at that point in the code,
310 * as well as the preferred amount for good performance.
311 * NB refers to the optimal block size for the immediately
312 * following subroutine, as returned by ILAENV.
313 *
314  IF( info.EQ.0 ) THEN
315  minwrk = 1
316  maxwrk = 1
317  bdspac = 0
318  mnthr = int( minmn*11.0e0 / 6.0e0 )
319  IF( m.GE.n .AND. minmn.GT.0 ) THEN
320 *
321 * Compute space needed for SBDSDC
322 *
323  IF( wntqn ) THEN
324 * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
325 * keep 7*N for backwards compatability.
326  bdspac = 7*n
327  ELSE
328  bdspac = 3*n*n + 4*n
329  END IF
330 *
331 * Compute space preferred for each routine
332  CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
333  $ dum(1), dum(1), -1, ierr )
334  lwork_sgebrd_mn = int( dum(1) )
335 *
336  CALL sgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
337  $ dum(1), dum(1), -1, ierr )
338  lwork_sgebrd_nn = int( dum(1) )
339 *
340  CALL sgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
341  lwork_sgeqrf_mn = int( dum(1) )
342 *
343  CALL sorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
344  $ ierr )
345  lwork_sorgbr_q_nn = int( dum(1) )
346 *
347  CALL sorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
348  lwork_sorgqr_mm = int( dum(1) )
349 *
350  CALL sorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
351  lwork_sorgqr_mn = int( dum(1) )
352 *
353  CALL sormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
354  $ dum(1), dum(1), n, dum(1), -1, ierr )
355  lwork_sormbr_prt_nn = int( dum(1) )
356 *
357  CALL sormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
358  $ dum(1), dum(1), n, dum(1), -1, ierr )
359  lwork_sormbr_qln_nn = int( dum(1) )
360 *
361  CALL sormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
362  $ dum(1), dum(1), m, dum(1), -1, ierr )
363  lwork_sormbr_qln_mn = int( dum(1) )
364 *
365  CALL sormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
366  $ dum(1), dum(1), m, dum(1), -1, ierr )
367  lwork_sormbr_qln_mm = int( dum(1) )
368 *
369  IF( m.GE.mnthr ) THEN
370  IF( wntqn ) THEN
371 *
372 * Path 1 (M >> N, JOBZ='N')
373 *
374  wrkbl = n + lwork_sgeqrf_mn
375  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
376  maxwrk = max( wrkbl, bdspac + n )
377  minwrk = bdspac + n
378  ELSE IF( wntqo ) THEN
379 *
380 * Path 2 (M >> N, JOBZ='O')
381 *
382  wrkbl = n + lwork_sgeqrf_mn
383  wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
384  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
385  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
386  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
387  wrkbl = max( wrkbl, 3*n + bdspac )
388  maxwrk = wrkbl + 2*n*n
389  minwrk = bdspac + 2*n*n + 3*n
390  ELSE IF( wntqs ) THEN
391 *
392 * Path 3 (M >> N, JOBZ='S')
393 *
394  wrkbl = n + lwork_sgeqrf_mn
395  wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
396  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
397  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
398  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
399  wrkbl = max( wrkbl, 3*n + bdspac )
400  maxwrk = wrkbl + n*n
401  minwrk = bdspac + n*n + 3*n
402  ELSE IF( wntqa ) THEN
403 *
404 * Path 4 (M >> N, JOBZ='A')
405 *
406  wrkbl = n + lwork_sgeqrf_mn
407  wrkbl = max( wrkbl, n + lwork_sorgqr_mm )
408  wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
409  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
410  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
411  wrkbl = max( wrkbl, 3*n + bdspac )
412  maxwrk = wrkbl + n*n
413  minwrk = n*n + max( 3*n + bdspac, n + m )
414  END IF
415  ELSE
416 *
417 * Path 5 (M >= N, but not much larger)
418 *
419  wrkbl = 3*n + lwork_sgebrd_mn
420  IF( wntqn ) THEN
421 * Path 5n (M >= N, jobz='N')
422  maxwrk = max( wrkbl, 3*n + bdspac )
423  minwrk = 3*n + max( m, bdspac )
424  ELSE IF( wntqo ) THEN
425 * Path 5o (M >= N, jobz='O')
426  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
427  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
428  wrkbl = max( wrkbl, 3*n + bdspac )
429  maxwrk = wrkbl + m*n
430  minwrk = 3*n + max( m, n*n + bdspac )
431  ELSE IF( wntqs ) THEN
432 * Path 5s (M >= N, jobz='S')
433  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
434  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
435  maxwrk = max( wrkbl, 3*n + bdspac )
436  minwrk = 3*n + max( m, bdspac )
437  ELSE IF( wntqa ) THEN
438 * Path 5a (M >= N, jobz='A')
439  wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mm )
440  wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
441  maxwrk = max( wrkbl, 3*n + bdspac )
442  minwrk = 3*n + max( m, bdspac )
443  END IF
444  END IF
445  ELSE IF( minmn.GT.0 ) THEN
446 *
447 * Compute space needed for SBDSDC
448 *
449  IF( wntqn ) THEN
450 * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
451 * keep 7*N for backwards compatability.
452  bdspac = 7*m
453  ELSE
454  bdspac = 3*m*m + 4*m
455  END IF
456 *
457 * Compute space preferred for each routine
458  CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
459  $ dum(1), dum(1), -1, ierr )
460  lwork_sgebrd_mn = int( dum(1) )
461 *
462  CALL sgebrd( m, m, a, m, s, dum(1), dum(1),
463  $ dum(1), dum(1), -1, ierr )
464  lwork_sgebrd_mm = int( dum(1) )
465 *
466  CALL sgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
467  lwork_sgelqf_mn = int( dum(1) )
468 *
469  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
470  lwork_sorglq_nn = int( dum(1) )
471 *
472  CALL sorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
473  lwork_sorglq_mn = int( dum(1) )
474 *
475  CALL sorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
476  lwork_sorgbr_p_mm = int( dum(1) )
477 *
478  CALL sormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
479  $ dum(1), dum(1), m, dum(1), -1, ierr )
480  lwork_sormbr_prt_mm = int( dum(1) )
481 *
482  CALL sormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
483  $ dum(1), dum(1), m, dum(1), -1, ierr )
484  lwork_sormbr_prt_mn = int( dum(1) )
485 *
486  CALL sormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
487  $ dum(1), dum(1), n, dum(1), -1, ierr )
488  lwork_sormbr_prt_nn = int( dum(1) )
489 *
490  CALL sormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
491  $ dum(1), dum(1), m, dum(1), -1, ierr )
492  lwork_sormbr_qln_mm = int( dum(1) )
493 *
494  IF( n.GE.mnthr ) THEN
495  IF( wntqn ) THEN
496 *
497 * Path 1t (N >> M, JOBZ='N')
498 *
499  wrkbl = m + lwork_sgelqf_mn
500  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
501  maxwrk = max( wrkbl, bdspac + m )
502  minwrk = bdspac + m
503  ELSE IF( wntqo ) THEN
504 *
505 * Path 2t (N >> M, JOBZ='O')
506 *
507  wrkbl = m + lwork_sgelqf_mn
508  wrkbl = max( wrkbl, m + lwork_sorglq_mn )
509  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
510  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
511  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
512  wrkbl = max( wrkbl, 3*m + bdspac )
513  maxwrk = wrkbl + 2*m*m
514  minwrk = bdspac + 2*m*m + 3*m
515  ELSE IF( wntqs ) THEN
516 *
517 * Path 3t (N >> M, JOBZ='S')
518 *
519  wrkbl = m + lwork_sgelqf_mn
520  wrkbl = max( wrkbl, m + lwork_sorglq_mn )
521  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
522  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
523  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
524  wrkbl = max( wrkbl, 3*m + bdspac )
525  maxwrk = wrkbl + m*m
526  minwrk = bdspac + m*m + 3*m
527  ELSE IF( wntqa ) THEN
528 *
529 * Path 4t (N >> M, JOBZ='A')
530 *
531  wrkbl = m + lwork_sgelqf_mn
532  wrkbl = max( wrkbl, m + lwork_sorglq_nn )
533  wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
534  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
535  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
536  wrkbl = max( wrkbl, 3*m + bdspac )
537  maxwrk = wrkbl + m*m
538  minwrk = m*m + max( 3*m + bdspac, m + n )
539  END IF
540  ELSE
541 *
542 * Path 5t (N > M, but not much larger)
543 *
544  wrkbl = 3*m + lwork_sgebrd_mn
545  IF( wntqn ) THEN
546 * Path 5tn (N > M, jobz='N')
547  maxwrk = max( wrkbl, 3*m + bdspac )
548  minwrk = 3*m + max( n, bdspac )
549  ELSE IF( wntqo ) THEN
550 * Path 5to (N > M, jobz='O')
551  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
552  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
553  wrkbl = max( wrkbl, 3*m + bdspac )
554  maxwrk = wrkbl + m*n
555  minwrk = 3*m + max( n, m*m + bdspac )
556  ELSE IF( wntqs ) THEN
557 * Path 5ts (N > M, jobz='S')
558  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
559  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
560  maxwrk = max( wrkbl, 3*m + bdspac )
561  minwrk = 3*m + max( n, bdspac )
562  ELSE IF( wntqa ) THEN
563 * Path 5ta (N > M, jobz='A')
564  wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
565  wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_nn )
566  maxwrk = max( wrkbl, 3*m + bdspac )
567  minwrk = 3*m + max( n, bdspac )
568  END IF
569  END IF
570  END IF
571 
572  maxwrk = max( maxwrk, minwrk )
573  work( 1 ) = maxwrk
574 *
575  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
576  info = -12
577  END IF
578  END IF
579 *
580  IF( info.NE.0 ) THEN
581  CALL xerbla( 'SGESDD', -info )
582  RETURN
583  ELSE IF( lquery ) THEN
584  RETURN
585  END IF
586 *
587 * Quick return if possible
588 *
589  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
590  RETURN
591  END IF
592 *
593 * Get machine constants
594 *
595  eps = slamch( 'P' )
596  smlnum = sqrt( slamch( 'S' ) ) / eps
597  bignum = one / smlnum
598 *
599 * Scale A if max element outside range [SMLNUM,BIGNUM]
600 *
601  anrm = slange( 'M', m, n, a, lda, dum )
602  iscl = 0
603  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
604  iscl = 1
605  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
606  ELSE IF( anrm.GT.bignum ) THEN
607  iscl = 1
608  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
609  END IF
610 *
611  IF( m.GE.n ) THEN
612 *
613 * A has at least as many rows as columns. If A has sufficiently
614 * more rows than columns, first reduce using the QR
615 * decomposition (if sufficient workspace available)
616 *
617  IF( m.GE.mnthr ) THEN
618 *
619  IF( wntqn ) THEN
620 *
621 * Path 1 (M >> N, JOBZ='N')
622 * No singular vectors to be computed
623 *
624  itau = 1
625  nwork = itau + n
626 *
627 * Compute A=Q*R
628 * Workspace: need N [tau] + N [work]
629 * Workspace: prefer N [tau] + N*NB [work]
630 *
631  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
632  $ lwork - nwork + 1, ierr )
633 *
634 * Zero out below R
635 *
636  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
637  ie = 1
638  itauq = ie + n
639  itaup = itauq + n
640  nwork = itaup + n
641 *
642 * Bidiagonalize R in A
643 * Workspace: need 3*N [e, tauq, taup] + N [work]
644 * Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
645 *
646  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
647  $ work( itaup ), work( nwork ), lwork-nwork+1,
648  $ ierr )
649  nwork = ie + n
650 *
651 * Perform bidiagonal SVD, computing singular values only
652 * Workspace: need N [e] + BDSPAC
653 *
654  CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
655  $ dum, idum, work( nwork ), iwork, info )
656 *
657  ELSE IF( wntqo ) THEN
658 *
659 * Path 2 (M >> N, JOBZ = 'O')
660 * N left singular vectors to be overwritten on A and
661 * N right singular vectors to be computed in VT
662 *
663  ir = 1
664 *
665 * WORK(IR) is LDWRKR by N
666 *
667  IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
668  ldwrkr = lda
669  ELSE
670  ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
671  END IF
672  itau = ir + ldwrkr*n
673  nwork = itau + n
674 *
675 * Compute A=Q*R
676 * Workspace: need N*N [R] + N [tau] + N [work]
677 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
678 *
679  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
680  $ lwork - nwork + 1, ierr )
681 *
682 * Copy R to WORK(IR), zeroing out below it
683 *
684  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
685  CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
686  $ ldwrkr )
687 *
688 * Generate Q in A
689 * Workspace: need N*N [R] + N [tau] + N [work]
690 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
691 *
692  CALL sorgqr( m, n, n, a, lda, work( itau ),
693  $ work( nwork ), lwork - nwork + 1, ierr )
694  ie = itau
695  itauq = ie + n
696  itaup = itauq + n
697  nwork = itaup + n
698 *
699 * Bidiagonalize R in WORK(IR)
700 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
701 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
702 *
703  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
704  $ work( itauq ), work( itaup ), work( nwork ),
705  $ lwork - nwork + 1, ierr )
706 *
707 * WORK(IU) is N by N
708 *
709  iu = nwork
710  nwork = iu + n*n
711 *
712 * Perform bidiagonal SVD, computing left singular vectors
713 * of bidiagonal matrix in WORK(IU) and computing right
714 * singular vectors of bidiagonal matrix in VT
715 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
716 *
717  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
718  $ vt, ldvt, dum, idum, work( nwork ), iwork,
719  $ info )
720 *
721 * Overwrite WORK(IU) by left singular vectors of R
722 * and VT by right singular vectors of R
723 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
724 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
725 *
726  CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
727  $ work( itauq ), work( iu ), n, work( nwork ),
728  $ lwork - nwork + 1, ierr )
729  CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
730  $ work( itaup ), vt, ldvt, work( nwork ),
731  $ lwork - nwork + 1, ierr )
732 *
733 * Multiply Q in A by left singular vectors of R in
734 * WORK(IU), storing result in WORK(IR) and copying to A
735 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
736 * Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
737 *
738  DO 10 i = 1, m, ldwrkr
739  chunk = min( m - i + 1, ldwrkr )
740  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
741  $ lda, work( iu ), n, zero, work( ir ),
742  $ ldwrkr )
743  CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
744  $ a( i, 1 ), lda )
745  10 CONTINUE
746 *
747  ELSE IF( wntqs ) THEN
748 *
749 * Path 3 (M >> N, JOBZ='S')
750 * N left singular vectors to be computed in U and
751 * N right singular vectors to be computed in VT
752 *
753  ir = 1
754 *
755 * WORK(IR) is N by N
756 *
757  ldwrkr = n
758  itau = ir + ldwrkr*n
759  nwork = itau + n
760 *
761 * Compute A=Q*R
762 * Workspace: need N*N [R] + N [tau] + N [work]
763 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
764 *
765  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
766  $ lwork - nwork + 1, ierr )
767 *
768 * Copy R to WORK(IR), zeroing out below it
769 *
770  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
771  CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
772  $ ldwrkr )
773 *
774 * Generate Q in A
775 * Workspace: need N*N [R] + N [tau] + N [work]
776 * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
777 *
778  CALL sorgqr( m, n, n, a, lda, work( itau ),
779  $ work( nwork ), lwork - nwork + 1, ierr )
780  ie = itau
781  itauq = ie + n
782  itaup = itauq + n
783  nwork = itaup + n
784 *
785 * Bidiagonalize R in WORK(IR)
786 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
787 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
788 *
789  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790  $ work( itauq ), work( itaup ), work( nwork ),
791  $ lwork - nwork + 1, ierr )
792 *
793 * Perform bidiagonal SVD, computing left singular vectors
794 * of bidiagoal matrix in U and computing right singular
795 * vectors of bidiagonal matrix in VT
796 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
797 *
798  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
799  $ ldvt, dum, idum, work( nwork ), iwork,
800  $ info )
801 *
802 * Overwrite U by left singular vectors of R and VT
803 * by right singular vectors of R
804 * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
805 * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
806 *
807  CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
808  $ work( itauq ), u, ldu, work( nwork ),
809  $ lwork - nwork + 1, ierr )
810 *
811  CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
812  $ work( itaup ), vt, ldvt, work( nwork ),
813  $ lwork - nwork + 1, ierr )
814 *
815 * Multiply Q in A by left singular vectors of R in
816 * WORK(IR), storing result in U
817 * Workspace: need N*N [R]
818 *
819  CALL slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
820  CALL sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
821  $ ldwrkr, zero, u, ldu )
822 *
823  ELSE IF( wntqa ) THEN
824 *
825 * Path 4 (M >> N, JOBZ='A')
826 * M left singular vectors to be computed in U and
827 * N right singular vectors to be computed in VT
828 *
829  iu = 1
830 *
831 * WORK(IU) is N by N
832 *
833  ldwrku = n
834  itau = iu + ldwrku*n
835  nwork = itau + n
836 *
837 * Compute A=Q*R, copying result to U
838 * Workspace: need N*N [U] + N [tau] + N [work]
839 * Workspace: prefer N*N [U] + N [tau] + N*NB [work]
840 *
841  CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
842  $ lwork - nwork + 1, ierr )
843  CALL slacpy( 'L', m, n, a, lda, u, ldu )
844 *
845 * Generate Q in U
846 * Workspace: need N*N [U] + N [tau] + M [work]
847 * Workspace: prefer N*N [U] + N [tau] + M*NB [work]
848  CALL sorgqr( m, m, n, u, ldu, work( itau ),
849  $ work( nwork ), lwork - nwork + 1, ierr )
850 *
851 * Produce R in A, zeroing out other entries
852 *
853  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
854  ie = itau
855  itauq = ie + n
856  itaup = itauq + n
857  nwork = itaup + n
858 *
859 * Bidiagonalize R in A
860 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
861 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
862 *
863  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
864  $ work( itaup ), work( nwork ), lwork-nwork+1,
865  $ ierr )
866 *
867 * Perform bidiagonal SVD, computing left singular vectors
868 * of bidiagonal matrix in WORK(IU) and computing right
869 * singular vectors of bidiagonal matrix in VT
870 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
871 *
872  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
873  $ vt, ldvt, dum, idum, work( nwork ), iwork,
874  $ info )
875 *
876 * Overwrite WORK(IU) by left singular vectors of R and VT
877 * by right singular vectors of R
878 * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
879 * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
880 *
881  CALL sormbr( 'Q', 'L', 'N', n, n, n, a, lda,
882  $ work( itauq ), work( iu ), ldwrku,
883  $ work( nwork ), lwork - nwork + 1, ierr )
884  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
885  $ work( itaup ), vt, ldvt, work( nwork ),
886  $ lwork - nwork + 1, ierr )
887 *
888 * Multiply Q in U by left singular vectors of R in
889 * WORK(IU), storing result in A
890 * Workspace: need N*N [U]
891 *
892  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
893  $ ldwrku, zero, a, lda )
894 *
895 * Copy left singular vectors of A from A to U
896 *
897  CALL slacpy( 'F', m, n, a, lda, u, ldu )
898 *
899  END IF
900 *
901  ELSE
902 *
903 * M .LT. MNTHR
904 *
905 * Path 5 (M >= N, but not much larger)
906 * Reduce to bidiagonal form without QR decomposition
907 *
908  ie = 1
909  itauq = ie + n
910  itaup = itauq + n
911  nwork = itaup + n
912 *
913 * Bidiagonalize A
914 * Workspace: need 3*N [e, tauq, taup] + M [work]
915 * Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
916 *
917  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
918  $ work( itaup ), work( nwork ), lwork-nwork+1,
919  $ ierr )
920  IF( wntqn ) THEN
921 *
922 * Path 5n (M >= N, JOBZ='N')
923 * Perform bidiagonal SVD, only computing singular values
924 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
925 *
926  CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
927  $ dum, idum, work( nwork ), iwork, info )
928  ELSE IF( wntqo ) THEN
929 * Path 5o (M >= N, JOBZ='O')
930  iu = nwork
931  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
932 *
933 * WORK( IU ) is M by N
934 *
935  ldwrku = m
936  nwork = iu + ldwrku*n
937  CALL slaset( 'F', m, n, zero, zero, work( iu ),
938  $ ldwrku )
939 * IR is unused; silence compile warnings
940  ir = -1
941  ELSE
942 *
943 * WORK( IU ) is N by N
944 *
945  ldwrku = n
946  nwork = iu + ldwrku*n
947 *
948 * WORK(IR) is LDWRKR by N
949 *
950  ir = nwork
951  ldwrkr = ( lwork - n*n - 3*n ) / n
952  END IF
953  nwork = iu + ldwrku*n
954 *
955 * Perform bidiagonal SVD, computing left singular vectors
956 * of bidiagonal matrix in WORK(IU) and computing right
957 * singular vectors of bidiagonal matrix in VT
958 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
959 *
960  CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
961  $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
962  $ iwork, info )
963 *
964 * Overwrite VT by right singular vectors of A
965 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
966 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
967 *
968  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
969  $ work( itaup ), vt, ldvt, work( nwork ),
970  $ lwork - nwork + 1, ierr )
971 *
972  IF( lwork .GE. m*n + 3*n + bdspac ) THEN
973 *
974 * Path 5o-fast
975 * Overwrite WORK(IU) by left singular vectors of A
976 * Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
977 * Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
978 *
979  CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
980  $ work( itauq ), work( iu ), ldwrku,
981  $ work( nwork ), lwork - nwork + 1, ierr )
982 *
983 * Copy left singular vectors of A from WORK(IU) to A
984 *
985  CALL slacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
986  ELSE
987 *
988 * Path 5o-slow
989 * Generate Q in A
990 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
991 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
992 *
993  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
994  $ work( nwork ), lwork - nwork + 1, ierr )
995 *
996 * Multiply Q in A by left singular vectors of
997 * bidiagonal matrix in WORK(IU), storing result in
998 * WORK(IR) and copying to A
999 * Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1000 * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1001 *
1002  DO 20 i = 1, m, ldwrkr
1003  chunk = min( m - i + 1, ldwrkr )
1004  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1005  $ lda, work( iu ), ldwrku, zero,
1006  $ work( ir ), ldwrkr )
1007  CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
1008  $ a( i, 1 ), lda )
1009  20 CONTINUE
1010  END IF
1011 *
1012  ELSE IF( wntqs ) THEN
1013 *
1014 * Path 5s (M >= N, JOBZ='S')
1015 * Perform bidiagonal SVD, computing left singular vectors
1016 * of bidiagonal matrix in U and computing right singular
1017 * vectors of bidiagonal matrix in VT
1018 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1019 *
1020  CALL slaset( 'F', m, n, zero, zero, u, ldu )
1021  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1022  $ ldvt, dum, idum, work( nwork ), iwork,
1023  $ info )
1024 *
1025 * Overwrite U by left singular vectors of A and VT
1026 * by right singular vectors of A
1027 * Workspace: need 3*N [e, tauq, taup] + N [work]
1028 * Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1029 *
1030  CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1031  $ work( itauq ), u, ldu, work( nwork ),
1032  $ lwork - nwork + 1, ierr )
1033  CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
1034  $ work( itaup ), vt, ldvt, work( nwork ),
1035  $ lwork - nwork + 1, ierr )
1036  ELSE IF( wntqa ) THEN
1037 *
1038 * Path 5a (M >= N, JOBZ='A')
1039 * Perform bidiagonal SVD, computing left singular vectors
1040 * of bidiagonal matrix in U and computing right singular
1041 * vectors of bidiagonal matrix in VT
1042 * Workspace: need 3*N [e, tauq, taup] + BDSPAC
1043 *
1044  CALL slaset( 'F', m, m, zero, zero, u, ldu )
1045  CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1046  $ ldvt, dum, idum, work( nwork ), iwork,
1047  $ info )
1048 *
1049 * Set the right corner of U to identity matrix
1050 *
1051  IF( m.GT.n ) THEN
1052  CALL slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1053  $ ldu )
1054  END IF
1055 *
1056 * Overwrite U by left singular vectors of A and VT
1057 * by right singular vectors of A
1058 * Workspace: need 3*N [e, tauq, taup] + M [work]
1059 * Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1060 *
1061  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1062  $ work( itauq ), u, ldu, work( nwork ),
1063  $ lwork - nwork + 1, ierr )
1064  CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1065  $ work( itaup ), vt, ldvt, work( nwork ),
1066  $ lwork - nwork + 1, ierr )
1067  END IF
1068 *
1069  END IF
1070 *
1071  ELSE
1072 *
1073 * A has more columns than rows. If A has sufficiently more
1074 * columns than rows, first reduce using the LQ decomposition (if
1075 * sufficient workspace available)
1076 *
1077  IF( n.GE.mnthr ) THEN
1078 *
1079  IF( wntqn ) THEN
1080 *
1081 * Path 1t (N >> M, JOBZ='N')
1082 * No singular vectors to be computed
1083 *
1084  itau = 1
1085  nwork = itau + m
1086 *
1087 * Compute A=L*Q
1088 * Workspace: need M [tau] + M [work]
1089 * Workspace: prefer M [tau] + M*NB [work]
1090 *
1091  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1092  $ lwork - nwork + 1, ierr )
1093 *
1094 * Zero out above L
1095 *
1096  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1097  ie = 1
1098  itauq = ie + m
1099  itaup = itauq + m
1100  nwork = itaup + m
1101 *
1102 * Bidiagonalize L in A
1103 * Workspace: need 3*M [e, tauq, taup] + M [work]
1104 * Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1105 *
1106  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1107  $ work( itaup ), work( nwork ), lwork-nwork+1,
1108  $ ierr )
1109  nwork = ie + m
1110 *
1111 * Perform bidiagonal SVD, computing singular values only
1112 * Workspace: need M [e] + BDSPAC
1113 *
1114  CALL sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1115  $ dum, idum, work( nwork ), iwork, info )
1116 *
1117  ELSE IF( wntqo ) THEN
1118 *
1119 * Path 2t (N >> M, JOBZ='O')
1120 * M right singular vectors to be overwritten on A and
1121 * M left singular vectors to be computed in U
1122 *
1123  ivt = 1
1124 *
1125 * WORK(IVT) is M by M
1126 * WORK(IL) is M by M; it is later resized to M by chunk for gemm
1127 *
1128  il = ivt + m*m
1129  IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1130  ldwrkl = m
1131  chunk = n
1132  ELSE
1133  ldwrkl = m
1134  chunk = ( lwork - m*m ) / m
1135  END IF
1136  itau = il + ldwrkl*m
1137  nwork = itau + m
1138 *
1139 * Compute A=L*Q
1140 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1141 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1142 *
1143  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1144  $ lwork - nwork + 1, ierr )
1145 *
1146 * Copy L to WORK(IL), zeroing about above it
1147 *
1148  CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1149  CALL slaset( 'U', m - 1, m - 1, zero, zero,
1150  $ work( il + ldwrkl ), ldwrkl )
1151 *
1152 * Generate Q in A
1153 * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1154 * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1155 *
1156  CALL sorglq( m, n, m, a, lda, work( itau ),
1157  $ work( nwork ), lwork - nwork + 1, ierr )
1158  ie = itau
1159  itauq = ie + m
1160  itaup = itauq + m
1161  nwork = itaup + m
1162 *
1163 * Bidiagonalize L in WORK(IL)
1164 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1165 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1166 *
1167  CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1168  $ work( itauq ), work( itaup ), work( nwork ),
1169  $ lwork - nwork + 1, ierr )
1170 *
1171 * Perform bidiagonal SVD, computing left singular vectors
1172 * of bidiagonal matrix in U, and computing right singular
1173 * vectors of bidiagonal matrix in WORK(IVT)
1174 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1175 *
1176  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1177  $ work( ivt ), m, dum, idum, work( nwork ),
1178  $ iwork, info )
1179 *
1180 * Overwrite U by left singular vectors of L and WORK(IVT)
1181 * by right singular vectors of L
1182 * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1183 * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1184 *
1185  CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1186  $ work( itauq ), u, ldu, work( nwork ),
1187  $ lwork - nwork + 1, ierr )
1188  CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1189  $ work( itaup ), work( ivt ), m,
1190  $ work( nwork ), lwork - nwork + 1, ierr )
1191 *
1192 * Multiply right singular vectors of L in WORK(IVT) by Q
1193 * in A, storing result in WORK(IL) and copying to A
1194 * Workspace: need M*M [VT] + M*M [L]
1195 * Workspace: prefer M*M [VT] + M*N [L]
1196 * At this point, L is resized as M by chunk.
1197 *
1198  DO 30 i = 1, n, chunk
1199  blk = min( n - i + 1, chunk )
1200  CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1201  $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1202  CALL slacpy( 'F', m, blk, work( il ), ldwrkl,
1203  $ a( 1, i ), lda )
1204  30 CONTINUE
1205 *
1206  ELSE IF( wntqs ) THEN
1207 *
1208 * Path 3t (N >> M, JOBZ='S')
1209 * M right singular vectors to be computed in VT and
1210 * M left singular vectors to be computed in U
1211 *
1212  il = 1
1213 *
1214 * WORK(IL) is M by M
1215 *
1216  ldwrkl = m
1217  itau = il + ldwrkl*m
1218  nwork = itau + m
1219 *
1220 * Compute A=L*Q
1221 * Workspace: need M*M [L] + M [tau] + M [work]
1222 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1223 *
1224  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1225  $ lwork - nwork + 1, ierr )
1226 *
1227 * Copy L to WORK(IL), zeroing out above it
1228 *
1229  CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1230  CALL slaset( 'U', m - 1, m - 1, zero, zero,
1231  $ work( il + ldwrkl ), ldwrkl )
1232 *
1233 * Generate Q in A
1234 * Workspace: need M*M [L] + M [tau] + M [work]
1235 * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1236 *
1237  CALL sorglq( m, n, m, a, lda, work( itau ),
1238  $ work( nwork ), lwork - nwork + 1, ierr )
1239  ie = itau
1240  itauq = ie + m
1241  itaup = itauq + m
1242  nwork = itaup + m
1243 *
1244 * Bidiagonalize L in WORK(IU).
1245 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1246 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1247 *
1248  CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1249  $ work( itauq ), work( itaup ), work( nwork ),
1250  $ lwork - nwork + 1, ierr )
1251 *
1252 * Perform bidiagonal SVD, computing left singular vectors
1253 * of bidiagonal matrix in U and computing right singular
1254 * vectors of bidiagonal matrix in VT
1255 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1256 *
1257  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1258  $ ldvt, dum, idum, work( nwork ), iwork,
1259  $ info )
1260 *
1261 * Overwrite U by left singular vectors of L and VT
1262 * by right singular vectors of L
1263 * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1264 * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1265 *
1266  CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1267  $ work( itauq ), u, ldu, work( nwork ),
1268  $ lwork - nwork + 1, ierr )
1269  CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1270  $ work( itaup ), vt, ldvt, work( nwork ),
1271  $ lwork - nwork + 1, ierr )
1272 *
1273 * Multiply right singular vectors of L in WORK(IL) by
1274 * Q in A, storing result in VT
1275 * Workspace: need M*M [L]
1276 *
1277  CALL slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1278  CALL sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1279  $ a, lda, zero, vt, ldvt )
1280 *
1281  ELSE IF( wntqa ) THEN
1282 *
1283 * Path 4t (N >> M, JOBZ='A')
1284 * N right singular vectors to be computed in VT and
1285 * M left singular vectors to be computed in U
1286 *
1287  ivt = 1
1288 *
1289 * WORK(IVT) is M by M
1290 *
1291  ldwkvt = m
1292  itau = ivt + ldwkvt*m
1293  nwork = itau + m
1294 *
1295 * Compute A=L*Q, copying result to VT
1296 * Workspace: need M*M [VT] + M [tau] + M [work]
1297 * Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1298 *
1299  CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1300  $ lwork - nwork + 1, ierr )
1301  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
1302 *
1303 * Generate Q in VT
1304 * Workspace: need M*M [VT] + M [tau] + N [work]
1305 * Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1306 *
1307  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
1308  $ work( nwork ), lwork - nwork + 1, ierr )
1309 *
1310 * Produce L in A, zeroing out other entries
1311 *
1312  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1313  ie = itau
1314  itauq = ie + m
1315  itaup = itauq + m
1316  nwork = itaup + m
1317 *
1318 * Bidiagonalize L in A
1319 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1320 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1321 *
1322  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1323  $ work( itaup ), work( nwork ), lwork-nwork+1,
1324  $ ierr )
1325 *
1326 * Perform bidiagonal SVD, computing left singular vectors
1327 * of bidiagonal matrix in U and computing right singular
1328 * vectors of bidiagonal matrix in WORK(IVT)
1329 * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1330 *
1331  CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1332  $ work( ivt ), ldwkvt, dum, idum,
1333  $ work( nwork ), iwork, info )
1334 *
1335 * Overwrite U by left singular vectors of L and WORK(IVT)
1336 * by right singular vectors of L
1337 * Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1338 * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1339 *
1340  CALL sormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1341  $ work( itauq ), u, ldu, work( nwork ),
1342  $ lwork - nwork + 1, ierr )
1343  CALL sormbr( 'P', 'R', 'T', m, m, m, a, lda,
1344  $ work( itaup ), work( ivt ), ldwkvt,
1345  $ work( nwork ), lwork - nwork + 1, ierr )
1346 *
1347 * Multiply right singular vectors of L in WORK(IVT) by
1348 * Q in VT, storing result in A
1349 * Workspace: need M*M [VT]
1350 *
1351  CALL sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1352  $ vt, ldvt, zero, a, lda )
1353 *
1354 * Copy right singular vectors of A from A to VT
1355 *
1356  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
1357 *
1358  END IF
1359 *
1360  ELSE
1361 *
1362 * N .LT. MNTHR
1363 *
1364 * Path 5t (N > M, but not much larger)
1365 * Reduce to bidiagonal form without LQ decomposition
1366 *
1367  ie = 1
1368  itauq = ie + m
1369  itaup = itauq + m
1370  nwork = itaup + m
1371 *
1372 * Bidiagonalize A
1373 * Workspace: need 3*M [e, tauq, taup] + N [work]
1374 * Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1375 *
1376  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1377  $ work( itaup ), work( nwork ), lwork-nwork+1,
1378  $ ierr )
1379  IF( wntqn ) THEN
1380 *
1381 * Path 5tn (N > M, JOBZ='N')
1382 * Perform bidiagonal SVD, only computing singular values
1383 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1384 *
1385  CALL sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1386  $ dum, idum, work( nwork ), iwork, info )
1387  ELSE IF( wntqo ) THEN
1388 * Path 5to (N > M, JOBZ='O')
1389  ldwkvt = m
1390  ivt = nwork
1391  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1392 *
1393 * WORK( IVT ) is M by N
1394 *
1395  CALL slaset( 'F', m, n, zero, zero, work( ivt ),
1396  $ ldwkvt )
1397  nwork = ivt + ldwkvt*n
1398 * IL is unused; silence compile warnings
1399  il = -1
1400  ELSE
1401 *
1402 * WORK( IVT ) is M by M
1403 *
1404  nwork = ivt + ldwkvt*m
1405  il = nwork
1406 *
1407 * WORK(IL) is M by CHUNK
1408 *
1409  chunk = ( lwork - m*m - 3*m ) / m
1410  END IF
1411 *
1412 * Perform bidiagonal SVD, computing left singular vectors
1413 * of bidiagonal matrix in U and computing right singular
1414 * vectors of bidiagonal matrix in WORK(IVT)
1415 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1416 *
1417  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1418  $ work( ivt ), ldwkvt, dum, idum,
1419  $ work( nwork ), iwork, info )
1420 *
1421 * Overwrite U by left singular vectors of A
1422 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1423 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1424 *
1425  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1426  $ work( itauq ), u, ldu, work( nwork ),
1427  $ lwork - nwork + 1, ierr )
1428 *
1429  IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1430 *
1431 * Path 5to-fast
1432 * Overwrite WORK(IVT) by left singular vectors of A
1433 * Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1434 * Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1435 *
1436  CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1437  $ work( itaup ), work( ivt ), ldwkvt,
1438  $ work( nwork ), lwork - nwork + 1, ierr )
1439 *
1440 * Copy right singular vectors of A from WORK(IVT) to A
1441 *
1442  CALL slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1443  ELSE
1444 *
1445 * Path 5to-slow
1446 * Generate P**T in A
1447 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1448 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1449 *
1450  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
1451  $ work( nwork ), lwork - nwork + 1, ierr )
1452 *
1453 * Multiply Q in A by right singular vectors of
1454 * bidiagonal matrix in WORK(IVT), storing result in
1455 * WORK(IL) and copying to A
1456 * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1457 * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1458 *
1459  DO 40 i = 1, n, chunk
1460  blk = min( n - i + 1, chunk )
1461  CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1462  $ ldwkvt, a( 1, i ), lda, zero,
1463  $ work( il ), m )
1464  CALL slacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1465  $ lda )
1466  40 CONTINUE
1467  END IF
1468  ELSE IF( wntqs ) THEN
1469 *
1470 * Path 5ts (N > M, JOBZ='S')
1471 * Perform bidiagonal SVD, computing left singular vectors
1472 * of bidiagonal matrix in U and computing right singular
1473 * vectors of bidiagonal matrix in VT
1474 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1475 *
1476  CALL slaset( 'F', m, n, zero, zero, vt, ldvt )
1477  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1478  $ ldvt, dum, idum, work( nwork ), iwork,
1479  $ info )
1480 *
1481 * Overwrite U by left singular vectors of A and VT
1482 * by right singular vectors of A
1483 * Workspace: need 3*M [e, tauq, taup] + M [work]
1484 * Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1485 *
1486  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1487  $ work( itauq ), u, ldu, work( nwork ),
1488  $ lwork - nwork + 1, ierr )
1489  CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1490  $ work( itaup ), vt, ldvt, work( nwork ),
1491  $ lwork - nwork + 1, ierr )
1492  ELSE IF( wntqa ) THEN
1493 *
1494 * Path 5ta (N > M, JOBZ='A')
1495 * Perform bidiagonal SVD, computing left singular vectors
1496 * of bidiagonal matrix in U and computing right singular
1497 * vectors of bidiagonal matrix in VT
1498 * Workspace: need 3*M [e, tauq, taup] + BDSPAC
1499 *
1500  CALL slaset( 'F', n, n, zero, zero, vt, ldvt )
1501  CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1502  $ ldvt, dum, idum, work( nwork ), iwork,
1503  $ info )
1504 *
1505 * Set the right corner of VT to identity matrix
1506 *
1507  IF( n.GT.m ) THEN
1508  CALL slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1509  $ ldvt )
1510  END IF
1511 *
1512 * Overwrite U by left singular vectors of A and VT
1513 * by right singular vectors of A
1514 * Workspace: need 3*M [e, tauq, taup] + N [work]
1515 * Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1516 *
1517  CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1518  $ work( itauq ), u, ldu, work( nwork ),
1519  $ lwork - nwork + 1, ierr )
1520  CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1521  $ work( itaup ), vt, ldvt, work( nwork ),
1522  $ lwork - nwork + 1, ierr )
1523  END IF
1524 *
1525  END IF
1526 *
1527  END IF
1528 *
1529 * Undo scaling if necessary
1530 *
1531  IF( iscl.EQ.1 ) THEN
1532  IF( anrm.GT.bignum )
1533  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1534  $ ierr )
1535  IF( anrm.LT.smlnum )
1536  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1537  $ ierr )
1538  END IF
1539 *
1540 * Return optimal workspace in WORK(1)
1541 *
1542  work( 1 ) = maxwrk
1543 *
1544  RETURN
1545 *
1546 * End of SGESDD
1547 *
1548  END
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:129
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:145
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:138
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:105
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: slaset.f:112
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:198
subroutine sbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
SBDSDC
Definition: sbdsdc.f:207
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:207
subroutine sgesdd(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
SGESDD
Definition: sgesdd.f:220
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:130
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:159
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:137