LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
sgesdd.f
Go to the documentation of this file.
1*> \brief \b SGESDD
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SGESDD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesdd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesdd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesdd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
22* WORK, LWORK, IWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER JOBZ
26* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27* ..
28* .. Array Arguments ..
29* INTEGER IWORK( * )
30* REAL A( LDA, * ), S( * ), U( LDU, * ),
31* $ VT( LDVT, * ), WORK( * )
32* ..
33*
34*
35*> \par Purpose:
36* =============
37*>
38*> \verbatim
39*>
40*> SGESDD computes the singular value decomposition (SVD) of a real
41*> M-by-N matrix A, optionally computing the left and right singular
42*> vectors. If singular vectors are desired, it uses a
43*> divide-and-conquer algorithm.
44*>
45*> The SVD is written
46*>
47*> A = U * SIGMA * transpose(V)
48*>
49*> where SIGMA is an M-by-N matrix which is zero except for its
50*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
51*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
52*> are the singular values of A; they are real and non-negative, and
53*> are returned in descending order. The first min(m,n) columns of
54*> U and V are the left and right singular vectors of A.
55*>
56*> Note that the routine returns VT = V**T, not V.
57*>
58*> The divide and conquer algorithm makes very mild assumptions about
59*> floating point arithmetic. It will work on machines with a guard
60*> digit in add/subtract, or on those binary machines without guard
61*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
62*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
63*> without guard digits, but we know of none.
64*> \endverbatim
65*
66* Arguments:
67* ==========
68*
69*> \param[in] JOBZ
70*> \verbatim
71*> JOBZ is CHARACTER*1
72*> Specifies options for computing all or part of the matrix U:
73*> = 'A': all M columns of U and all N rows of V**T are
74*> returned in the arrays U and VT;
75*> = 'S': the first min(M,N) columns of U and the first
76*> min(M,N) rows of V**T are returned in the arrays U
77*> and VT;
78*> = 'O': If M >= N, the first N columns of U are overwritten
79*> on the array A and all rows of V**T are returned in
80*> the array VT;
81*> otherwise, all columns of U are returned in the
82*> array U and the first M rows of V**T are overwritten
83*> in the array A;
84*> = 'N': no columns of U or rows of V**T are computed.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*> M is INTEGER
90*> The number of rows of the input matrix A. M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*> N is INTEGER
96*> The number of columns of the input matrix A. N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*> A is REAL array, dimension (LDA,N)
102*> On entry, the M-by-N matrix A.
103*> On exit,
104*> if JOBZ = 'O', A is overwritten with the first N columns
105*> of U (the left singular vectors, stored
106*> columnwise) if M >= N;
107*> A is overwritten with the first M rows
108*> of V**T (the right singular vectors, stored
109*> rowwise) otherwise.
110*> if JOBZ .ne. 'O', the contents of A are destroyed.
111*> \endverbatim
112*>
113*> \param[in] LDA
114*> \verbatim
115*> LDA is INTEGER
116*> The leading dimension of the array A. LDA >= max(1,M).
117*> \endverbatim
118*>
119*> \param[out] S
120*> \verbatim
121*> S is REAL array, dimension (min(M,N))
122*> The singular values of A, sorted so that S(i) >= S(i+1).
123*> \endverbatim
124*>
125*> \param[out] U
126*> \verbatim
127*> U is REAL array, dimension (LDU,UCOL)
128*> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
129*> UCOL = min(M,N) if JOBZ = 'S'.
130*> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
131*> orthogonal matrix U;
132*> if JOBZ = 'S', U contains the first min(M,N) columns of U
133*> (the left singular vectors, stored columnwise);
134*> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
135*> \endverbatim
136*>
137*> \param[in] LDU
138*> \verbatim
139*> LDU is INTEGER
140*> The leading dimension of the array U. LDU >= 1; if
141*> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
142*> \endverbatim
143*>
144*> \param[out] VT
145*> \verbatim
146*> VT is REAL array, dimension (LDVT,N)
147*> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
148*> N-by-N orthogonal matrix V**T;
149*> if JOBZ = 'S', VT contains the first min(M,N) rows of
150*> V**T (the right singular vectors, stored rowwise);
151*> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
152*> \endverbatim
153*>
154*> \param[in] LDVT
155*> \verbatim
156*> LDVT is INTEGER
157*> The leading dimension of the array VT. LDVT >= 1;
158*> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
159*> if JOBZ = 'S', LDVT >= min(M,N).
160*> \endverbatim
161*>
162*> \param[out] WORK
163*> \verbatim
164*> WORK is REAL array, dimension (MAX(1,LWORK))
165*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
166*> \endverbatim
167*>
168*> \param[in] LWORK
169*> \verbatim
170*> LWORK is INTEGER
171*> The dimension of the array WORK. LWORK >= 1.
172*> If LWORK = -1, a workspace query is assumed. The optimal
173*> size for the WORK array is calculated and stored in WORK(1),
174*> and no other work except argument checking is performed.
175*>
176*> Let mx = max(M,N) and mn = min(M,N).
177*> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
178*> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
179*> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
180*> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
181*> These are not tight minimums in all cases; see comments inside code.
182*> For good performance, LWORK should generally be larger;
183*> a query is recommended.
184*> \endverbatim
185*>
186*> \param[out] IWORK
187*> \verbatim
188*> IWORK is INTEGER array, dimension (8*min(M,N))
189*> \endverbatim
190*>
191*> \param[out] INFO
192*> \verbatim
193*> INFO is INTEGER
194*> < 0: if INFO = -i, the i-th argument had an illegal value.
195*> = -4: if A had a NAN entry.
196*> > 0: SBDSDC did not converge, updating process failed.
197*> = 0: successful exit.
198*> \endverbatim
199*
200* Authors:
201* ========
202*
203*> \author Univ. of Tennessee
204*> \author Univ. of California Berkeley
205*> \author Univ. of Colorado Denver
206*> \author NAG Ltd.
207*
208*> \ingroup realGEsing
209*
210*> \par Contributors:
211* ==================
212*>
213*> Ming Gu and Huan Ren, Computer Science Division, University of
214*> California at Berkeley, USA
215*>
216* =====================================================================
217 SUBROUTINE sgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
218 $ WORK, LWORK, IWORK, INFO )
219 implicit none
220*
221* -- LAPACK driver routine --
222* -- LAPACK is a software package provided by Univ. of Tennessee, --
223* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224*
225* .. Scalar Arguments ..
226 CHARACTER JOBZ
227 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
228* ..
229* .. Array Arguments ..
230 INTEGER IWORK( * )
231 REAL A( LDA, * ), S( * ), U( LDU, * ),
232 $ vt( ldvt, * ), work( * )
233* ..
234*
235* =====================================================================
236*
237* .. Parameters ..
238 REAL ZERO, ONE
239 parameter( zero = 0.0e0, one = 1.0e0 )
240* ..
241* .. Local Scalars ..
242 LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
243 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
244 $ ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,
245 $ ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,
246 $ mnthr, nwork, wrkbl
247 INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM,
248 $ lwork_sgebrd_nn, lwork_sgelqf_mn,
249 $ lwork_sgeqrf_mn,
250 $ lwork_sorgbr_p_mm, lwork_sorgbr_q_nn,
251 $ lwork_sorglq_mn, lwork_sorglq_nn,
252 $ lwork_sorgqr_mm, lwork_sorgqr_mn,
253 $ lwork_sormbr_prt_mm, lwork_sormbr_qln_mm,
254 $ lwork_sormbr_prt_mn, lwork_sormbr_qln_mn,
255 $ lwork_sormbr_prt_nn, lwork_sormbr_qln_nn
256 REAL ANRM, BIGNUM, EPS, SMLNUM
257* ..
258* .. Local Arrays ..
259 INTEGER IDUM( 1 )
260 REAL DUM( 1 )
261* ..
262* .. External Subroutines ..
263 EXTERNAL sbdsdc, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
265 $ xerbla
266* ..
267* .. External Functions ..
268 LOGICAL LSAME, SISNAN
269 REAL SLAMCH, SLANGE, SROUNDUP_LWORK
270 EXTERNAL slamch, slange, lsame, sisnan,
271 $ sroundup_lwork
272* ..
273* .. Intrinsic Functions ..
274 INTRINSIC int, max, min, sqrt
275* ..
276* .. Executable Statements ..
277*
278* Test the input arguments
279*
280 info = 0
281 minmn = min( m, n )
282 wntqa = lsame( jobz, 'A' )
283 wntqs = lsame( jobz, 'S' )
284 wntqas = wntqa .OR. wntqs
285 wntqo = lsame( jobz, 'O' )
286 wntqn = lsame( jobz, 'N' )
287 lquery = ( lwork.EQ.-1 )
288*
289 IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
290 info = -1
291 ELSE IF( m.LT.0 ) THEN
292 info = -2
293 ELSE IF( n.LT.0 ) THEN
294 info = -3
295 ELSE IF( lda.LT.max( 1, m ) ) THEN
296 info = -5
297 ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
298 $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
299 info = -8
300 ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
301 $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
302 $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
303 info = -10
304 END IF
305*
306* Compute workspace
307* Note: Comments in the code beginning "Workspace:" describe the
308* minimal amount of workspace allocated at that point in the code,
309* as well as the preferred amount for good performance.
310* NB refers to the optimal block size for the immediately
311* following subroutine, as returned by ILAENV.
312*
313 IF( info.EQ.0 ) THEN
314 minwrk = 1
315 maxwrk = 1
316 bdspac = 0
317 mnthr = int( minmn*11.0e0 / 6.0e0 )
318 IF( m.GE.n .AND. minmn.GT.0 ) THEN
319*
320* Compute space needed for SBDSDC
321*
322 IF( wntqn ) THEN
323* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
324* keep 7*N for backwards compatibility.
325 bdspac = 7*n
326 ELSE
327 bdspac = 3*n*n + 4*n
328 END IF
329*
330* Compute space preferred for each routine
331 CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
332 $ dum(1), dum(1), -1, ierr )
333 lwork_sgebrd_mn = int( dum(1) )
334*
335 CALL sgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
336 $ dum(1), dum(1), -1, ierr )
337 lwork_sgebrd_nn = int( dum(1) )
338*
339 CALL sgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
340 lwork_sgeqrf_mn = int( dum(1) )
341*
342 CALL sorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
343 $ ierr )
344 lwork_sorgbr_q_nn = int( dum(1) )
345*
346 CALL sorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
347 lwork_sorgqr_mm = int( dum(1) )
348*
349 CALL sorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
350 lwork_sorgqr_mn = int( dum(1) )
351*
352 CALL sormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
353 $ dum(1), dum(1), n, dum(1), -1, ierr )
354 lwork_sormbr_prt_nn = int( dum(1) )
355*
356 CALL sormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
357 $ dum(1), dum(1), n, dum(1), -1, ierr )
358 lwork_sormbr_qln_nn = int( dum(1) )
359*
360 CALL sormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
361 $ dum(1), dum(1), m, dum(1), -1, ierr )
362 lwork_sormbr_qln_mn = int( dum(1) )
363*
364 CALL sormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
365 $ dum(1), dum(1), m, dum(1), -1, ierr )
366 lwork_sormbr_qln_mm = int( dum(1) )
367*
368 IF( m.GE.mnthr ) THEN
369 IF( wntqn ) THEN
370*
371* Path 1 (M >> N, JOBZ='N')
372*
373 wrkbl = n + lwork_sgeqrf_mn
374 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
375 maxwrk = max( wrkbl, bdspac + n )
376 minwrk = bdspac + n
377 ELSE IF( wntqo ) THEN
378*
379* Path 2 (M >> N, JOBZ='O')
380*
381 wrkbl = n + lwork_sgeqrf_mn
382 wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
383 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
384 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
385 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
386 wrkbl = max( wrkbl, 3*n + bdspac )
387 maxwrk = wrkbl + 2*n*n
388 minwrk = bdspac + 2*n*n + 3*n
389 ELSE IF( wntqs ) THEN
390*
391* Path 3 (M >> N, JOBZ='S')
392*
393 wrkbl = n + lwork_sgeqrf_mn
394 wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
395 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
396 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
397 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
398 wrkbl = max( wrkbl, 3*n + bdspac )
399 maxwrk = wrkbl + n*n
400 minwrk = bdspac + n*n + 3*n
401 ELSE IF( wntqa ) THEN
402*
403* Path 4 (M >> N, JOBZ='A')
404*
405 wrkbl = n + lwork_sgeqrf_mn
406 wrkbl = max( wrkbl, n + lwork_sorgqr_mm )
407 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
408 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
409 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
410 wrkbl = max( wrkbl, 3*n + bdspac )
411 maxwrk = wrkbl + n*n
412 minwrk = n*n + max( 3*n + bdspac, n + m )
413 END IF
414 ELSE
415*
416* Path 5 (M >= N, but not much larger)
417*
418 wrkbl = 3*n + lwork_sgebrd_mn
419 IF( wntqn ) THEN
420* Path 5n (M >= N, jobz='N')
421 maxwrk = max( wrkbl, 3*n + bdspac )
422 minwrk = 3*n + max( m, bdspac )
423 ELSE IF( wntqo ) THEN
424* Path 5o (M >= N, jobz='O')
425 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
426 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
427 wrkbl = max( wrkbl, 3*n + bdspac )
428 maxwrk = wrkbl + m*n
429 minwrk = 3*n + max( m, n*n + bdspac )
430 ELSE IF( wntqs ) THEN
431* Path 5s (M >= N, jobz='S')
432 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
433 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
434 maxwrk = max( wrkbl, 3*n + bdspac )
435 minwrk = 3*n + max( m, bdspac )
436 ELSE IF( wntqa ) THEN
437* Path 5a (M >= N, jobz='A')
438 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mm )
439 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
440 maxwrk = max( wrkbl, 3*n + bdspac )
441 minwrk = 3*n + max( m, bdspac )
442 END IF
443 END IF
444 ELSE IF( minmn.GT.0 ) THEN
445*
446* Compute space needed for SBDSDC
447*
448 IF( wntqn ) THEN
449* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
450* keep 7*N for backwards compatibility.
451 bdspac = 7*m
452 ELSE
453 bdspac = 3*m*m + 4*m
454 END IF
455*
456* Compute space preferred for each routine
457 CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
458 $ dum(1), dum(1), -1, ierr )
459 lwork_sgebrd_mn = int( dum(1) )
460*
461 CALL sgebrd( m, m, a, m, s, dum(1), dum(1),
462 $ dum(1), dum(1), -1, ierr )
463 lwork_sgebrd_mm = int( dum(1) )
464*
465 CALL sgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
466 lwork_sgelqf_mn = int( dum(1) )
467*
468 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
469 lwork_sorglq_nn = int( dum(1) )
470*
471 CALL sorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
472 lwork_sorglq_mn = int( dum(1) )
473*
474 CALL sorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
475 lwork_sorgbr_p_mm = int( dum(1) )
476*
477 CALL sormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
478 $ dum(1), dum(1), m, dum(1), -1, ierr )
479 lwork_sormbr_prt_mm = int( dum(1) )
480*
481 CALL sormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
482 $ dum(1), dum(1), m, dum(1), -1, ierr )
483 lwork_sormbr_prt_mn = int( dum(1) )
484*
485 CALL sormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
486 $ dum(1), dum(1), n, dum(1), -1, ierr )
487 lwork_sormbr_prt_nn = int( dum(1) )
488*
489 CALL sormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
490 $ dum(1), dum(1), m, dum(1), -1, ierr )
491 lwork_sormbr_qln_mm = int( dum(1) )
492*
493 IF( n.GE.mnthr ) THEN
494 IF( wntqn ) THEN
495*
496* Path 1t (N >> M, JOBZ='N')
497*
498 wrkbl = m + lwork_sgelqf_mn
499 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
500 maxwrk = max( wrkbl, bdspac + m )
501 minwrk = bdspac + m
502 ELSE IF( wntqo ) THEN
503*
504* Path 2t (N >> M, JOBZ='O')
505*
506 wrkbl = m + lwork_sgelqf_mn
507 wrkbl = max( wrkbl, m + lwork_sorglq_mn )
508 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
509 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
510 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
511 wrkbl = max( wrkbl, 3*m + bdspac )
512 maxwrk = wrkbl + 2*m*m
513 minwrk = bdspac + 2*m*m + 3*m
514 ELSE IF( wntqs ) THEN
515*
516* Path 3t (N >> M, JOBZ='S')
517*
518 wrkbl = m + lwork_sgelqf_mn
519 wrkbl = max( wrkbl, m + lwork_sorglq_mn )
520 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
521 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
522 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
523 wrkbl = max( wrkbl, 3*m + bdspac )
524 maxwrk = wrkbl + m*m
525 minwrk = bdspac + m*m + 3*m
526 ELSE IF( wntqa ) THEN
527*
528* Path 4t (N >> M, JOBZ='A')
529*
530 wrkbl = m + lwork_sgelqf_mn
531 wrkbl = max( wrkbl, m + lwork_sorglq_nn )
532 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
533 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
534 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
535 wrkbl = max( wrkbl, 3*m + bdspac )
536 maxwrk = wrkbl + m*m
537 minwrk = m*m + max( 3*m + bdspac, m + n )
538 END IF
539 ELSE
540*
541* Path 5t (N > M, but not much larger)
542*
543 wrkbl = 3*m + lwork_sgebrd_mn
544 IF( wntqn ) THEN
545* Path 5tn (N > M, jobz='N')
546 maxwrk = max( wrkbl, 3*m + bdspac )
547 minwrk = 3*m + max( n, bdspac )
548 ELSE IF( wntqo ) THEN
549* Path 5to (N > M, jobz='O')
550 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
551 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
552 wrkbl = max( wrkbl, 3*m + bdspac )
553 maxwrk = wrkbl + m*n
554 minwrk = 3*m + max( n, m*m + bdspac )
555 ELSE IF( wntqs ) THEN
556* Path 5ts (N > M, jobz='S')
557 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
558 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
559 maxwrk = max( wrkbl, 3*m + bdspac )
560 minwrk = 3*m + max( n, bdspac )
561 ELSE IF( wntqa ) THEN
562* Path 5ta (N > M, jobz='A')
563 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
564 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_nn )
565 maxwrk = max( wrkbl, 3*m + bdspac )
566 minwrk = 3*m + max( n, bdspac )
567 END IF
568 END IF
569 END IF
570
571 maxwrk = max( maxwrk, minwrk )
572 work( 1 ) = sroundup_lwork( maxwrk )
573*
574 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
575 info = -12
576 END IF
577 END IF
578*
579 IF( info.NE.0 ) THEN
580 CALL xerbla( 'SGESDD', -info )
581 RETURN
582 ELSE IF( lquery ) THEN
583 RETURN
584 END IF
585*
586* Quick return if possible
587*
588 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
589 RETURN
590 END IF
591*
592* Get machine constants
593*
594 eps = slamch( 'P' )
595 smlnum = sqrt( slamch( 'S' ) ) / eps
596 bignum = one / smlnum
597*
598* Scale A if max element outside range [SMLNUM,BIGNUM]
599*
600 anrm = slange( 'M', m, n, a, lda, dum )
601 IF( sisnan( anrm ) ) THEN
602 info = -4
603 RETURN
604 END IF
605 iscl = 0
606 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
607 iscl = 1
608 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
609 ELSE IF( anrm.GT.bignum ) THEN
610 iscl = 1
611 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
612 END IF
613*
614 IF( m.GE.n ) THEN
615*
616* A has at least as many rows as columns. If A has sufficiently
617* more rows than columns, first reduce using the QR
618* decomposition (if sufficient workspace available)
619*
620 IF( m.GE.mnthr ) THEN
621*
622 IF( wntqn ) THEN
623*
624* Path 1 (M >> N, JOBZ='N')
625* No singular vectors to be computed
626*
627 itau = 1
628 nwork = itau + n
629*
630* Compute A=Q*R
631* Workspace: need N [tau] + N [work]
632* Workspace: prefer N [tau] + N*NB [work]
633*
634 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
635 $ lwork - nwork + 1, ierr )
636*
637* Zero out below R
638*
639 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
640 ie = 1
641 itauq = ie + n
642 itaup = itauq + n
643 nwork = itaup + n
644*
645* Bidiagonalize R in A
646* Workspace: need 3*N [e, tauq, taup] + N [work]
647* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
648*
649 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
650 $ work( itaup ), work( nwork ), lwork-nwork+1,
651 $ ierr )
652 nwork = ie + n
653*
654* Perform bidiagonal SVD, computing singular values only
655* Workspace: need N [e] + BDSPAC
656*
657 CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
658 $ dum, idum, work( nwork ), iwork, info )
659*
660 ELSE IF( wntqo ) THEN
661*
662* Path 2 (M >> N, JOBZ = 'O')
663* N left singular vectors to be overwritten on A and
664* N right singular vectors to be computed in VT
665*
666 ir = 1
667*
668* WORK(IR) is LDWRKR by N
669*
670 IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
671 ldwrkr = lda
672 ELSE
673 ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
674 END IF
675 itau = ir + ldwrkr*n
676 nwork = itau + n
677*
678* Compute A=Q*R
679* Workspace: need N*N [R] + N [tau] + N [work]
680* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
681*
682 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
683 $ lwork - nwork + 1, ierr )
684*
685* Copy R to WORK(IR), zeroing out below it
686*
687 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
688 CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
689 $ ldwrkr )
690*
691* Generate Q in A
692* Workspace: need N*N [R] + N [tau] + N [work]
693* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
694*
695 CALL sorgqr( m, n, n, a, lda, work( itau ),
696 $ work( nwork ), lwork - nwork + 1, ierr )
697 ie = itau
698 itauq = ie + n
699 itaup = itauq + n
700 nwork = itaup + n
701*
702* Bidiagonalize R in WORK(IR)
703* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
704* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
705*
706 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
707 $ work( itauq ), work( itaup ), work( nwork ),
708 $ lwork - nwork + 1, ierr )
709*
710* WORK(IU) is N by N
711*
712 iu = nwork
713 nwork = iu + n*n
714*
715* Perform bidiagonal SVD, computing left singular vectors
716* of bidiagonal matrix in WORK(IU) and computing right
717* singular vectors of bidiagonal matrix in VT
718* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
719*
720 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
721 $ vt, ldvt, dum, idum, work( nwork ), iwork,
722 $ info )
723*
724* Overwrite WORK(IU) by left singular vectors of R
725* and VT by right singular vectors of R
726* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
727* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
728*
729 CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
730 $ work( itauq ), work( iu ), n, work( nwork ),
731 $ lwork - nwork + 1, ierr )
732 CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
733 $ work( itaup ), vt, ldvt, work( nwork ),
734 $ lwork - nwork + 1, ierr )
735*
736* Multiply Q in A by left singular vectors of R in
737* WORK(IU), storing result in WORK(IR) and copying to A
738* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
739* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
740*
741 DO 10 i = 1, m, ldwrkr
742 chunk = min( m - i + 1, ldwrkr )
743 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
744 $ lda, work( iu ), n, zero, work( ir ),
745 $ ldwrkr )
746 CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
747 $ a( i, 1 ), lda )
748 10 CONTINUE
749*
750 ELSE IF( wntqs ) THEN
751*
752* Path 3 (M >> N, JOBZ='S')
753* N left singular vectors to be computed in U and
754* N right singular vectors to be computed in VT
755*
756 ir = 1
757*
758* WORK(IR) is N by N
759*
760 ldwrkr = n
761 itau = ir + ldwrkr*n
762 nwork = itau + n
763*
764* Compute A=Q*R
765* Workspace: need N*N [R] + N [tau] + N [work]
766* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
767*
768 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
769 $ lwork - nwork + 1, ierr )
770*
771* Copy R to WORK(IR), zeroing out below it
772*
773 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
775 $ ldwrkr )
776*
777* Generate Q in A
778* Workspace: need N*N [R] + N [tau] + N [work]
779* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
780*
781 CALL sorgqr( m, n, n, a, lda, work( itau ),
782 $ work( nwork ), lwork - nwork + 1, ierr )
783 ie = itau
784 itauq = ie + n
785 itaup = itauq + n
786 nwork = itaup + n
787*
788* Bidiagonalize R in WORK(IR)
789* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
790* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
791*
792 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
793 $ work( itauq ), work( itaup ), work( nwork ),
794 $ lwork - nwork + 1, ierr )
795*
796* Perform bidiagonal SVD, computing left singular vectors
797* of bidiagoal matrix in U and computing right singular
798* vectors of bidiagonal matrix in VT
799* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
800*
801 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
802 $ ldvt, dum, idum, work( nwork ), iwork,
803 $ info )
804*
805* Overwrite U by left singular vectors of R and VT
806* by right singular vectors of R
807* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
808* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
809*
810 CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
811 $ work( itauq ), u, ldu, work( nwork ),
812 $ lwork - nwork + 1, ierr )
813*
814 CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
815 $ work( itaup ), vt, ldvt, work( nwork ),
816 $ lwork - nwork + 1, ierr )
817*
818* Multiply Q in A by left singular vectors of R in
819* WORK(IR), storing result in U
820* Workspace: need N*N [R]
821*
822 CALL slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
823 CALL sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
824 $ ldwrkr, zero, u, ldu )
825*
826 ELSE IF( wntqa ) THEN
827*
828* Path 4 (M >> N, JOBZ='A')
829* M left singular vectors to be computed in U and
830* N right singular vectors to be computed in VT
831*
832 iu = 1
833*
834* WORK(IU) is N by N
835*
836 ldwrku = n
837 itau = iu + ldwrku*n
838 nwork = itau + n
839*
840* Compute A=Q*R, copying result to U
841* Workspace: need N*N [U] + N [tau] + N [work]
842* Workspace: prefer N*N [U] + N [tau] + N*NB [work]
843*
844 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
845 $ lwork - nwork + 1, ierr )
846 CALL slacpy( 'L', m, n, a, lda, u, ldu )
847*
848* Generate Q in U
849* Workspace: need N*N [U] + N [tau] + M [work]
850* Workspace: prefer N*N [U] + N [tau] + M*NB [work]
851 CALL sorgqr( m, m, n, u, ldu, work( itau ),
852 $ work( nwork ), lwork - nwork + 1, ierr )
853*
854* Produce R in A, zeroing out other entries
855*
856 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
857 ie = itau
858 itauq = ie + n
859 itaup = itauq + n
860 nwork = itaup + n
861*
862* Bidiagonalize R in A
863* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
864* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
865*
866 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
867 $ work( itaup ), work( nwork ), lwork-nwork+1,
868 $ ierr )
869*
870* Perform bidiagonal SVD, computing left singular vectors
871* of bidiagonal matrix in WORK(IU) and computing right
872* singular vectors of bidiagonal matrix in VT
873* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
874*
875 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
876 $ vt, ldvt, dum, idum, work( nwork ), iwork,
877 $ info )
878*
879* Overwrite WORK(IU) by left singular vectors of R and VT
880* by right singular vectors of R
881* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
882* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
883*
884 CALL sormbr( 'Q', 'L', 'N', n, n, n, a, lda,
885 $ work( itauq ), work( iu ), ldwrku,
886 $ work( nwork ), lwork - nwork + 1, ierr )
887 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
888 $ work( itaup ), vt, ldvt, work( nwork ),
889 $ lwork - nwork + 1, ierr )
890*
891* Multiply Q in U by left singular vectors of R in
892* WORK(IU), storing result in A
893* Workspace: need N*N [U]
894*
895 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
896 $ ldwrku, zero, a, lda )
897*
898* Copy left singular vectors of A from A to U
899*
900 CALL slacpy( 'F', m, n, a, lda, u, ldu )
901*
902 END IF
903*
904 ELSE
905*
906* M .LT. MNTHR
907*
908* Path 5 (M >= N, but not much larger)
909* Reduce to bidiagonal form without QR decomposition
910*
911 ie = 1
912 itauq = ie + n
913 itaup = itauq + n
914 nwork = itaup + n
915*
916* Bidiagonalize A
917* Workspace: need 3*N [e, tauq, taup] + M [work]
918* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
919*
920 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
921 $ work( itaup ), work( nwork ), lwork-nwork+1,
922 $ ierr )
923 IF( wntqn ) THEN
924*
925* Path 5n (M >= N, JOBZ='N')
926* Perform bidiagonal SVD, only computing singular values
927* Workspace: need 3*N [e, tauq, taup] + BDSPAC
928*
929 CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
930 $ dum, idum, work( nwork ), iwork, info )
931 ELSE IF( wntqo ) THEN
932* Path 5o (M >= N, JOBZ='O')
933 iu = nwork
934 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
935*
936* WORK( IU ) is M by N
937*
938 ldwrku = m
939 nwork = iu + ldwrku*n
940 CALL slaset( 'F', m, n, zero, zero, work( iu ),
941 $ ldwrku )
942* IR is unused; silence compile warnings
943 ir = -1
944 ELSE
945*
946* WORK( IU ) is N by N
947*
948 ldwrku = n
949 nwork = iu + ldwrku*n
950*
951* WORK(IR) is LDWRKR by N
952*
953 ir = nwork
954 ldwrkr = ( lwork - n*n - 3*n ) / n
955 END IF
956 nwork = iu + ldwrku*n
957*
958* Perform bidiagonal SVD, computing left singular vectors
959* of bidiagonal matrix in WORK(IU) and computing right
960* singular vectors of bidiagonal matrix in VT
961* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
962*
963 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
964 $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
965 $ iwork, info )
966*
967* Overwrite VT by right singular vectors of A
968* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
969* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
970*
971 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
972 $ work( itaup ), vt, ldvt, work( nwork ),
973 $ lwork - nwork + 1, ierr )
974*
975 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
976*
977* Path 5o-fast
978* Overwrite WORK(IU) by left singular vectors of A
979* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
980* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
981*
982 CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
983 $ work( itauq ), work( iu ), ldwrku,
984 $ work( nwork ), lwork - nwork + 1, ierr )
985*
986* Copy left singular vectors of A from WORK(IU) to A
987*
988 CALL slacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
989 ELSE
990*
991* Path 5o-slow
992* Generate Q in A
993* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
994* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
995*
996 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
997 $ work( nwork ), lwork - nwork + 1, ierr )
998*
999* Multiply Q in A by left singular vectors of
1000* bidiagonal matrix in WORK(IU), storing result in
1001* WORK(IR) and copying to A
1002* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1003* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1004*
1005 DO 20 i = 1, m, ldwrkr
1006 chunk = min( m - i + 1, ldwrkr )
1007 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1008 $ lda, work( iu ), ldwrku, zero,
1009 $ work( ir ), ldwrkr )
1010 CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
1011 $ a( i, 1 ), lda )
1012 20 CONTINUE
1013 END IF
1014*
1015 ELSE IF( wntqs ) THEN
1016*
1017* Path 5s (M >= N, JOBZ='S')
1018* Perform bidiagonal SVD, computing left singular vectors
1019* of bidiagonal matrix in U and computing right singular
1020* vectors of bidiagonal matrix in VT
1021* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1022*
1023 CALL slaset( 'F', m, n, zero, zero, u, ldu )
1024 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1025 $ ldvt, dum, idum, work( nwork ), iwork,
1026 $ info )
1027*
1028* Overwrite U by left singular vectors of A and VT
1029* by right singular vectors of A
1030* Workspace: need 3*N [e, tauq, taup] + N [work]
1031* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1032*
1033 CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1034 $ work( itauq ), u, ldu, work( nwork ),
1035 $ lwork - nwork + 1, ierr )
1036 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
1037 $ work( itaup ), vt, ldvt, work( nwork ),
1038 $ lwork - nwork + 1, ierr )
1039 ELSE IF( wntqa ) THEN
1040*
1041* Path 5a (M >= N, JOBZ='A')
1042* Perform bidiagonal SVD, computing left singular vectors
1043* of bidiagonal matrix in U and computing right singular
1044* vectors of bidiagonal matrix in VT
1045* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1046*
1047 CALL slaset( 'F', m, m, zero, zero, u, ldu )
1048 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1049 $ ldvt, dum, idum, work( nwork ), iwork,
1050 $ info )
1051*
1052* Set the right corner of U to identity matrix
1053*
1054 IF( m.GT.n ) THEN
1055 CALL slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1056 $ ldu )
1057 END IF
1058*
1059* Overwrite U by left singular vectors of A and VT
1060* by right singular vectors of A
1061* Workspace: need 3*N [e, tauq, taup] + M [work]
1062* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1063*
1064 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1065 $ work( itauq ), u, ldu, work( nwork ),
1066 $ lwork - nwork + 1, ierr )
1067 CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1068 $ work( itaup ), vt, ldvt, work( nwork ),
1069 $ lwork - nwork + 1, ierr )
1070 END IF
1071*
1072 END IF
1073*
1074 ELSE
1075*
1076* A has more columns than rows. If A has sufficiently more
1077* columns than rows, first reduce using the LQ decomposition (if
1078* sufficient workspace available)
1079*
1080 IF( n.GE.mnthr ) THEN
1081*
1082 IF( wntqn ) THEN
1083*
1084* Path 1t (N >> M, JOBZ='N')
1085* No singular vectors to be computed
1086*
1087 itau = 1
1088 nwork = itau + m
1089*
1090* Compute A=L*Q
1091* Workspace: need M [tau] + M [work]
1092* Workspace: prefer M [tau] + M*NB [work]
1093*
1094 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1095 $ lwork - nwork + 1, ierr )
1096*
1097* Zero out above L
1098*
1099 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1100 ie = 1
1101 itauq = ie + m
1102 itaup = itauq + m
1103 nwork = itaup + m
1104*
1105* Bidiagonalize L in A
1106* Workspace: need 3*M [e, tauq, taup] + M [work]
1107* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1108*
1109 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1110 $ work( itaup ), work( nwork ), lwork-nwork+1,
1111 $ ierr )
1112 nwork = ie + m
1113*
1114* Perform bidiagonal SVD, computing singular values only
1115* Workspace: need M [e] + BDSPAC
1116*
1117 CALL sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1118 $ dum, idum, work( nwork ), iwork, info )
1119*
1120 ELSE IF( wntqo ) THEN
1121*
1122* Path 2t (N >> M, JOBZ='O')
1123* M right singular vectors to be overwritten on A and
1124* M left singular vectors to be computed in U
1125*
1126 ivt = 1
1127*
1128* WORK(IVT) is M by M
1129* WORK(IL) is M by M; it is later resized to M by chunk for gemm
1130*
1131 il = ivt + m*m
1132 IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1133 ldwrkl = m
1134 chunk = n
1135 ELSE
1136 ldwrkl = m
1137 chunk = ( lwork - m*m ) / m
1138 END IF
1139 itau = il + ldwrkl*m
1140 nwork = itau + m
1141*
1142* Compute A=L*Q
1143* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1144* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1145*
1146 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1147 $ lwork - nwork + 1, ierr )
1148*
1149* Copy L to WORK(IL), zeroing about above it
1150*
1151 CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1152 CALL slaset( 'U', m - 1, m - 1, zero, zero,
1153 $ work( il + ldwrkl ), ldwrkl )
1154*
1155* Generate Q in A
1156* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1157* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1158*
1159 CALL sorglq( m, n, m, a, lda, work( itau ),
1160 $ work( nwork ), lwork - nwork + 1, ierr )
1161 ie = itau
1162 itauq = ie + m
1163 itaup = itauq + m
1164 nwork = itaup + m
1165*
1166* Bidiagonalize L in WORK(IL)
1167* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1168* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1169*
1170 CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1171 $ work( itauq ), work( itaup ), work( nwork ),
1172 $ lwork - nwork + 1, ierr )
1173*
1174* Perform bidiagonal SVD, computing left singular vectors
1175* of bidiagonal matrix in U, and computing right singular
1176* vectors of bidiagonal matrix in WORK(IVT)
1177* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1178*
1179 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1180 $ work( ivt ), m, dum, idum, work( nwork ),
1181 $ iwork, info )
1182*
1183* Overwrite U by left singular vectors of L and WORK(IVT)
1184* by right singular vectors of L
1185* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1186* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1187*
1188 CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1189 $ work( itauq ), u, ldu, work( nwork ),
1190 $ lwork - nwork + 1, ierr )
1191 CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1192 $ work( itaup ), work( ivt ), m,
1193 $ work( nwork ), lwork - nwork + 1, ierr )
1194*
1195* Multiply right singular vectors of L in WORK(IVT) by Q
1196* in A, storing result in WORK(IL) and copying to A
1197* Workspace: need M*M [VT] + M*M [L]
1198* Workspace: prefer M*M [VT] + M*N [L]
1199* At this point, L is resized as M by chunk.
1200*
1201 DO 30 i = 1, n, chunk
1202 blk = min( n - i + 1, chunk )
1203 CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1204 $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1205 CALL slacpy( 'F', m, blk, work( il ), ldwrkl,
1206 $ a( 1, i ), lda )
1207 30 CONTINUE
1208*
1209 ELSE IF( wntqs ) THEN
1210*
1211* Path 3t (N >> M, JOBZ='S')
1212* M right singular vectors to be computed in VT and
1213* M left singular vectors to be computed in U
1214*
1215 il = 1
1216*
1217* WORK(IL) is M by M
1218*
1219 ldwrkl = m
1220 itau = il + ldwrkl*m
1221 nwork = itau + m
1222*
1223* Compute A=L*Q
1224* Workspace: need M*M [L] + M [tau] + M [work]
1225* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1226*
1227 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1228 $ lwork - nwork + 1, ierr )
1229*
1230* Copy L to WORK(IL), zeroing out above it
1231*
1232 CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1233 CALL slaset( 'U', m - 1, m - 1, zero, zero,
1234 $ work( il + ldwrkl ), ldwrkl )
1235*
1236* Generate Q in A
1237* Workspace: need M*M [L] + M [tau] + M [work]
1238* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1239*
1240 CALL sorglq( m, n, m, a, lda, work( itau ),
1241 $ work( nwork ), lwork - nwork + 1, ierr )
1242 ie = itau
1243 itauq = ie + m
1244 itaup = itauq + m
1245 nwork = itaup + m
1246*
1247* Bidiagonalize L in WORK(IU).
1248* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1249* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1250*
1251 CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1252 $ work( itauq ), work( itaup ), work( nwork ),
1253 $ lwork - nwork + 1, ierr )
1254*
1255* Perform bidiagonal SVD, computing left singular vectors
1256* of bidiagonal matrix in U and computing right singular
1257* vectors of bidiagonal matrix in VT
1258* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1259*
1260 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1261 $ ldvt, dum, idum, work( nwork ), iwork,
1262 $ info )
1263*
1264* Overwrite U by left singular vectors of L and VT
1265* by right singular vectors of L
1266* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1267* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1268*
1269 CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1270 $ work( itauq ), u, ldu, work( nwork ),
1271 $ lwork - nwork + 1, ierr )
1272 CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1273 $ work( itaup ), vt, ldvt, work( nwork ),
1274 $ lwork - nwork + 1, ierr )
1275*
1276* Multiply right singular vectors of L in WORK(IL) by
1277* Q in A, storing result in VT
1278* Workspace: need M*M [L]
1279*
1280 CALL slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1281 CALL sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1282 $ a, lda, zero, vt, ldvt )
1283*
1284 ELSE IF( wntqa ) THEN
1285*
1286* Path 4t (N >> M, JOBZ='A')
1287* N right singular vectors to be computed in VT and
1288* M left singular vectors to be computed in U
1289*
1290 ivt = 1
1291*
1292* WORK(IVT) is M by M
1293*
1294 ldwkvt = m
1295 itau = ivt + ldwkvt*m
1296 nwork = itau + m
1297*
1298* Compute A=L*Q, copying result to VT
1299* Workspace: need M*M [VT] + M [tau] + M [work]
1300* Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1301*
1302 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1303 $ lwork - nwork + 1, ierr )
1304 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
1305*
1306* Generate Q in VT
1307* Workspace: need M*M [VT] + M [tau] + N [work]
1308* Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1309*
1310 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
1311 $ work( nwork ), lwork - nwork + 1, ierr )
1312*
1313* Produce L in A, zeroing out other entries
1314*
1315 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1316 ie = itau
1317 itauq = ie + m
1318 itaup = itauq + m
1319 nwork = itaup + m
1320*
1321* Bidiagonalize L in A
1322* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1323* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1324*
1325 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1326 $ work( itaup ), work( nwork ), lwork-nwork+1,
1327 $ ierr )
1328*
1329* Perform bidiagonal SVD, computing left singular vectors
1330* of bidiagonal matrix in U and computing right singular
1331* vectors of bidiagonal matrix in WORK(IVT)
1332* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1333*
1334 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1335 $ work( ivt ), ldwkvt, dum, idum,
1336 $ work( nwork ), iwork, info )
1337*
1338* Overwrite U by left singular vectors of L and WORK(IVT)
1339* by right singular vectors of L
1340* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1341* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1342*
1343 CALL sormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1344 $ work( itauq ), u, ldu, work( nwork ),
1345 $ lwork - nwork + 1, ierr )
1346 CALL sormbr( 'P', 'R', 'T', m, m, m, a, lda,
1347 $ work( itaup ), work( ivt ), ldwkvt,
1348 $ work( nwork ), lwork - nwork + 1, ierr )
1349*
1350* Multiply right singular vectors of L in WORK(IVT) by
1351* Q in VT, storing result in A
1352* Workspace: need M*M [VT]
1353*
1354 CALL sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1355 $ vt, ldvt, zero, a, lda )
1356*
1357* Copy right singular vectors of A from A to VT
1358*
1359 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
1360*
1361 END IF
1362*
1363 ELSE
1364*
1365* N .LT. MNTHR
1366*
1367* Path 5t (N > M, but not much larger)
1368* Reduce to bidiagonal form without LQ decomposition
1369*
1370 ie = 1
1371 itauq = ie + m
1372 itaup = itauq + m
1373 nwork = itaup + m
1374*
1375* Bidiagonalize A
1376* Workspace: need 3*M [e, tauq, taup] + N [work]
1377* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1378*
1379 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1380 $ work( itaup ), work( nwork ), lwork-nwork+1,
1381 $ ierr )
1382 IF( wntqn ) THEN
1383*
1384* Path 5tn (N > M, JOBZ='N')
1385* Perform bidiagonal SVD, only computing singular values
1386* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1387*
1388 CALL sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1389 $ dum, idum, work( nwork ), iwork, info )
1390 ELSE IF( wntqo ) THEN
1391* Path 5to (N > M, JOBZ='O')
1392 ldwkvt = m
1393 ivt = nwork
1394 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1395*
1396* WORK( IVT ) is M by N
1397*
1398 CALL slaset( 'F', m, n, zero, zero, work( ivt ),
1399 $ ldwkvt )
1400 nwork = ivt + ldwkvt*n
1401* IL is unused; silence compile warnings
1402 il = -1
1403 ELSE
1404*
1405* WORK( IVT ) is M by M
1406*
1407 nwork = ivt + ldwkvt*m
1408 il = nwork
1409*
1410* WORK(IL) is M by CHUNK
1411*
1412 chunk = ( lwork - m*m - 3*m ) / m
1413 END IF
1414*
1415* Perform bidiagonal SVD, computing left singular vectors
1416* of bidiagonal matrix in U and computing right singular
1417* vectors of bidiagonal matrix in WORK(IVT)
1418* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1419*
1420 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1421 $ work( ivt ), ldwkvt, dum, idum,
1422 $ work( nwork ), iwork, info )
1423*
1424* Overwrite U by left singular vectors of A
1425* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1426* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1427*
1428 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1429 $ work( itauq ), u, ldu, work( nwork ),
1430 $ lwork - nwork + 1, ierr )
1431*
1432 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1433*
1434* Path 5to-fast
1435* Overwrite WORK(IVT) by left singular vectors of A
1436* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1437* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1438*
1439 CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1440 $ work( itaup ), work( ivt ), ldwkvt,
1441 $ work( nwork ), lwork - nwork + 1, ierr )
1442*
1443* Copy right singular vectors of A from WORK(IVT) to A
1444*
1445 CALL slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1446 ELSE
1447*
1448* Path 5to-slow
1449* Generate P**T in A
1450* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1451* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1452*
1453 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
1454 $ work( nwork ), lwork - nwork + 1, ierr )
1455*
1456* Multiply Q in A by right singular vectors of
1457* bidiagonal matrix in WORK(IVT), storing result in
1458* WORK(IL) and copying to A
1459* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1460* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1461*
1462 DO 40 i = 1, n, chunk
1463 blk = min( n - i + 1, chunk )
1464 CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1465 $ ldwkvt, a( 1, i ), lda, zero,
1466 $ work( il ), m )
1467 CALL slacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1468 $ lda )
1469 40 CONTINUE
1470 END IF
1471 ELSE IF( wntqs ) THEN
1472*
1473* Path 5ts (N > M, JOBZ='S')
1474* Perform bidiagonal SVD, computing left singular vectors
1475* of bidiagonal matrix in U and computing right singular
1476* vectors of bidiagonal matrix in VT
1477* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1478*
1479 CALL slaset( 'F', m, n, zero, zero, vt, ldvt )
1480 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1481 $ ldvt, dum, idum, work( nwork ), iwork,
1482 $ info )
1483*
1484* Overwrite U by left singular vectors of A and VT
1485* by right singular vectors of A
1486* Workspace: need 3*M [e, tauq, taup] + M [work]
1487* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1488*
1489 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1490 $ work( itauq ), u, ldu, work( nwork ),
1491 $ lwork - nwork + 1, ierr )
1492 CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1493 $ work( itaup ), vt, ldvt, work( nwork ),
1494 $ lwork - nwork + 1, ierr )
1495 ELSE IF( wntqa ) THEN
1496*
1497* Path 5ta (N > M, JOBZ='A')
1498* Perform bidiagonal SVD, computing left singular vectors
1499* of bidiagonal matrix in U and computing right singular
1500* vectors of bidiagonal matrix in VT
1501* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1502*
1503 CALL slaset( 'F', n, n, zero, zero, vt, ldvt )
1504 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1505 $ ldvt, dum, idum, work( nwork ), iwork,
1506 $ info )
1507*
1508* Set the right corner of VT to identity matrix
1509*
1510 IF( n.GT.m ) THEN
1511 CALL slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1512 $ ldvt )
1513 END IF
1514*
1515* Overwrite U by left singular vectors of A and VT
1516* by right singular vectors of A
1517* Workspace: need 3*M [e, tauq, taup] + N [work]
1518* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1519*
1520 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1521 $ work( itauq ), u, ldu, work( nwork ),
1522 $ lwork - nwork + 1, ierr )
1523 CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1524 $ work( itaup ), vt, ldvt, work( nwork ),
1525 $ lwork - nwork + 1, ierr )
1526 END IF
1527*
1528 END IF
1529*
1530 END IF
1531*
1532* Undo scaling if necessary
1533*
1534 IF( iscl.EQ.1 ) THEN
1535 IF( anrm.GT.bignum )
1536 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1537 $ ierr )
1538 IF( anrm.LT.smlnum )
1539 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1540 $ ierr )
1541 END IF
1542*
1543* Return optimal workspace in WORK(1)
1544*
1545 work( 1 ) = sroundup_lwork( maxwrk )
1546*
1547 RETURN
1548*
1549* End of SGESDD
1550*
1551 END
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine sbdsdc(UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, WORK, IWORK, INFO)
SBDSDC
Definition: sbdsdc.f:205
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:157
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:146
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:205
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:143
subroutine sgesdd(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
SGESDD
Definition: sgesdd.f:219
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:196
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:127
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:128
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187