LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
sgesvd.f
Go to the documentation of this file.
1 *> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL A( LDA, * ), S( * ), U( LDU, * ),
30 * $ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> SGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is REAL array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is REAL array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is REAL array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is REAL array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is REAL array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if SBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \ingroup realGEsing
207 *
208 * =====================================================================
209  SUBROUTINE sgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
210  $ WORK, LWORK, INFO )
211 *
212 * -- LAPACK driver routine --
213 * -- LAPACK is a software package provided by Univ. of Tennessee, --
214 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215 *
216 * .. Scalar Arguments ..
217  CHARACTER JOBU, JOBVT
218  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219 * ..
220 * .. Array Arguments ..
221  REAL A( LDA, * ), S( * ), U( LDU, * ),
222  $ vt( ldvt, * ), work( * )
223 * ..
224 *
225 * =====================================================================
226 *
227 * .. Parameters ..
228  REAL ZERO, ONE
229  parameter( zero = 0.0e0, one = 1.0e0 )
230 * ..
231 * .. Local Scalars ..
232  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233  $ wntva, wntvas, wntvn, wntvo, wntvs
234  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
236  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
237  $ nrvt, wrkbl
238  INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
239  $ lwork_sgebrd, lwork_sorgbr_p, lwork_sorgbr_q,
240  $ lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
241  REAL ANRM, BIGNUM, EPS, SMLNUM
242 * ..
243 * .. Local Arrays ..
244  REAL DUM( 1 )
245 * ..
246 * .. External Subroutines ..
247  EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
249  $ xerbla
250 * ..
251 * .. External Functions ..
252  LOGICAL LSAME
253  INTEGER ILAENV
254  REAL SLAMCH, SLANGE
255  EXTERNAL lsame, ilaenv, slamch, slange
256 * ..
257 * .. Intrinsic Functions ..
258  INTRINSIC max, min, sqrt
259 * ..
260 * .. Executable Statements ..
261 *
262 * Test the input arguments
263 *
264  info = 0
265  minmn = min( m, n )
266  wntua = lsame( jobu, 'A' )
267  wntus = lsame( jobu, 'S' )
268  wntuas = wntua .OR. wntus
269  wntuo = lsame( jobu, 'O' )
270  wntun = lsame( jobu, 'N' )
271  wntva = lsame( jobvt, 'A' )
272  wntvs = lsame( jobvt, 'S' )
273  wntvas = wntva .OR. wntvs
274  wntvo = lsame( jobvt, 'O' )
275  wntvn = lsame( jobvt, 'N' )
276  lquery = ( lwork.EQ.-1 )
277 *
278  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279  info = -1
280  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281  $ ( wntvo .AND. wntuo ) ) THEN
282  info = -2
283  ELSE IF( m.LT.0 ) THEN
284  info = -3
285  ELSE IF( n.LT.0 ) THEN
286  info = -4
287  ELSE IF( lda.LT.max( 1, m ) ) THEN
288  info = -6
289  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290  info = -9
291  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293  info = -11
294  END IF
295 *
296 * Compute workspace
297 * (Note: Comments in the code beginning "Workspace:" describe the
298 * minimal amount of workspace needed at that point in the code,
299 * as well as the preferred amount for good performance.
300 * NB refers to the optimal block size for the immediately
301 * following subroutine, as returned by ILAENV.)
302 *
303  IF( info.EQ.0 ) THEN
304  minwrk = 1
305  maxwrk = 1
306  IF( m.GE.n .AND. minmn.GT.0 ) THEN
307 *
308 * Compute space needed for SBDSQR
309 *
310  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
311  bdspac = 5*n
312 * Compute space needed for SGEQRF
313  CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314  lwork_sgeqrf = int( dum(1) )
315 * Compute space needed for SORGQR
316  CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_sorgqr_n = int( dum(1) )
318  CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319  lwork_sorgqr_m = int( dum(1) )
320 * Compute space needed for SGEBRD
321  CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
322  $ dum(1), dum(1), -1, ierr )
323  lwork_sgebrd = int( dum(1) )
324 * Compute space needed for SORGBR P
325  CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
326  $ dum(1), -1, ierr )
327  lwork_sorgbr_p = int( dum(1) )
328 * Compute space needed for SORGBR Q
329  CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
330  $ dum(1), -1, ierr )
331  lwork_sorgbr_q = int( dum(1) )
332 *
333  IF( m.GE.mnthr ) THEN
334  IF( wntun ) THEN
335 *
336 * Path 1 (M much larger than N, JOBU='N')
337 *
338  maxwrk = n + lwork_sgeqrf
339  maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
340  IF( wntvo .OR. wntvas )
341  $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
342  maxwrk = max( maxwrk, bdspac )
343  minwrk = max( 4*n, bdspac )
344  ELSE IF( wntuo .AND. wntvn ) THEN
345 *
346 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347 *
348  wrkbl = n + lwork_sgeqrf
349  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
350  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
351  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
352  wrkbl = max( wrkbl, bdspac )
353  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
354  minwrk = max( 3*n+m, bdspac )
355  ELSE IF( wntuo .AND. wntvas ) THEN
356 *
357 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358 * 'A')
359 *
360  wrkbl = n + lwork_sgeqrf
361  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
362  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
363  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
364  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
365  wrkbl = max( wrkbl, bdspac )
366  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
367  minwrk = max( 3*n+m, bdspac )
368  ELSE IF( wntus .AND. wntvn ) THEN
369 *
370 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371 *
372  wrkbl = n + lwork_sgeqrf
373  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
374  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
375  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
376  wrkbl = max( wrkbl, bdspac )
377  maxwrk = n*n + wrkbl
378  minwrk = max( 3*n+m, bdspac )
379  ELSE IF( wntus .AND. wntvo ) THEN
380 *
381 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382 *
383  wrkbl = n + lwork_sgeqrf
384  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
385  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
386  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
387  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
388  wrkbl = max( wrkbl, bdspac )
389  maxwrk = 2*n*n + wrkbl
390  minwrk = max( 3*n+m, bdspac )
391  ELSE IF( wntus .AND. wntvas ) THEN
392 *
393 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394 * 'A')
395 *
396  wrkbl = n + lwork_sgeqrf
397  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
398  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
399  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
400  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
401  wrkbl = max( wrkbl, bdspac )
402  maxwrk = n*n + wrkbl
403  minwrk = max( 3*n+m, bdspac )
404  ELSE IF( wntua .AND. wntvn ) THEN
405 *
406 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407 *
408  wrkbl = n + lwork_sgeqrf
409  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
410  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
411  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
412  wrkbl = max( wrkbl, bdspac )
413  maxwrk = n*n + wrkbl
414  minwrk = max( 3*n+m, bdspac )
415  ELSE IF( wntua .AND. wntvo ) THEN
416 *
417 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418 *
419  wrkbl = n + lwork_sgeqrf
420  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
421  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
422  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
423  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
424  wrkbl = max( wrkbl, bdspac )
425  maxwrk = 2*n*n + wrkbl
426  minwrk = max( 3*n+m, bdspac )
427  ELSE IF( wntua .AND. wntvas ) THEN
428 *
429 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430 * 'A')
431 *
432  wrkbl = n + lwork_sgeqrf
433  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
434  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
435  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
436  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
437  wrkbl = max( wrkbl, bdspac )
438  maxwrk = n*n + wrkbl
439  minwrk = max( 3*n+m, bdspac )
440  END IF
441  ELSE
442 *
443 * Path 10 (M at least N, but not much larger)
444 *
445  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
446  $ dum(1), dum(1), -1, ierr )
447  lwork_sgebrd = int( dum(1) )
448  maxwrk = 3*n + lwork_sgebrd
449  IF( wntus .OR. wntuo ) THEN
450  CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
451  $ dum(1), -1, ierr )
452  lwork_sorgbr_q = int( dum(1) )
453  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
454  END IF
455  IF( wntua ) THEN
456  CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
457  $ dum(1), -1, ierr )
458  lwork_sorgbr_q = int( dum(1) )
459  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
460  END IF
461  IF( .NOT.wntvn ) THEN
462  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
463  END IF
464  maxwrk = max( maxwrk, bdspac )
465  minwrk = max( 3*n+m, bdspac )
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Compute space needed for SBDSQR
470 *
471  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
472  bdspac = 5*m
473 * Compute space needed for SGELQF
474  CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475  lwork_sgelqf = int( dum(1) )
476 * Compute space needed for SORGLQ
477  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478  lwork_sorglq_n = int( dum(1) )
479  CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480  lwork_sorglq_m = int( dum(1) )
481 * Compute space needed for SGEBRD
482  CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
483  $ dum(1), dum(1), -1, ierr )
484  lwork_sgebrd = int( dum(1) )
485 * Compute space needed for SORGBR P
486  CALL sorgbr( 'P', m, m, m, a, n, dum(1),
487  $ dum(1), -1, ierr )
488  lwork_sorgbr_p = int( dum(1) )
489 * Compute space needed for SORGBR Q
490  CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
491  $ dum(1), -1, ierr )
492  lwork_sorgbr_q = int( dum(1) )
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_sgelqf
499  maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
502  maxwrk = max( maxwrk, bdspac )
503  minwrk = max( 4*m, bdspac )
504  ELSE IF( wntvo .AND. wntun ) THEN
505 *
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 *
508  wrkbl = m + lwork_sgelqf
509  wrkbl = max( wrkbl, m+lwork_sorglq_m )
510  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
511  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
512  wrkbl = max( wrkbl, bdspac )
513  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
514  minwrk = max( 3*m+n, bdspac )
515  ELSE IF( wntvo .AND. wntuas ) THEN
516 *
517 * Path 3t(N much larger than M, JOBU='S' or 'A',
518 * JOBVT='O')
519 *
520  wrkbl = m + lwork_sgelqf
521  wrkbl = max( wrkbl, m+lwork_sorglq_m )
522  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
523  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
524  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
525  wrkbl = max( wrkbl, bdspac )
526  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
527  minwrk = max( 3*m+n, bdspac )
528  ELSE IF( wntvs .AND. wntun ) THEN
529 *
530 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531 *
532  wrkbl = m + lwork_sgelqf
533  wrkbl = max( wrkbl, m+lwork_sorglq_m )
534  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
535  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
536  wrkbl = max( wrkbl, bdspac )
537  maxwrk = m*m + wrkbl
538  minwrk = max( 3*m+n, bdspac )
539  ELSE IF( wntvs .AND. wntuo ) THEN
540 *
541 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542 *
543  wrkbl = m + lwork_sgelqf
544  wrkbl = max( wrkbl, m+lwork_sorglq_m )
545  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
546  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
547  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
548  wrkbl = max( wrkbl, bdspac )
549  maxwrk = 2*m*m + wrkbl
550  minwrk = max( 3*m+n, bdspac )
551  maxwrk = max( maxwrk, minwrk )
552  ELSE IF( wntvs .AND. wntuas ) THEN
553 *
554 * Path 6t(N much larger than M, JOBU='S' or 'A',
555 * JOBVT='S')
556 *
557  wrkbl = m + lwork_sgelqf
558  wrkbl = max( wrkbl, m+lwork_sorglq_m )
559  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
560  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
561  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
562  wrkbl = max( wrkbl, bdspac )
563  maxwrk = m*m + wrkbl
564  minwrk = max( 3*m+n, bdspac )
565  ELSE IF( wntva .AND. wntun ) THEN
566 *
567 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
568 *
569  wrkbl = m + lwork_sgelqf
570  wrkbl = max( wrkbl, m+lwork_sorglq_n )
571  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
572  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
573  wrkbl = max( wrkbl, bdspac )
574  maxwrk = m*m + wrkbl
575  minwrk = max( 3*m+n, bdspac )
576  ELSE IF( wntva .AND. wntuo ) THEN
577 *
578 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
579 *
580  wrkbl = m + lwork_sgelqf
581  wrkbl = max( wrkbl, m+lwork_sorglq_n )
582  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
583  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
584  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
585  wrkbl = max( wrkbl, bdspac )
586  maxwrk = 2*m*m + wrkbl
587  minwrk = max( 3*m+n, bdspac )
588  ELSE IF( wntva .AND. wntuas ) THEN
589 *
590 * Path 9t(N much larger than M, JOBU='S' or 'A',
591 * JOBVT='A')
592 *
593  wrkbl = m + lwork_sgelqf
594  wrkbl = max( wrkbl, m+lwork_sorglq_n )
595  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
596  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
597  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
598  wrkbl = max( wrkbl, bdspac )
599  maxwrk = m*m + wrkbl
600  minwrk = max( 3*m+n, bdspac )
601  END IF
602  ELSE
603 *
604 * Path 10t(N greater than M, but not much larger)
605 *
606  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
607  $ dum(1), dum(1), -1, ierr )
608  lwork_sgebrd = int( dum(1) )
609  maxwrk = 3*m + lwork_sgebrd
610  IF( wntvs .OR. wntvo ) THEN
611 * Compute space needed for SORGBR P
612  CALL sorgbr( 'P', m, n, m, a, n, dum(1),
613  $ dum(1), -1, ierr )
614  lwork_sorgbr_p = int( dum(1) )
615  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
616  END IF
617  IF( wntva ) THEN
618  CALL sorgbr( 'P', n, n, m, a, n, dum(1),
619  $ dum(1), -1, ierr )
620  lwork_sorgbr_p = int( dum(1) )
621  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
622  END IF
623  IF( .NOT.wntun ) THEN
624  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
625  END IF
626  maxwrk = max( maxwrk, bdspac )
627  minwrk = max( 3*m+n, bdspac )
628  END IF
629  END IF
630  maxwrk = max( maxwrk, minwrk )
631  work( 1 ) = maxwrk
632 *
633  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
634  info = -13
635  END IF
636  END IF
637 *
638  IF( info.NE.0 ) THEN
639  CALL xerbla( 'SGESVD', -info )
640  RETURN
641  ELSE IF( lquery ) THEN
642  RETURN
643  END IF
644 *
645 * Quick return if possible
646 *
647  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
648  RETURN
649  END IF
650 *
651 * Get machine constants
652 *
653  eps = slamch( 'P' )
654  smlnum = sqrt( slamch( 'S' ) ) / eps
655  bignum = one / smlnum
656 *
657 * Scale A if max element outside range [SMLNUM,BIGNUM]
658 *
659  anrm = slange( 'M', m, n, a, lda, dum )
660  iscl = 0
661  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
662  iscl = 1
663  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
664  ELSE IF( anrm.GT.bignum ) THEN
665  iscl = 1
666  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
667  END IF
668 *
669  IF( m.GE.n ) THEN
670 *
671 * A has at least as many rows as columns. If A has sufficiently
672 * more rows than columns, first reduce using the QR
673 * decomposition (if sufficient workspace available)
674 *
675  IF( m.GE.mnthr ) THEN
676 *
677  IF( wntun ) THEN
678 *
679 * Path 1 (M much larger than N, JOBU='N')
680 * No left singular vectors to be computed
681 *
682  itau = 1
683  iwork = itau + n
684 *
685 * Compute A=Q*R
686 * (Workspace: need 2*N, prefer N+N*NB)
687 *
688  CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
689  $ lwork-iwork+1, ierr )
690 *
691 * Zero out below R
692 *
693  IF( n .GT. 1 ) THEN
694  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695  $ lda )
696  END IF
697  ie = 1
698  itauq = ie + n
699  itaup = itauq + n
700  iwork = itaup + n
701 *
702 * Bidiagonalize R in A
703 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
704 *
705  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706  $ work( itaup ), work( iwork ), lwork-iwork+1,
707  $ ierr )
708  ncvt = 0
709  IF( wntvo .OR. wntvas ) THEN
710 *
711 * If right singular vectors desired, generate P'.
712 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713 *
714  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715  $ work( iwork ), lwork-iwork+1, ierr )
716  ncvt = n
717  END IF
718  iwork = ie + n
719 *
720 * Perform bidiagonal QR iteration, computing right
721 * singular vectors of A in A if desired
722 * (Workspace: need BDSPAC)
723 *
724  CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725  $ dum, 1, dum, 1, work( iwork ), info )
726 *
727 * If right singular vectors desired in VT, copy them there
728 *
729  IF( wntvas )
730  $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731 *
732  ELSE IF( wntuo .AND. wntvn ) THEN
733 *
734 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735 * N left singular vectors to be overwritten on A and
736 * no right singular vectors to be computed
737 *
738  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739 *
740 * Sufficient workspace for a fast algorithm
741 *
742  ir = 1
743  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744 *
745 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
746 *
747  ldwrku = lda
748  ldwrkr = lda
749  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750 *
751 * WORK(IU) is LDA by N, WORK(IR) is N by N
752 *
753  ldwrku = lda
754  ldwrkr = n
755  ELSE
756 *
757 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758 *
759  ldwrku = ( lwork-n*n-n ) / n
760  ldwrkr = n
761  END IF
762  itau = ir + ldwrkr*n
763  iwork = itau + n
764 *
765 * Compute A=Q*R
766 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767 *
768  CALL sgeqrf( m, n, a, lda, work( itau ),
769  $ work( iwork ), lwork-iwork+1, ierr )
770 *
771 * Copy R to WORK(IR) and zero out below it
772 *
773  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774  CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775  $ ldwrkr )
776 *
777 * Generate Q in A
778 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779 *
780  CALL sorgqr( m, n, n, a, lda, work( itau ),
781  $ work( iwork ), lwork-iwork+1, ierr )
782  ie = itau
783  itauq = ie + n
784  itaup = itauq + n
785  iwork = itaup + n
786 *
787 * Bidiagonalize R in WORK(IR)
788 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789 *
790  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791  $ work( itauq ), work( itaup ),
792  $ work( iwork ), lwork-iwork+1, ierr )
793 *
794 * Generate left vectors bidiagonalizing R
795 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796 *
797  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798  $ work( itauq ), work( iwork ),
799  $ lwork-iwork+1, ierr )
800  iwork = ie + n
801 *
802 * Perform bidiagonal QR iteration, computing left
803 * singular vectors of R in WORK(IR)
804 * (Workspace: need N*N+BDSPAC)
805 *
806  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807  $ work( ir ), ldwrkr, dum, 1,
808  $ work( iwork ), info )
809  iu = ie + n
810 *
811 * Multiply Q in A by left singular vectors of R in
812 * WORK(IR), storing result in WORK(IU) and copying to A
813 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814 *
815  DO 10 i = 1, m, ldwrku
816  chunk = min( m-i+1, ldwrku )
817  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818  $ lda, work( ir ), ldwrkr, zero,
819  $ work( iu ), ldwrku )
820  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821  $ a( i, 1 ), lda )
822  10 CONTINUE
823 *
824  ELSE
825 *
826 * Insufficient workspace for a fast algorithm
827 *
828  ie = 1
829  itauq = ie + n
830  itaup = itauq + n
831  iwork = itaup + n
832 *
833 * Bidiagonalize A
834 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835 *
836  CALL sgebrd( m, n, a, lda, s, work( ie ),
837  $ work( itauq ), work( itaup ),
838  $ work( iwork ), lwork-iwork+1, ierr )
839 *
840 * Generate left vectors bidiagonalizing A
841 * (Workspace: need 4*N, prefer 3*N+N*NB)
842 *
843  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844  $ work( iwork ), lwork-iwork+1, ierr )
845  iwork = ie + n
846 *
847 * Perform bidiagonal QR iteration, computing left
848 * singular vectors of A in A
849 * (Workspace: need BDSPAC)
850 *
851  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852  $ a, lda, dum, 1, work( iwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n-n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891 *
892  CALL sgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL slaset( 'L', n-1, n-1, zero, zero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904 *
905  CALL sorgqr( m, n, n, a, lda, work( itau ),
906  $ work( iwork ), lwork-iwork+1, ierr )
907  ie = itau
908  itauq = ie + n
909  itaup = itauq + n
910  iwork = itaup + n
911 *
912 * Bidiagonalize R in VT, copying result to WORK(IR)
913 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914 *
915  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916  $ work( itauq ), work( itaup ),
917  $ work( iwork ), lwork-iwork+1, ierr )
918  CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919 *
920 * Generate left vectors bidiagonalizing R in WORK(IR)
921 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922 *
923  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924  $ work( itauq ), work( iwork ),
925  $ lwork-iwork+1, ierr )
926 *
927 * Generate right vectors bidiagonalizing R in VT
928 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929 *
930  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931  $ work( iwork ), lwork-iwork+1, ierr )
932  iwork = ie + n
933 *
934 * Perform bidiagonal QR iteration, computing left
935 * singular vectors of R in WORK(IR) and computing right
936 * singular vectors of R in VT
937 * (Workspace: need N*N+BDSPAC)
938 *
939  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940  $ work( ir ), ldwrkr, dum, 1,
941  $ work( iwork ), info )
942  iu = ie + n
943 *
944 * Multiply Q in A by left singular vectors of R in
945 * WORK(IR), storing result in WORK(IU) and copying to A
946 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947 *
948  DO 20 i = 1, m, ldwrku
949  chunk = min( m-i+1, ldwrku )
950  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951  $ lda, work( ir ), ldwrkr, zero,
952  $ work( iu ), ldwrku )
953  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954  $ a( i, 1 ), lda )
955  20 CONTINUE
956 *
957  ELSE
958 *
959 * Insufficient workspace for a fast algorithm
960 *
961  itau = 1
962  iwork = itau + n
963 *
964 * Compute A=Q*R
965 * (Workspace: need 2*N, prefer N+N*NB)
966 *
967  CALL sgeqrf( m, n, a, lda, work( itau ),
968  $ work( iwork ), lwork-iwork+1, ierr )
969 *
970 * Copy R to VT, zeroing out below it
971 *
972  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973  IF( n.GT.1 )
974  $ CALL slaset( 'L', n-1, n-1, zero, zero,
975  $ vt( 2, 1 ), ldvt )
976 *
977 * Generate Q in A
978 * (Workspace: need 2*N, prefer N+N*NB)
979 *
980  CALL sorgqr( m, n, n, a, lda, work( itau ),
981  $ work( iwork ), lwork-iwork+1, ierr )
982  ie = itau
983  itauq = ie + n
984  itaup = itauq + n
985  iwork = itaup + n
986 *
987 * Bidiagonalize R in VT
988 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
989 *
990  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991  $ work( itauq ), work( itaup ),
992  $ work( iwork ), lwork-iwork+1, ierr )
993 *
994 * Multiply Q in A by left vectors bidiagonalizing R
995 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
996 *
997  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998  $ work( itauq ), a, lda, work( iwork ),
999  $ lwork-iwork+1, ierr )
1000 *
1001 * Generate right vectors bidiagonalizing R in VT
1002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003 *
1004  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005  $ work( iwork ), lwork-iwork+1, ierr )
1006  iwork = ie + n
1007 *
1008 * Perform bidiagonal QR iteration, computing left
1009 * singular vectors of A in A and computing right
1010 * singular vectors of A in VT
1011 * (Workspace: need BDSPAC)
1012 *
1013  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014  $ a, lda, dum, 1, work( iwork ), info )
1015 *
1016  END IF
1017 *
1018  ELSE IF( wntus ) THEN
1019 *
1020  IF( wntvn ) THEN
1021 *
1022 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023 * N left singular vectors to be computed in U and
1024 * no right singular vectors to be computed
1025 *
1026  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027 *
1028 * Sufficient workspace for a fast algorithm
1029 *
1030  ir = 1
1031  IF( lwork.GE.wrkbl+lda*n ) THEN
1032 *
1033 * WORK(IR) is LDA by N
1034 *
1035  ldwrkr = lda
1036  ELSE
1037 *
1038 * WORK(IR) is N by N
1039 *
1040  ldwrkr = n
1041  END IF
1042  itau = ir + ldwrkr*n
1043  iwork = itau + n
1044 *
1045 * Compute A=Q*R
1046 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047 *
1048  CALL sgeqrf( m, n, a, lda, work( itau ),
1049  $ work( iwork ), lwork-iwork+1, ierr )
1050 *
1051 * Copy R to WORK(IR), zeroing out below it
1052 *
1053  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054  $ ldwrkr )
1055  CALL slaset( 'L', n-1, n-1, zero, zero,
1056  $ work( ir+1 ), ldwrkr )
1057 *
1058 * Generate Q in A
1059 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 *
1061  CALL sorgqr( m, n, n, a, lda, work( itau ),
1062  $ work( iwork ), lwork-iwork+1, ierr )
1063  ie = itau
1064  itauq = ie + n
1065  itaup = itauq + n
1066  iwork = itaup + n
1067 *
1068 * Bidiagonalize R in WORK(IR)
1069 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070 *
1071  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072  $ work( ie ), work( itauq ),
1073  $ work( itaup ), work( iwork ),
1074  $ lwork-iwork+1, ierr )
1075 *
1076 * Generate left vectors bidiagonalizing R in WORK(IR)
1077 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078 *
1079  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080  $ work( itauq ), work( iwork ),
1081  $ lwork-iwork+1, ierr )
1082  iwork = ie + n
1083 *
1084 * Perform bidiagonal QR iteration, computing left
1085 * singular vectors of R in WORK(IR)
1086 * (Workspace: need N*N+BDSPAC)
1087 *
1088  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089  $ 1, work( ir ), ldwrkr, dum, 1,
1090  $ work( iwork ), info )
1091 *
1092 * Multiply Q in A by left singular vectors of R in
1093 * WORK(IR), storing result in U
1094 * (Workspace: need N*N)
1095 *
1096  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097  $ work( ir ), ldwrkr, zero, u, ldu )
1098 *
1099  ELSE
1100 *
1101 * Insufficient workspace for a fast algorithm
1102 *
1103  itau = 1
1104  iwork = itau + n
1105 *
1106 * Compute A=Q*R, copying result to U
1107 * (Workspace: need 2*N, prefer N+N*NB)
1108 *
1109  CALL sgeqrf( m, n, a, lda, work( itau ),
1110  $ work( iwork ), lwork-iwork+1, ierr )
1111  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112 *
1113 * Generate Q in U
1114 * (Workspace: need 2*N, prefer N+N*NB)
1115 *
1116  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117  $ work( iwork ), lwork-iwork+1, ierr )
1118  ie = itau
1119  itauq = ie + n
1120  itaup = itauq + n
1121  iwork = itaup + n
1122 *
1123 * Zero out below R in A
1124 *
1125  IF( n .GT. 1 ) THEN
1126  CALL slaset( 'L', n-1, n-1, zero, zero,
1127  $ a( 2, 1 ), lda )
1128  END IF
1129 *
1130 * Bidiagonalize R in A
1131 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1132 *
1133  CALL sgebrd( n, n, a, lda, s, work( ie ),
1134  $ work( itauq ), work( itaup ),
1135  $ work( iwork ), lwork-iwork+1, ierr )
1136 *
1137 * Multiply Q in U by left vectors bidiagonalizing R
1138 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1139 *
1140  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1141  $ work( itauq ), u, ldu, work( iwork ),
1142  $ lwork-iwork+1, ierr )
1143  iwork = ie + n
1144 *
1145 * Perform bidiagonal QR iteration, computing left
1146 * singular vectors of A in U
1147 * (Workspace: need BDSPAC)
1148 *
1149  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1150  $ 1, u, ldu, dum, 1, work( iwork ),
1151  $ info )
1152 *
1153  END IF
1154 *
1155  ELSE IF( wntvo ) THEN
1156 *
1157 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1158 * N left singular vectors to be computed in U and
1159 * N right singular vectors to be overwritten on A
1160 *
1161  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1162 *
1163 * Sufficient workspace for a fast algorithm
1164 *
1165  iu = 1
1166  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1167 *
1168 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1169 *
1170  ldwrku = lda
1171  ir = iu + ldwrku*n
1172  ldwrkr = lda
1173  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1174 *
1175 * WORK(IU) is LDA by N and WORK(IR) is N by N
1176 *
1177  ldwrku = lda
1178  ir = iu + ldwrku*n
1179  ldwrkr = n
1180  ELSE
1181 *
1182 * WORK(IU) is N by N and WORK(IR) is N by N
1183 *
1184  ldwrku = n
1185  ir = iu + ldwrku*n
1186  ldwrkr = n
1187  END IF
1188  itau = ir + ldwrkr*n
1189  iwork = itau + n
1190 *
1191 * Compute A=Q*R
1192 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1193 *
1194  CALL sgeqrf( m, n, a, lda, work( itau ),
1195  $ work( iwork ), lwork-iwork+1, ierr )
1196 *
1197 * Copy R to WORK(IU), zeroing out below it
1198 *
1199  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1200  $ ldwrku )
1201  CALL slaset( 'L', n-1, n-1, zero, zero,
1202  $ work( iu+1 ), ldwrku )
1203 *
1204 * Generate Q in A
1205 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1206 *
1207  CALL sorgqr( m, n, n, a, lda, work( itau ),
1208  $ work( iwork ), lwork-iwork+1, ierr )
1209  ie = itau
1210  itauq = ie + n
1211  itaup = itauq + n
1212  iwork = itaup + n
1213 *
1214 * Bidiagonalize R in WORK(IU), copying result to
1215 * WORK(IR)
1216 * (Workspace: need 2*N*N+4*N,
1217 * prefer 2*N*N+3*N+2*N*NB)
1218 *
1219  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1220  $ work( ie ), work( itauq ),
1221  $ work( itaup ), work( iwork ),
1222  $ lwork-iwork+1, ierr )
1223  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1224  $ work( ir ), ldwrkr )
1225 *
1226 * Generate left bidiagonalizing vectors in WORK(IU)
1227 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1228 *
1229  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1230  $ work( itauq ), work( iwork ),
1231  $ lwork-iwork+1, ierr )
1232 *
1233 * Generate right bidiagonalizing vectors in WORK(IR)
1234 * (Workspace: need 2*N*N+4*N-1,
1235 * prefer 2*N*N+3*N+(N-1)*NB)
1236 *
1237  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1238  $ work( itaup ), work( iwork ),
1239  $ lwork-iwork+1, ierr )
1240  iwork = ie + n
1241 *
1242 * Perform bidiagonal QR iteration, computing left
1243 * singular vectors of R in WORK(IU) and computing
1244 * right singular vectors of R in WORK(IR)
1245 * (Workspace: need 2*N*N+BDSPAC)
1246 *
1247  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1248  $ work( ir ), ldwrkr, work( iu ),
1249  $ ldwrku, dum, 1, work( iwork ), info )
1250 *
1251 * Multiply Q in A by left singular vectors of R in
1252 * WORK(IU), storing result in U
1253 * (Workspace: need N*N)
1254 *
1255  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1256  $ work( iu ), ldwrku, zero, u, ldu )
1257 *
1258 * Copy right singular vectors of R to A
1259 * (Workspace: need N*N)
1260 *
1261  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1262  $ lda )
1263 *
1264  ELSE
1265 *
1266 * Insufficient workspace for a fast algorithm
1267 *
1268  itau = 1
1269  iwork = itau + n
1270 *
1271 * Compute A=Q*R, copying result to U
1272 * (Workspace: need 2*N, prefer N+N*NB)
1273 *
1274  CALL sgeqrf( m, n, a, lda, work( itau ),
1275  $ work( iwork ), lwork-iwork+1, ierr )
1276  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1277 *
1278 * Generate Q in U
1279 * (Workspace: need 2*N, prefer N+N*NB)
1280 *
1281  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1282  $ work( iwork ), lwork-iwork+1, ierr )
1283  ie = itau
1284  itauq = ie + n
1285  itaup = itauq + n
1286  iwork = itaup + n
1287 *
1288 * Zero out below R in A
1289 *
1290  IF( n .GT. 1 ) THEN
1291  CALL slaset( 'L', n-1, n-1, zero, zero,
1292  $ a( 2, 1 ), lda )
1293  END IF
1294 *
1295 * Bidiagonalize R in A
1296 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1297 *
1298  CALL sgebrd( n, n, a, lda, s, work( ie ),
1299  $ work( itauq ), work( itaup ),
1300  $ work( iwork ), lwork-iwork+1, ierr )
1301 *
1302 * Multiply Q in U by left vectors bidiagonalizing R
1303 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1304 *
1305  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1306  $ work( itauq ), u, ldu, work( iwork ),
1307  $ lwork-iwork+1, ierr )
1308 *
1309 * Generate right vectors bidiagonalizing R in A
1310 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1311 *
1312  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1313  $ work( iwork ), lwork-iwork+1, ierr )
1314  iwork = ie + n
1315 *
1316 * Perform bidiagonal QR iteration, computing left
1317 * singular vectors of A in U and computing right
1318 * singular vectors of A in A
1319 * (Workspace: need BDSPAC)
1320 *
1321  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1322  $ lda, u, ldu, dum, 1, work( iwork ),
1323  $ info )
1324 *
1325  END IF
1326 *
1327  ELSE IF( wntvas ) THEN
1328 *
1329 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1330 * or 'A')
1331 * N left singular vectors to be computed in U and
1332 * N right singular vectors to be computed in VT
1333 *
1334  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1335 *
1336 * Sufficient workspace for a fast algorithm
1337 *
1338  iu = 1
1339  IF( lwork.GE.wrkbl+lda*n ) THEN
1340 *
1341 * WORK(IU) is LDA by N
1342 *
1343  ldwrku = lda
1344  ELSE
1345 *
1346 * WORK(IU) is N by N
1347 *
1348  ldwrku = n
1349  END IF
1350  itau = iu + ldwrku*n
1351  iwork = itau + n
1352 *
1353 * Compute A=Q*R
1354 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1355 *
1356  CALL sgeqrf( m, n, a, lda, work( itau ),
1357  $ work( iwork ), lwork-iwork+1, ierr )
1358 *
1359 * Copy R to WORK(IU), zeroing out below it
1360 *
1361  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1362  $ ldwrku )
1363  CALL slaset( 'L', n-1, n-1, zero, zero,
1364  $ work( iu+1 ), ldwrku )
1365 *
1366 * Generate Q in A
1367 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1368 *
1369  CALL sorgqr( m, n, n, a, lda, work( itau ),
1370  $ work( iwork ), lwork-iwork+1, ierr )
1371  ie = itau
1372  itauq = ie + n
1373  itaup = itauq + n
1374  iwork = itaup + n
1375 *
1376 * Bidiagonalize R in WORK(IU), copying result to VT
1377 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1378 *
1379  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1380  $ work( ie ), work( itauq ),
1381  $ work( itaup ), work( iwork ),
1382  $ lwork-iwork+1, ierr )
1383  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1384  $ ldvt )
1385 *
1386 * Generate left bidiagonalizing vectors in WORK(IU)
1387 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1388 *
1389  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1390  $ work( itauq ), work( iwork ),
1391  $ lwork-iwork+1, ierr )
1392 *
1393 * Generate right bidiagonalizing vectors in VT
1394 * (Workspace: need N*N+4*N-1,
1395 * prefer N*N+3*N+(N-1)*NB)
1396 *
1397  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1398  $ work( iwork ), lwork-iwork+1, ierr )
1399  iwork = ie + n
1400 *
1401 * Perform bidiagonal QR iteration, computing left
1402 * singular vectors of R in WORK(IU) and computing
1403 * right singular vectors of R in VT
1404 * (Workspace: need N*N+BDSPAC)
1405 *
1406  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1407  $ ldvt, work( iu ), ldwrku, dum, 1,
1408  $ work( iwork ), info )
1409 *
1410 * Multiply Q in A by left singular vectors of R in
1411 * WORK(IU), storing result in U
1412 * (Workspace: need N*N)
1413 *
1414  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1415  $ work( iu ), ldwrku, zero, u, ldu )
1416 *
1417  ELSE
1418 *
1419 * Insufficient workspace for a fast algorithm
1420 *
1421  itau = 1
1422  iwork = itau + n
1423 *
1424 * Compute A=Q*R, copying result to U
1425 * (Workspace: need 2*N, prefer N+N*NB)
1426 *
1427  CALL sgeqrf( m, n, a, lda, work( itau ),
1428  $ work( iwork ), lwork-iwork+1, ierr )
1429  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1430 *
1431 * Generate Q in U
1432 * (Workspace: need 2*N, prefer N+N*NB)
1433 *
1434  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1435  $ work( iwork ), lwork-iwork+1, ierr )
1436 *
1437 * Copy R to VT, zeroing out below it
1438 *
1439  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1440  IF( n.GT.1 )
1441  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1442  $ vt( 2, 1 ), ldvt )
1443  ie = itau
1444  itauq = ie + n
1445  itaup = itauq + n
1446  iwork = itaup + n
1447 *
1448 * Bidiagonalize R in VT
1449 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1450 *
1451  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1452  $ work( itauq ), work( itaup ),
1453  $ work( iwork ), lwork-iwork+1, ierr )
1454 *
1455 * Multiply Q in U by left bidiagonalizing vectors
1456 * in VT
1457 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1458 *
1459  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1460  $ work( itauq ), u, ldu, work( iwork ),
1461  $ lwork-iwork+1, ierr )
1462 *
1463 * Generate right bidiagonalizing vectors in VT
1464 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1465 *
1466  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1467  $ work( iwork ), lwork-iwork+1, ierr )
1468  iwork = ie + n
1469 *
1470 * Perform bidiagonal QR iteration, computing left
1471 * singular vectors of A in U and computing right
1472 * singular vectors of A in VT
1473 * (Workspace: need BDSPAC)
1474 *
1475  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1476  $ ldvt, u, ldu, dum, 1, work( iwork ),
1477  $ info )
1478 *
1479  END IF
1480 *
1481  END IF
1482 *
1483  ELSE IF( wntua ) THEN
1484 *
1485  IF( wntvn ) THEN
1486 *
1487 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1488 * M left singular vectors to be computed in U and
1489 * no right singular vectors to be computed
1490 *
1491  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1492 *
1493 * Sufficient workspace for a fast algorithm
1494 *
1495  ir = 1
1496  IF( lwork.GE.wrkbl+lda*n ) THEN
1497 *
1498 * WORK(IR) is LDA by N
1499 *
1500  ldwrkr = lda
1501  ELSE
1502 *
1503 * WORK(IR) is N by N
1504 *
1505  ldwrkr = n
1506  END IF
1507  itau = ir + ldwrkr*n
1508  iwork = itau + n
1509 *
1510 * Compute A=Q*R, copying result to U
1511 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1512 *
1513  CALL sgeqrf( m, n, a, lda, work( itau ),
1514  $ work( iwork ), lwork-iwork+1, ierr )
1515  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1516 *
1517 * Copy R to WORK(IR), zeroing out below it
1518 *
1519  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1520  $ ldwrkr )
1521  CALL slaset( 'L', n-1, n-1, zero, zero,
1522  $ work( ir+1 ), ldwrkr )
1523 *
1524 * Generate Q in U
1525 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1526 *
1527  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1528  $ work( iwork ), lwork-iwork+1, ierr )
1529  ie = itau
1530  itauq = ie + n
1531  itaup = itauq + n
1532  iwork = itaup + n
1533 *
1534 * Bidiagonalize R in WORK(IR)
1535 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1536 *
1537  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1538  $ work( ie ), work( itauq ),
1539  $ work( itaup ), work( iwork ),
1540  $ lwork-iwork+1, ierr )
1541 *
1542 * Generate left bidiagonalizing vectors in WORK(IR)
1543 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1544 *
1545  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1546  $ work( itauq ), work( iwork ),
1547  $ lwork-iwork+1, ierr )
1548  iwork = ie + n
1549 *
1550 * Perform bidiagonal QR iteration, computing left
1551 * singular vectors of R in WORK(IR)
1552 * (Workspace: need N*N+BDSPAC)
1553 *
1554  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1555  $ 1, work( ir ), ldwrkr, dum, 1,
1556  $ work( iwork ), info )
1557 *
1558 * Multiply Q in U by left singular vectors of R in
1559 * WORK(IR), storing result in A
1560 * (Workspace: need N*N)
1561 *
1562  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1563  $ work( ir ), ldwrkr, zero, a, lda )
1564 *
1565 * Copy left singular vectors of A from A to U
1566 *
1567  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1568 *
1569  ELSE
1570 *
1571 * Insufficient workspace for a fast algorithm
1572 *
1573  itau = 1
1574  iwork = itau + n
1575 *
1576 * Compute A=Q*R, copying result to U
1577 * (Workspace: need 2*N, prefer N+N*NB)
1578 *
1579  CALL sgeqrf( m, n, a, lda, work( itau ),
1580  $ work( iwork ), lwork-iwork+1, ierr )
1581  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1582 *
1583 * Generate Q in U
1584 * (Workspace: need N+M, prefer N+M*NB)
1585 *
1586  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1587  $ work( iwork ), lwork-iwork+1, ierr )
1588  ie = itau
1589  itauq = ie + n
1590  itaup = itauq + n
1591  iwork = itaup + n
1592 *
1593 * Zero out below R in A
1594 *
1595  IF( n .GT. 1 ) THEN
1596  CALL slaset( 'L', n-1, n-1, zero, zero,
1597  $ a( 2, 1 ), lda )
1598  END IF
1599 *
1600 * Bidiagonalize R in A
1601 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1602 *
1603  CALL sgebrd( n, n, a, lda, s, work( ie ),
1604  $ work( itauq ), work( itaup ),
1605  $ work( iwork ), lwork-iwork+1, ierr )
1606 *
1607 * Multiply Q in U by left bidiagonalizing vectors
1608 * in A
1609 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1610 *
1611  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1612  $ work( itauq ), u, ldu, work( iwork ),
1613  $ lwork-iwork+1, ierr )
1614  iwork = ie + n
1615 *
1616 * Perform bidiagonal QR iteration, computing left
1617 * singular vectors of A in U
1618 * (Workspace: need BDSPAC)
1619 *
1620  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1621  $ 1, u, ldu, dum, 1, work( iwork ),
1622  $ info )
1623 *
1624  END IF
1625 *
1626  ELSE IF( wntvo ) THEN
1627 *
1628 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1629 * M left singular vectors to be computed in U and
1630 * N right singular vectors to be overwritten on A
1631 *
1632  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1633 *
1634 * Sufficient workspace for a fast algorithm
1635 *
1636  iu = 1
1637  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1638 *
1639 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1640 *
1641  ldwrku = lda
1642  ir = iu + ldwrku*n
1643  ldwrkr = lda
1644  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1645 *
1646 * WORK(IU) is LDA by N and WORK(IR) is N by N
1647 *
1648  ldwrku = lda
1649  ir = iu + ldwrku*n
1650  ldwrkr = n
1651  ELSE
1652 *
1653 * WORK(IU) is N by N and WORK(IR) is N by N
1654 *
1655  ldwrku = n
1656  ir = iu + ldwrku*n
1657  ldwrkr = n
1658  END IF
1659  itau = ir + ldwrkr*n
1660  iwork = itau + n
1661 *
1662 * Compute A=Q*R, copying result to U
1663 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1664 *
1665  CALL sgeqrf( m, n, a, lda, work( itau ),
1666  $ work( iwork ), lwork-iwork+1, ierr )
1667  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1668 *
1669 * Generate Q in U
1670 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1671 *
1672  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1673  $ work( iwork ), lwork-iwork+1, ierr )
1674 *
1675 * Copy R to WORK(IU), zeroing out below it
1676 *
1677  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1678  $ ldwrku )
1679  CALL slaset( 'L', n-1, n-1, zero, zero,
1680  $ work( iu+1 ), ldwrku )
1681  ie = itau
1682  itauq = ie + n
1683  itaup = itauq + n
1684  iwork = itaup + n
1685 *
1686 * Bidiagonalize R in WORK(IU), copying result to
1687 * WORK(IR)
1688 * (Workspace: need 2*N*N+4*N,
1689 * prefer 2*N*N+3*N+2*N*NB)
1690 *
1691  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1692  $ work( ie ), work( itauq ),
1693  $ work( itaup ), work( iwork ),
1694  $ lwork-iwork+1, ierr )
1695  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1696  $ work( ir ), ldwrkr )
1697 *
1698 * Generate left bidiagonalizing vectors in WORK(IU)
1699 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1700 *
1701  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1702  $ work( itauq ), work( iwork ),
1703  $ lwork-iwork+1, ierr )
1704 *
1705 * Generate right bidiagonalizing vectors in WORK(IR)
1706 * (Workspace: need 2*N*N+4*N-1,
1707 * prefer 2*N*N+3*N+(N-1)*NB)
1708 *
1709  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1710  $ work( itaup ), work( iwork ),
1711  $ lwork-iwork+1, ierr )
1712  iwork = ie + n
1713 *
1714 * Perform bidiagonal QR iteration, computing left
1715 * singular vectors of R in WORK(IU) and computing
1716 * right singular vectors of R in WORK(IR)
1717 * (Workspace: need 2*N*N+BDSPAC)
1718 *
1719  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1720  $ work( ir ), ldwrkr, work( iu ),
1721  $ ldwrku, dum, 1, work( iwork ), info )
1722 *
1723 * Multiply Q in U by left singular vectors of R in
1724 * WORK(IU), storing result in A
1725 * (Workspace: need N*N)
1726 *
1727  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1728  $ work( iu ), ldwrku, zero, a, lda )
1729 *
1730 * Copy left singular vectors of A from A to U
1731 *
1732  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1733 *
1734 * Copy right singular vectors of R from WORK(IR) to A
1735 *
1736  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1737  $ lda )
1738 *
1739  ELSE
1740 *
1741 * Insufficient workspace for a fast algorithm
1742 *
1743  itau = 1
1744  iwork = itau + n
1745 *
1746 * Compute A=Q*R, copying result to U
1747 * (Workspace: need 2*N, prefer N+N*NB)
1748 *
1749  CALL sgeqrf( m, n, a, lda, work( itau ),
1750  $ work( iwork ), lwork-iwork+1, ierr )
1751  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1752 *
1753 * Generate Q in U
1754 * (Workspace: need N+M, prefer N+M*NB)
1755 *
1756  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1757  $ work( iwork ), lwork-iwork+1, ierr )
1758  ie = itau
1759  itauq = ie + n
1760  itaup = itauq + n
1761  iwork = itaup + n
1762 *
1763 * Zero out below R in A
1764 *
1765  IF( n .GT. 1 ) THEN
1766  CALL slaset( 'L', n-1, n-1, zero, zero,
1767  $ a( 2, 1 ), lda )
1768  END IF
1769 *
1770 * Bidiagonalize R in A
1771 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1772 *
1773  CALL sgebrd( n, n, a, lda, s, work( ie ),
1774  $ work( itauq ), work( itaup ),
1775  $ work( iwork ), lwork-iwork+1, ierr )
1776 *
1777 * Multiply Q in U by left bidiagonalizing vectors
1778 * in A
1779 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1780 *
1781  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1782  $ work( itauq ), u, ldu, work( iwork ),
1783  $ lwork-iwork+1, ierr )
1784 *
1785 * Generate right bidiagonalizing vectors in A
1786 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1787 *
1788  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1789  $ work( iwork ), lwork-iwork+1, ierr )
1790  iwork = ie + n
1791 *
1792 * Perform bidiagonal QR iteration, computing left
1793 * singular vectors of A in U and computing right
1794 * singular vectors of A in A
1795 * (Workspace: need BDSPAC)
1796 *
1797  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1798  $ lda, u, ldu, dum, 1, work( iwork ),
1799  $ info )
1800 *
1801  END IF
1802 *
1803  ELSE IF( wntvas ) THEN
1804 *
1805 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1806 * or 'A')
1807 * M left singular vectors to be computed in U and
1808 * N right singular vectors to be computed in VT
1809 *
1810  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1811 *
1812 * Sufficient workspace for a fast algorithm
1813 *
1814  iu = 1
1815  IF( lwork.GE.wrkbl+lda*n ) THEN
1816 *
1817 * WORK(IU) is LDA by N
1818 *
1819  ldwrku = lda
1820  ELSE
1821 *
1822 * WORK(IU) is N by N
1823 *
1824  ldwrku = n
1825  END IF
1826  itau = iu + ldwrku*n
1827  iwork = itau + n
1828 *
1829 * Compute A=Q*R, copying result to U
1830 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1831 *
1832  CALL sgeqrf( m, n, a, lda, work( itau ),
1833  $ work( iwork ), lwork-iwork+1, ierr )
1834  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1835 *
1836 * Generate Q in U
1837 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1838 *
1839  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1840  $ work( iwork ), lwork-iwork+1, ierr )
1841 *
1842 * Copy R to WORK(IU), zeroing out below it
1843 *
1844  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1845  $ ldwrku )
1846  CALL slaset( 'L', n-1, n-1, zero, zero,
1847  $ work( iu+1 ), ldwrku )
1848  ie = itau
1849  itauq = ie + n
1850  itaup = itauq + n
1851  iwork = itaup + n
1852 *
1853 * Bidiagonalize R in WORK(IU), copying result to VT
1854 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1855 *
1856  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1857  $ work( ie ), work( itauq ),
1858  $ work( itaup ), work( iwork ),
1859  $ lwork-iwork+1, ierr )
1860  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1861  $ ldvt )
1862 *
1863 * Generate left bidiagonalizing vectors in WORK(IU)
1864 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1865 *
1866  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1867  $ work( itauq ), work( iwork ),
1868  $ lwork-iwork+1, ierr )
1869 *
1870 * Generate right bidiagonalizing vectors in VT
1871 * (Workspace: need N*N+4*N-1,
1872 * prefer N*N+3*N+(N-1)*NB)
1873 *
1874  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1875  $ work( iwork ), lwork-iwork+1, ierr )
1876  iwork = ie + n
1877 *
1878 * Perform bidiagonal QR iteration, computing left
1879 * singular vectors of R in WORK(IU) and computing
1880 * right singular vectors of R in VT
1881 * (Workspace: need N*N+BDSPAC)
1882 *
1883  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1884  $ ldvt, work( iu ), ldwrku, dum, 1,
1885  $ work( iwork ), info )
1886 *
1887 * Multiply Q in U by left singular vectors of R in
1888 * WORK(IU), storing result in A
1889 * (Workspace: need N*N)
1890 *
1891  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1892  $ work( iu ), ldwrku, zero, a, lda )
1893 *
1894 * Copy left singular vectors of A from A to U
1895 *
1896  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1897 *
1898  ELSE
1899 *
1900 * Insufficient workspace for a fast algorithm
1901 *
1902  itau = 1
1903  iwork = itau + n
1904 *
1905 * Compute A=Q*R, copying result to U
1906 * (Workspace: need 2*N, prefer N+N*NB)
1907 *
1908  CALL sgeqrf( m, n, a, lda, work( itau ),
1909  $ work( iwork ), lwork-iwork+1, ierr )
1910  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1911 *
1912 * Generate Q in U
1913 * (Workspace: need N+M, prefer N+M*NB)
1914 *
1915  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1916  $ work( iwork ), lwork-iwork+1, ierr )
1917 *
1918 * Copy R from A to VT, zeroing out below it
1919 *
1920  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1921  IF( n.GT.1 )
1922  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1923  $ vt( 2, 1 ), ldvt )
1924  ie = itau
1925  itauq = ie + n
1926  itaup = itauq + n
1927  iwork = itaup + n
1928 *
1929 * Bidiagonalize R in VT
1930 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1931 *
1932  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1933  $ work( itauq ), work( itaup ),
1934  $ work( iwork ), lwork-iwork+1, ierr )
1935 *
1936 * Multiply Q in U by left bidiagonalizing vectors
1937 * in VT
1938 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1939 *
1940  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1941  $ work( itauq ), u, ldu, work( iwork ),
1942  $ lwork-iwork+1, ierr )
1943 *
1944 * Generate right bidiagonalizing vectors in VT
1945 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1946 *
1947  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1948  $ work( iwork ), lwork-iwork+1, ierr )
1949  iwork = ie + n
1950 *
1951 * Perform bidiagonal QR iteration, computing left
1952 * singular vectors of A in U and computing right
1953 * singular vectors of A in VT
1954 * (Workspace: need BDSPAC)
1955 *
1956  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1957  $ ldvt, u, ldu, dum, 1, work( iwork ),
1958  $ info )
1959 *
1960  END IF
1961 *
1962  END IF
1963 *
1964  END IF
1965 *
1966  ELSE
1967 *
1968 * M .LT. MNTHR
1969 *
1970 * Path 10 (M at least N, but not much larger)
1971 * Reduce to bidiagonal form without QR decomposition
1972 *
1973  ie = 1
1974  itauq = ie + n
1975  itaup = itauq + n
1976  iwork = itaup + n
1977 *
1978 * Bidiagonalize A
1979 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1980 *
1981  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1982  $ work( itaup ), work( iwork ), lwork-iwork+1,
1983  $ ierr )
1984  IF( wntuas ) THEN
1985 *
1986 * If left singular vectors desired in U, copy result to U
1987 * and generate left bidiagonalizing vectors in U
1988 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1989 *
1990  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1991  IF( wntus )
1992  $ ncu = n
1993  IF( wntua )
1994  $ ncu = m
1995  CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1996  $ work( iwork ), lwork-iwork+1, ierr )
1997  END IF
1998  IF( wntvas ) THEN
1999 *
2000 * If right singular vectors desired in VT, copy result to
2001 * VT and generate right bidiagonalizing vectors in VT
2002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2003 *
2004  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2005  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2006  $ work( iwork ), lwork-iwork+1, ierr )
2007  END IF
2008  IF( wntuo ) THEN
2009 *
2010 * If left singular vectors desired in A, generate left
2011 * bidiagonalizing vectors in A
2012 * (Workspace: need 4*N, prefer 3*N+N*NB)
2013 *
2014  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2015  $ work( iwork ), lwork-iwork+1, ierr )
2016  END IF
2017  IF( wntvo ) THEN
2018 *
2019 * If right singular vectors desired in A, generate right
2020 * bidiagonalizing vectors in A
2021 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2022 *
2023  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2024  $ work( iwork ), lwork-iwork+1, ierr )
2025  END IF
2026  iwork = ie + n
2027  IF( wntuas .OR. wntuo )
2028  $ nru = m
2029  IF( wntun )
2030  $ nru = 0
2031  IF( wntvas .OR. wntvo )
2032  $ ncvt = n
2033  IF( wntvn )
2034  $ ncvt = 0
2035  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2036 *
2037 * Perform bidiagonal QR iteration, if desired, computing
2038 * left singular vectors in U and computing right singular
2039 * vectors in VT
2040 * (Workspace: need BDSPAC)
2041 *
2042  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2043  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2044  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2045 *
2046 * Perform bidiagonal QR iteration, if desired, computing
2047 * left singular vectors in U and computing right singular
2048 * vectors in A
2049 * (Workspace: need BDSPAC)
2050 *
2051  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2052  $ u, ldu, dum, 1, work( iwork ), info )
2053  ELSE
2054 *
2055 * Perform bidiagonal QR iteration, if desired, computing
2056 * left singular vectors in A and computing right singular
2057 * vectors in VT
2058 * (Workspace: need BDSPAC)
2059 *
2060  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2061  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2062  END IF
2063 *
2064  END IF
2065 *
2066  ELSE
2067 *
2068 * A has more columns than rows. If A has sufficiently more
2069 * columns than rows, first reduce using the LQ decomposition (if
2070 * sufficient workspace available)
2071 *
2072  IF( n.GE.mnthr ) THEN
2073 *
2074  IF( wntvn ) THEN
2075 *
2076 * Path 1t(N much larger than M, JOBVT='N')
2077 * No right singular vectors to be computed
2078 *
2079  itau = 1
2080  iwork = itau + m
2081 *
2082 * Compute A=L*Q
2083 * (Workspace: need 2*M, prefer M+M*NB)
2084 *
2085  CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2086  $ lwork-iwork+1, ierr )
2087 *
2088 * Zero out above L
2089 *
2090  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2091  ie = 1
2092  itauq = ie + m
2093  itaup = itauq + m
2094  iwork = itaup + m
2095 *
2096 * Bidiagonalize L in A
2097 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2098 *
2099  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2100  $ work( itaup ), work( iwork ), lwork-iwork+1,
2101  $ ierr )
2102  IF( wntuo .OR. wntuas ) THEN
2103 *
2104 * If left singular vectors desired, generate Q
2105 * (Workspace: need 4*M, prefer 3*M+M*NB)
2106 *
2107  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2108  $ work( iwork ), lwork-iwork+1, ierr )
2109  END IF
2110  iwork = ie + m
2111  nru = 0
2112  IF( wntuo .OR. wntuas )
2113  $ nru = m
2114 *
2115 * Perform bidiagonal QR iteration, computing left singular
2116 * vectors of A in A if desired
2117 * (Workspace: need BDSPAC)
2118 *
2119  CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2120  $ lda, dum, 1, work( iwork ), info )
2121 *
2122 * If left singular vectors desired in U, copy them there
2123 *
2124  IF( wntuas )
2125  $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2126 *
2127  ELSE IF( wntvo .AND. wntun ) THEN
2128 *
2129 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2130 * M right singular vectors to be overwritten on A and
2131 * no left singular vectors to be computed
2132 *
2133  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2134 *
2135 * Sufficient workspace for a fast algorithm
2136 *
2137  ir = 1
2138  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2139 *
2140 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2141 *
2142  ldwrku = lda
2143  chunk = n
2144  ldwrkr = lda
2145  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2146 *
2147 * WORK(IU) is LDA by N and WORK(IR) is M by M
2148 *
2149  ldwrku = lda
2150  chunk = n
2151  ldwrkr = m
2152  ELSE
2153 *
2154 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2155 *
2156  ldwrku = m
2157  chunk = ( lwork-m*m-m ) / m
2158  ldwrkr = m
2159  END IF
2160  itau = ir + ldwrkr*m
2161  iwork = itau + m
2162 *
2163 * Compute A=L*Q
2164 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2165 *
2166  CALL sgelqf( m, n, a, lda, work( itau ),
2167  $ work( iwork ), lwork-iwork+1, ierr )
2168 *
2169 * Copy L to WORK(IR) and zero out above it
2170 *
2171  CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2172  CALL slaset( 'U', m-1, m-1, zero, zero,
2173  $ work( ir+ldwrkr ), ldwrkr )
2174 *
2175 * Generate Q in A
2176 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2177 *
2178  CALL sorglq( m, n, m, a, lda, work( itau ),
2179  $ work( iwork ), lwork-iwork+1, ierr )
2180  ie = itau
2181  itauq = ie + m
2182  itaup = itauq + m
2183  iwork = itaup + m
2184 *
2185 * Bidiagonalize L in WORK(IR)
2186 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2187 *
2188  CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2189  $ work( itauq ), work( itaup ),
2190  $ work( iwork ), lwork-iwork+1, ierr )
2191 *
2192 * Generate right vectors bidiagonalizing L
2193 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2194 *
2195  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2196  $ work( itaup ), work( iwork ),
2197  $ lwork-iwork+1, ierr )
2198  iwork = ie + m
2199 *
2200 * Perform bidiagonal QR iteration, computing right
2201 * singular vectors of L in WORK(IR)
2202 * (Workspace: need M*M+BDSPAC)
2203 *
2204  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2205  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2206  $ work( iwork ), info )
2207  iu = ie + m
2208 *
2209 * Multiply right singular vectors of L in WORK(IR) by Q
2210 * in A, storing result in WORK(IU) and copying to A
2211 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2212 *
2213  DO 30 i = 1, n, chunk
2214  blk = min( n-i+1, chunk )
2215  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2216  $ ldwrkr, a( 1, i ), lda, zero,
2217  $ work( iu ), ldwrku )
2218  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2219  $ a( 1, i ), lda )
2220  30 CONTINUE
2221 *
2222  ELSE
2223 *
2224 * Insufficient workspace for a fast algorithm
2225 *
2226  ie = 1
2227  itauq = ie + m
2228  itaup = itauq + m
2229  iwork = itaup + m
2230 *
2231 * Bidiagonalize A
2232 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2233 *
2234  CALL sgebrd( m, n, a, lda, s, work( ie ),
2235  $ work( itauq ), work( itaup ),
2236  $ work( iwork ), lwork-iwork+1, ierr )
2237 *
2238 * Generate right vectors bidiagonalizing A
2239 * (Workspace: need 4*M, prefer 3*M+M*NB)
2240 *
2241  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2242  $ work( iwork ), lwork-iwork+1, ierr )
2243  iwork = ie + m
2244 *
2245 * Perform bidiagonal QR iteration, computing right
2246 * singular vectors of A in A
2247 * (Workspace: need BDSPAC)
2248 *
2249  CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2250  $ dum, 1, dum, 1, work( iwork ), info )
2251 *
2252  END IF
2253 *
2254  ELSE IF( wntvo .AND. wntuas ) THEN
2255 *
2256 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2257 * M right singular vectors to be overwritten on A and
2258 * M left singular vectors to be computed in U
2259 *
2260  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2261 *
2262 * Sufficient workspace for a fast algorithm
2263 *
2264  ir = 1
2265  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2266 *
2267 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2268 *
2269  ldwrku = lda
2270  chunk = n
2271  ldwrkr = lda
2272  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2273 *
2274 * WORK(IU) is LDA by N and WORK(IR) is M by M
2275 *
2276  ldwrku = lda
2277  chunk = n
2278  ldwrkr = m
2279  ELSE
2280 *
2281 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2282 *
2283  ldwrku = m
2284  chunk = ( lwork-m*m-m ) / m
2285  ldwrkr = m
2286  END IF
2287  itau = ir + ldwrkr*m
2288  iwork = itau + m
2289 *
2290 * Compute A=L*Q
2291 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2292 *
2293  CALL sgelqf( m, n, a, lda, work( itau ),
2294  $ work( iwork ), lwork-iwork+1, ierr )
2295 *
2296 * Copy L to U, zeroing about above it
2297 *
2298  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2299  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2300  $ ldu )
2301 *
2302 * Generate Q in A
2303 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2304 *
2305  CALL sorglq( m, n, m, a, lda, work( itau ),
2306  $ work( iwork ), lwork-iwork+1, ierr )
2307  ie = itau
2308  itauq = ie + m
2309  itaup = itauq + m
2310  iwork = itaup + m
2311 *
2312 * Bidiagonalize L in U, copying result to WORK(IR)
2313 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2314 *
2315  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2316  $ work( itauq ), work( itaup ),
2317  $ work( iwork ), lwork-iwork+1, ierr )
2318  CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2319 *
2320 * Generate right vectors bidiagonalizing L in WORK(IR)
2321 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2322 *
2323  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2324  $ work( itaup ), work( iwork ),
2325  $ lwork-iwork+1, ierr )
2326 *
2327 * Generate left vectors bidiagonalizing L in U
2328 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2329 *
2330  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2331  $ work( iwork ), lwork-iwork+1, ierr )
2332  iwork = ie + m
2333 *
2334 * Perform bidiagonal QR iteration, computing left
2335 * singular vectors of L in U, and computing right
2336 * singular vectors of L in WORK(IR)
2337 * (Workspace: need M*M+BDSPAC)
2338 *
2339  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2340  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2341  $ work( iwork ), info )
2342  iu = ie + m
2343 *
2344 * Multiply right singular vectors of L in WORK(IR) by Q
2345 * in A, storing result in WORK(IU) and copying to A
2346 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2347 *
2348  DO 40 i = 1, n, chunk
2349  blk = min( n-i+1, chunk )
2350  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2351  $ ldwrkr, a( 1, i ), lda, zero,
2352  $ work( iu ), ldwrku )
2353  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2354  $ a( 1, i ), lda )
2355  40 CONTINUE
2356 *
2357  ELSE
2358 *
2359 * Insufficient workspace for a fast algorithm
2360 *
2361  itau = 1
2362  iwork = itau + m
2363 *
2364 * Compute A=L*Q
2365 * (Workspace: need 2*M, prefer M+M*NB)
2366 *
2367  CALL sgelqf( m, n, a, lda, work( itau ),
2368  $ work( iwork ), lwork-iwork+1, ierr )
2369 *
2370 * Copy L to U, zeroing out above it
2371 *
2372  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2373  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2374  $ ldu )
2375 *
2376 * Generate Q in A
2377 * (Workspace: need 2*M, prefer M+M*NB)
2378 *
2379  CALL sorglq( m, n, m, a, lda, work( itau ),
2380  $ work( iwork ), lwork-iwork+1, ierr )
2381  ie = itau
2382  itauq = ie + m
2383  itaup = itauq + m
2384  iwork = itaup + m
2385 *
2386 * Bidiagonalize L in U
2387 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2388 *
2389  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2390  $ work( itauq ), work( itaup ),
2391  $ work( iwork ), lwork-iwork+1, ierr )
2392 *
2393 * Multiply right vectors bidiagonalizing L by Q in A
2394 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2395 *
2396  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2397  $ work( itaup ), a, lda, work( iwork ),
2398  $ lwork-iwork+1, ierr )
2399 *
2400 * Generate left vectors bidiagonalizing L in U
2401 * (Workspace: need 4*M, prefer 3*M+M*NB)
2402 *
2403  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2404  $ work( iwork ), lwork-iwork+1, ierr )
2405  iwork = ie + m
2406 *
2407 * Perform bidiagonal QR iteration, computing left
2408 * singular vectors of A in U and computing right
2409 * singular vectors of A in A
2410 * (Workspace: need BDSPAC)
2411 *
2412  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2413  $ u, ldu, dum, 1, work( iwork ), info )
2414 *
2415  END IF
2416 *
2417  ELSE IF( wntvs ) THEN
2418 *
2419  IF( wntun ) THEN
2420 *
2421 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2422 * M right singular vectors to be computed in VT and
2423 * no left singular vectors to be computed
2424 *
2425  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2426 *
2427 * Sufficient workspace for a fast algorithm
2428 *
2429  ir = 1
2430  IF( lwork.GE.wrkbl+lda*m ) THEN
2431 *
2432 * WORK(IR) is LDA by M
2433 *
2434  ldwrkr = lda
2435  ELSE
2436 *
2437 * WORK(IR) is M by M
2438 *
2439  ldwrkr = m
2440  END IF
2441  itau = ir + ldwrkr*m
2442  iwork = itau + m
2443 *
2444 * Compute A=L*Q
2445 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2446 *
2447  CALL sgelqf( m, n, a, lda, work( itau ),
2448  $ work( iwork ), lwork-iwork+1, ierr )
2449 *
2450 * Copy L to WORK(IR), zeroing out above it
2451 *
2452  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2453  $ ldwrkr )
2454  CALL slaset( 'U', m-1, m-1, zero, zero,
2455  $ work( ir+ldwrkr ), ldwrkr )
2456 *
2457 * Generate Q in A
2458 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2459 *
2460  CALL sorglq( m, n, m, a, lda, work( itau ),
2461  $ work( iwork ), lwork-iwork+1, ierr )
2462  ie = itau
2463  itauq = ie + m
2464  itaup = itauq + m
2465  iwork = itaup + m
2466 *
2467 * Bidiagonalize L in WORK(IR)
2468 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2469 *
2470  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2471  $ work( ie ), work( itauq ),
2472  $ work( itaup ), work( iwork ),
2473  $ lwork-iwork+1, ierr )
2474 *
2475 * Generate right vectors bidiagonalizing L in
2476 * WORK(IR)
2477 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2478 *
2479  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2480  $ work( itaup ), work( iwork ),
2481  $ lwork-iwork+1, ierr )
2482  iwork = ie + m
2483 *
2484 * Perform bidiagonal QR iteration, computing right
2485 * singular vectors of L in WORK(IR)
2486 * (Workspace: need M*M+BDSPAC)
2487 *
2488  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2489  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2490  $ work( iwork ), info )
2491 *
2492 * Multiply right singular vectors of L in WORK(IR) by
2493 * Q in A, storing result in VT
2494 * (Workspace: need M*M)
2495 *
2496  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2497  $ ldwrkr, a, lda, zero, vt, ldvt )
2498 *
2499  ELSE
2500 *
2501 * Insufficient workspace for a fast algorithm
2502 *
2503  itau = 1
2504  iwork = itau + m
2505 *
2506 * Compute A=L*Q
2507 * (Workspace: need 2*M, prefer M+M*NB)
2508 *
2509  CALL sgelqf( m, n, a, lda, work( itau ),
2510  $ work( iwork ), lwork-iwork+1, ierr )
2511 *
2512 * Copy result to VT
2513 *
2514  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2515 *
2516 * Generate Q in VT
2517 * (Workspace: need 2*M, prefer M+M*NB)
2518 *
2519  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2520  $ work( iwork ), lwork-iwork+1, ierr )
2521  ie = itau
2522  itauq = ie + m
2523  itaup = itauq + m
2524  iwork = itaup + m
2525 *
2526 * Zero out above L in A
2527 *
2528  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2529  $ lda )
2530 *
2531 * Bidiagonalize L in A
2532 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2533 *
2534  CALL sgebrd( m, m, a, lda, s, work( ie ),
2535  $ work( itauq ), work( itaup ),
2536  $ work( iwork ), lwork-iwork+1, ierr )
2537 *
2538 * Multiply right vectors bidiagonalizing L by Q in VT
2539 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2540 *
2541  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2542  $ work( itaup ), vt, ldvt,
2543  $ work( iwork ), lwork-iwork+1, ierr )
2544  iwork = ie + m
2545 *
2546 * Perform bidiagonal QR iteration, computing right
2547 * singular vectors of A in VT
2548 * (Workspace: need BDSPAC)
2549 *
2550  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2551  $ ldvt, dum, 1, dum, 1, work( iwork ),
2552  $ info )
2553 *
2554  END IF
2555 *
2556  ELSE IF( wntuo ) THEN
2557 *
2558 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2559 * M right singular vectors to be computed in VT and
2560 * M left singular vectors to be overwritten on A
2561 *
2562  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2563 *
2564 * Sufficient workspace for a fast algorithm
2565 *
2566  iu = 1
2567  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2568 *
2569 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2570 *
2571  ldwrku = lda
2572  ir = iu + ldwrku*m
2573  ldwrkr = lda
2574  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2575 *
2576 * WORK(IU) is LDA by M and WORK(IR) is M by M
2577 *
2578  ldwrku = lda
2579  ir = iu + ldwrku*m
2580  ldwrkr = m
2581  ELSE
2582 *
2583 * WORK(IU) is M by M and WORK(IR) is M by M
2584 *
2585  ldwrku = m
2586  ir = iu + ldwrku*m
2587  ldwrkr = m
2588  END IF
2589  itau = ir + ldwrkr*m
2590  iwork = itau + m
2591 *
2592 * Compute A=L*Q
2593 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2594 *
2595  CALL sgelqf( m, n, a, lda, work( itau ),
2596  $ work( iwork ), lwork-iwork+1, ierr )
2597 *
2598 * Copy L to WORK(IU), zeroing out below it
2599 *
2600  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2601  $ ldwrku )
2602  CALL slaset( 'U', m-1, m-1, zero, zero,
2603  $ work( iu+ldwrku ), ldwrku )
2604 *
2605 * Generate Q in A
2606 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2607 *
2608  CALL sorglq( m, n, m, a, lda, work( itau ),
2609  $ work( iwork ), lwork-iwork+1, ierr )
2610  ie = itau
2611  itauq = ie + m
2612  itaup = itauq + m
2613  iwork = itaup + m
2614 *
2615 * Bidiagonalize L in WORK(IU), copying result to
2616 * WORK(IR)
2617 * (Workspace: need 2*M*M+4*M,
2618 * prefer 2*M*M+3*M+2*M*NB)
2619 *
2620  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2621  $ work( ie ), work( itauq ),
2622  $ work( itaup ), work( iwork ),
2623  $ lwork-iwork+1, ierr )
2624  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2625  $ work( ir ), ldwrkr )
2626 *
2627 * Generate right bidiagonalizing vectors in WORK(IU)
2628 * (Workspace: need 2*M*M+4*M-1,
2629 * prefer 2*M*M+3*M+(M-1)*NB)
2630 *
2631  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2632  $ work( itaup ), work( iwork ),
2633  $ lwork-iwork+1, ierr )
2634 *
2635 * Generate left bidiagonalizing vectors in WORK(IR)
2636 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2637 *
2638  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2639  $ work( itauq ), work( iwork ),
2640  $ lwork-iwork+1, ierr )
2641  iwork = ie + m
2642 *
2643 * Perform bidiagonal QR iteration, computing left
2644 * singular vectors of L in WORK(IR) and computing
2645 * right singular vectors of L in WORK(IU)
2646 * (Workspace: need 2*M*M+BDSPAC)
2647 *
2648  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2649  $ work( iu ), ldwrku, work( ir ),
2650  $ ldwrkr, dum, 1, work( iwork ), info )
2651 *
2652 * Multiply right singular vectors of L in WORK(IU) by
2653 * Q in A, storing result in VT
2654 * (Workspace: need M*M)
2655 *
2656  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2657  $ ldwrku, a, lda, zero, vt, ldvt )
2658 *
2659 * Copy left singular vectors of L to A
2660 * (Workspace: need M*M)
2661 *
2662  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2663  $ lda )
2664 *
2665  ELSE
2666 *
2667 * Insufficient workspace for a fast algorithm
2668 *
2669  itau = 1
2670  iwork = itau + m
2671 *
2672 * Compute A=L*Q, copying result to VT
2673 * (Workspace: need 2*M, prefer M+M*NB)
2674 *
2675  CALL sgelqf( m, n, a, lda, work( itau ),
2676  $ work( iwork ), lwork-iwork+1, ierr )
2677  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2678 *
2679 * Generate Q in VT
2680 * (Workspace: need 2*M, prefer M+M*NB)
2681 *
2682  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2683  $ work( iwork ), lwork-iwork+1, ierr )
2684  ie = itau
2685  itauq = ie + m
2686  itaup = itauq + m
2687  iwork = itaup + m
2688 *
2689 * Zero out above L in A
2690 *
2691  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2692  $ lda )
2693 *
2694 * Bidiagonalize L in A
2695 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2696 *
2697  CALL sgebrd( m, m, a, lda, s, work( ie ),
2698  $ work( itauq ), work( itaup ),
2699  $ work( iwork ), lwork-iwork+1, ierr )
2700 *
2701 * Multiply right vectors bidiagonalizing L by Q in VT
2702 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2703 *
2704  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2705  $ work( itaup ), vt, ldvt,
2706  $ work( iwork ), lwork-iwork+1, ierr )
2707 *
2708 * Generate left bidiagonalizing vectors of L in A
2709 * (Workspace: need 4*M, prefer 3*M+M*NB)
2710 *
2711  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2712  $ work( iwork ), lwork-iwork+1, ierr )
2713  iwork = ie + m
2714 *
2715 * Perform bidiagonal QR iteration, compute left
2716 * singular vectors of A in A and compute right
2717 * singular vectors of A in VT
2718 * (Workspace: need BDSPAC)
2719 *
2720  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2721  $ ldvt, a, lda, dum, 1, work( iwork ),
2722  $ info )
2723 *
2724  END IF
2725 *
2726  ELSE IF( wntuas ) THEN
2727 *
2728 * Path 6t(N much larger than M, JOBU='S' or 'A',
2729 * JOBVT='S')
2730 * M right singular vectors to be computed in VT and
2731 * M left singular vectors to be computed in U
2732 *
2733  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2734 *
2735 * Sufficient workspace for a fast algorithm
2736 *
2737  iu = 1
2738  IF( lwork.GE.wrkbl+lda*m ) THEN
2739 *
2740 * WORK(IU) is LDA by N
2741 *
2742  ldwrku = lda
2743  ELSE
2744 *
2745 * WORK(IU) is LDA by M
2746 *
2747  ldwrku = m
2748  END IF
2749  itau = iu + ldwrku*m
2750  iwork = itau + m
2751 *
2752 * Compute A=L*Q
2753 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2754 *
2755  CALL sgelqf( m, n, a, lda, work( itau ),
2756  $ work( iwork ), lwork-iwork+1, ierr )
2757 *
2758 * Copy L to WORK(IU), zeroing out above it
2759 *
2760  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2761  $ ldwrku )
2762  CALL slaset( 'U', m-1, m-1, zero, zero,
2763  $ work( iu+ldwrku ), ldwrku )
2764 *
2765 * Generate Q in A
2766 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2767 *
2768  CALL sorglq( m, n, m, a, lda, work( itau ),
2769  $ work( iwork ), lwork-iwork+1, ierr )
2770  ie = itau
2771  itauq = ie + m
2772  itaup = itauq + m
2773  iwork = itaup + m
2774 *
2775 * Bidiagonalize L in WORK(IU), copying result to U
2776 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2777 *
2778  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2779  $ work( ie ), work( itauq ),
2780  $ work( itaup ), work( iwork ),
2781  $ lwork-iwork+1, ierr )
2782  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2783  $ ldu )
2784 *
2785 * Generate right bidiagonalizing vectors in WORK(IU)
2786 * (Workspace: need M*M+4*M-1,
2787 * prefer M*M+3*M+(M-1)*NB)
2788 *
2789  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2790  $ work( itaup ), work( iwork ),
2791  $ lwork-iwork+1, ierr )
2792 *
2793 * Generate left bidiagonalizing vectors in U
2794 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2795 *
2796  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2797  $ work( iwork ), lwork-iwork+1, ierr )
2798  iwork = ie + m
2799 *
2800 * Perform bidiagonal QR iteration, computing left
2801 * singular vectors of L in U and computing right
2802 * singular vectors of L in WORK(IU)
2803 * (Workspace: need M*M+BDSPAC)
2804 *
2805  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2806  $ work( iu ), ldwrku, u, ldu, dum, 1,
2807  $ work( iwork ), info )
2808 *
2809 * Multiply right singular vectors of L in WORK(IU) by
2810 * Q in A, storing result in VT
2811 * (Workspace: need M*M)
2812 *
2813  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2814  $ ldwrku, a, lda, zero, vt, ldvt )
2815 *
2816  ELSE
2817 *
2818 * Insufficient workspace for a fast algorithm
2819 *
2820  itau = 1
2821  iwork = itau + m
2822 *
2823 * Compute A=L*Q, copying result to VT
2824 * (Workspace: need 2*M, prefer M+M*NB)
2825 *
2826  CALL sgelqf( m, n, a, lda, work( itau ),
2827  $ work( iwork ), lwork-iwork+1, ierr )
2828  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2829 *
2830 * Generate Q in VT
2831 * (Workspace: need 2*M, prefer M+M*NB)
2832 *
2833  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2834  $ work( iwork ), lwork-iwork+1, ierr )
2835 *
2836 * Copy L to U, zeroing out above it
2837 *
2838  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2839  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2840  $ ldu )
2841  ie = itau
2842  itauq = ie + m
2843  itaup = itauq + m
2844  iwork = itaup + m
2845 *
2846 * Bidiagonalize L in U
2847 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2848 *
2849  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2850  $ work( itauq ), work( itaup ),
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Multiply right bidiagonalizing vectors in U by Q
2854 * in VT
2855 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2856 *
2857  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2858  $ work( itaup ), vt, ldvt,
2859  $ work( iwork ), lwork-iwork+1, ierr )
2860 *
2861 * Generate left bidiagonalizing vectors in U
2862 * (Workspace: need 4*M, prefer 3*M+M*NB)
2863 *
2864  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2865  $ work( iwork ), lwork-iwork+1, ierr )
2866  iwork = ie + m
2867 *
2868 * Perform bidiagonal QR iteration, computing left
2869 * singular vectors of A in U and computing right
2870 * singular vectors of A in VT
2871 * (Workspace: need BDSPAC)
2872 *
2873  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2874  $ ldvt, u, ldu, dum, 1, work( iwork ),
2875  $ info )
2876 *
2877  END IF
2878 *
2879  END IF
2880 *
2881  ELSE IF( wntva ) THEN
2882 *
2883  IF( wntun ) THEN
2884 *
2885 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2886 * N right singular vectors to be computed in VT and
2887 * no left singular vectors to be computed
2888 *
2889  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2890 *
2891 * Sufficient workspace for a fast algorithm
2892 *
2893  ir = 1
2894  IF( lwork.GE.wrkbl+lda*m ) THEN
2895 *
2896 * WORK(IR) is LDA by M
2897 *
2898  ldwrkr = lda
2899  ELSE
2900 *
2901 * WORK(IR) is M by M
2902 *
2903  ldwrkr = m
2904  END IF
2905  itau = ir + ldwrkr*m
2906  iwork = itau + m
2907 *
2908 * Compute A=L*Q, copying result to VT
2909 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2910 *
2911  CALL sgelqf( m, n, a, lda, work( itau ),
2912  $ work( iwork ), lwork-iwork+1, ierr )
2913  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2914 *
2915 * Copy L to WORK(IR), zeroing out above it
2916 *
2917  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2918  $ ldwrkr )
2919  CALL slaset( 'U', m-1, m-1, zero, zero,
2920  $ work( ir+ldwrkr ), ldwrkr )
2921 *
2922 * Generate Q in VT
2923 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2924 *
2925  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2926  $ work( iwork ), lwork-iwork+1, ierr )
2927  ie = itau
2928  itauq = ie + m
2929  itaup = itauq + m
2930  iwork = itaup + m
2931 *
2932 * Bidiagonalize L in WORK(IR)
2933 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2934 *
2935  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2936  $ work( ie ), work( itauq ),
2937  $ work( itaup ), work( iwork ),
2938  $ lwork-iwork+1, ierr )
2939 *
2940 * Generate right bidiagonalizing vectors in WORK(IR)
2941 * (Workspace: need M*M+4*M-1,
2942 * prefer M*M+3*M+(M-1)*NB)
2943 *
2944  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2945  $ work( itaup ), work( iwork ),
2946  $ lwork-iwork+1, ierr )
2947  iwork = ie + m
2948 *
2949 * Perform bidiagonal QR iteration, computing right
2950 * singular vectors of L in WORK(IR)
2951 * (Workspace: need M*M+BDSPAC)
2952 *
2953  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2954  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2955  $ work( iwork ), info )
2956 *
2957 * Multiply right singular vectors of L in WORK(IR) by
2958 * Q in VT, storing result in A
2959 * (Workspace: need M*M)
2960 *
2961  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2962  $ ldwrkr, vt, ldvt, zero, a, lda )
2963 *
2964 * Copy right singular vectors of A from A to VT
2965 *
2966  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2967 *
2968  ELSE
2969 *
2970 * Insufficient workspace for a fast algorithm
2971 *
2972  itau = 1
2973  iwork = itau + m
2974 *
2975 * Compute A=L*Q, copying result to VT
2976 * (Workspace: need 2*M, prefer M+M*NB)
2977 *
2978  CALL sgelqf( m, n, a, lda, work( itau ),
2979  $ work( iwork ), lwork-iwork+1, ierr )
2980  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2981 *
2982 * Generate Q in VT
2983 * (Workspace: need M+N, prefer M+N*NB)
2984 *
2985  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2986  $ work( iwork ), lwork-iwork+1, ierr )
2987  ie = itau
2988  itauq = ie + m
2989  itaup = itauq + m
2990  iwork = itaup + m
2991 *
2992 * Zero out above L in A
2993 *
2994  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2995  $ lda )
2996 *
2997 * Bidiagonalize L in A
2998 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2999 *
3000  CALL sgebrd( m, m, a, lda, s, work( ie ),
3001  $ work( itauq ), work( itaup ),
3002  $ work( iwork ), lwork-iwork+1, ierr )
3003 *
3004 * Multiply right bidiagonalizing vectors in A by Q
3005 * in VT
3006 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3007 *
3008  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3009  $ work( itaup ), vt, ldvt,
3010  $ work( iwork ), lwork-iwork+1, ierr )
3011  iwork = ie + m
3012 *
3013 * Perform bidiagonal QR iteration, computing right
3014 * singular vectors of A in VT
3015 * (Workspace: need BDSPAC)
3016 *
3017  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3018  $ ldvt, dum, 1, dum, 1, work( iwork ),
3019  $ info )
3020 *
3021  END IF
3022 *
3023  ELSE IF( wntuo ) THEN
3024 *
3025 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3026 * N right singular vectors to be computed in VT and
3027 * M left singular vectors to be overwritten on A
3028 *
3029  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3030 *
3031 * Sufficient workspace for a fast algorithm
3032 *
3033  iu = 1
3034  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3035 *
3036 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3037 *
3038  ldwrku = lda
3039  ir = iu + ldwrku*m
3040  ldwrkr = lda
3041  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3042 *
3043 * WORK(IU) is LDA by M and WORK(IR) is M by M
3044 *
3045  ldwrku = lda
3046  ir = iu + ldwrku*m
3047  ldwrkr = m
3048  ELSE
3049 *
3050 * WORK(IU) is M by M and WORK(IR) is M by M
3051 *
3052  ldwrku = m
3053  ir = iu + ldwrku*m
3054  ldwrkr = m
3055  END IF
3056  itau = ir + ldwrkr*m
3057  iwork = itau + m
3058 *
3059 * Compute A=L*Q, copying result to VT
3060 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3061 *
3062  CALL sgelqf( m, n, a, lda, work( itau ),
3063  $ work( iwork ), lwork-iwork+1, ierr )
3064  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3065 *
3066 * Generate Q in VT
3067 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3068 *
3069  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3070  $ work( iwork ), lwork-iwork+1, ierr )
3071 *
3072 * Copy L to WORK(IU), zeroing out above it
3073 *
3074  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3075  $ ldwrku )
3076  CALL slaset( 'U', m-1, m-1, zero, zero,
3077  $ work( iu+ldwrku ), ldwrku )
3078  ie = itau
3079  itauq = ie + m
3080  itaup = itauq + m
3081  iwork = itaup + m
3082 *
3083 * Bidiagonalize L in WORK(IU), copying result to
3084 * WORK(IR)
3085 * (Workspace: need 2*M*M+4*M,
3086 * prefer 2*M*M+3*M+2*M*NB)
3087 *
3088  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3089  $ work( ie ), work( itauq ),
3090  $ work( itaup ), work( iwork ),
3091  $ lwork-iwork+1, ierr )
3092  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3093  $ work( ir ), ldwrkr )
3094 *
3095 * Generate right bidiagonalizing vectors in WORK(IU)
3096 * (Workspace: need 2*M*M+4*M-1,
3097 * prefer 2*M*M+3*M+(M-1)*NB)
3098 *
3099  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3100  $ work( itaup ), work( iwork ),
3101  $ lwork-iwork+1, ierr )
3102 *
3103 * Generate left bidiagonalizing vectors in WORK(IR)
3104 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3105 *
3106  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3107  $ work( itauq ), work( iwork ),
3108  $ lwork-iwork+1, ierr )
3109  iwork = ie + m
3110 *
3111 * Perform bidiagonal QR iteration, computing left
3112 * singular vectors of L in WORK(IR) and computing
3113 * right singular vectors of L in WORK(IU)
3114 * (Workspace: need 2*M*M+BDSPAC)
3115 *
3116  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3117  $ work( iu ), ldwrku, work( ir ),
3118  $ ldwrkr, dum, 1, work( iwork ), info )
3119 *
3120 * Multiply right singular vectors of L in WORK(IU) by
3121 * Q in VT, storing result in A
3122 * (Workspace: need M*M)
3123 *
3124  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3125  $ ldwrku, vt, ldvt, zero, a, lda )
3126 *
3127 * Copy right singular vectors of A from A to VT
3128 *
3129  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3130 *
3131 * Copy left singular vectors of A from WORK(IR) to A
3132 *
3133  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3134  $ lda )
3135 *
3136  ELSE
3137 *
3138 * Insufficient workspace for a fast algorithm
3139 *
3140  itau = 1
3141  iwork = itau + m
3142 *
3143 * Compute A=L*Q, copying result to VT
3144 * (Workspace: need 2*M, prefer M+M*NB)
3145 *
3146  CALL sgelqf( m, n, a, lda, work( itau ),
3147  $ work( iwork ), lwork-iwork+1, ierr )
3148  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3149 *
3150 * Generate Q in VT
3151 * (Workspace: need M+N, prefer M+N*NB)
3152 *
3153  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3154  $ work( iwork ), lwork-iwork+1, ierr )
3155  ie = itau
3156  itauq = ie + m
3157  itaup = itauq + m
3158  iwork = itaup + m
3159 *
3160 * Zero out above L in A
3161 *
3162  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3163  $ lda )
3164 *
3165 * Bidiagonalize L in A
3166 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3167 *
3168  CALL sgebrd( m, m, a, lda, s, work( ie ),
3169  $ work( itauq ), work( itaup ),
3170  $ work( iwork ), lwork-iwork+1, ierr )
3171 *
3172 * Multiply right bidiagonalizing vectors in A by Q
3173 * in VT
3174 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3175 *
3176  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3177  $ work( itaup ), vt, ldvt,
3178  $ work( iwork ), lwork-iwork+1, ierr )
3179 *
3180 * Generate left bidiagonalizing vectors in A
3181 * (Workspace: need 4*M, prefer 3*M+M*NB)
3182 *
3183  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3184  $ work( iwork ), lwork-iwork+1, ierr )
3185  iwork = ie + m
3186 *
3187 * Perform bidiagonal QR iteration, computing left
3188 * singular vectors of A in A and computing right
3189 * singular vectors of A in VT
3190 * (Workspace: need BDSPAC)
3191 *
3192  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3193  $ ldvt, a, lda, dum, 1, work( iwork ),
3194  $ info )
3195 *
3196  END IF
3197 *
3198  ELSE IF( wntuas ) THEN
3199 *
3200 * Path 9t(N much larger than M, JOBU='S' or 'A',
3201 * JOBVT='A')
3202 * N right singular vectors to be computed in VT and
3203 * M left singular vectors to be computed in U
3204 *
3205  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3206 *
3207 * Sufficient workspace for a fast algorithm
3208 *
3209  iu = 1
3210  IF( lwork.GE.wrkbl+lda*m ) THEN
3211 *
3212 * WORK(IU) is LDA by M
3213 *
3214  ldwrku = lda
3215  ELSE
3216 *
3217 * WORK(IU) is M by M
3218 *
3219  ldwrku = m
3220  END IF
3221  itau = iu + ldwrku*m
3222  iwork = itau + m
3223 *
3224 * Compute A=L*Q, copying result to VT
3225 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3226 *
3227  CALL sgelqf( m, n, a, lda, work( itau ),
3228  $ work( iwork ), lwork-iwork+1, ierr )
3229  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3230 *
3231 * Generate Q in VT
3232 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3233 *
3234  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3235  $ work( iwork ), lwork-iwork+1, ierr )
3236 *
3237 * Copy L to WORK(IU), zeroing out above it
3238 *
3239  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3240  $ ldwrku )
3241  CALL slaset( 'U', m-1, m-1, zero, zero,
3242  $ work( iu+ldwrku ), ldwrku )
3243  ie = itau
3244  itauq = ie + m
3245  itaup = itauq + m
3246  iwork = itaup + m
3247 *
3248 * Bidiagonalize L in WORK(IU), copying result to U
3249 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3250 *
3251  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3252  $ work( ie ), work( itauq ),
3253  $ work( itaup ), work( iwork ),
3254  $ lwork-iwork+1, ierr )
3255  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3256  $ ldu )
3257 *
3258 * Generate right bidiagonalizing vectors in WORK(IU)
3259 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3260 *
3261  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3262  $ work( itaup ), work( iwork ),
3263  $ lwork-iwork+1, ierr )
3264 *
3265 * Generate left bidiagonalizing vectors in U
3266 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3267 *
3268  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3269  $ work( iwork ), lwork-iwork+1, ierr )
3270  iwork = ie + m
3271 *
3272 * Perform bidiagonal QR iteration, computing left
3273 * singular vectors of L in U and computing right
3274 * singular vectors of L in WORK(IU)
3275 * (Workspace: need M*M+BDSPAC)
3276 *
3277  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3278  $ work( iu ), ldwrku, u, ldu, dum, 1,
3279  $ work( iwork ), info )
3280 *
3281 * Multiply right singular vectors of L in WORK(IU) by
3282 * Q in VT, storing result in A
3283 * (Workspace: need M*M)
3284 *
3285  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3286  $ ldwrku, vt, ldvt, zero, a, lda )
3287 *
3288 * Copy right singular vectors of A from A to VT
3289 *
3290  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3291 *
3292  ELSE
3293 *
3294 * Insufficient workspace for a fast algorithm
3295 *
3296  itau = 1
3297  iwork = itau + m
3298 *
3299 * Compute A=L*Q, copying result to VT
3300 * (Workspace: need 2*M, prefer M+M*NB)
3301 *
3302  CALL sgelqf( m, n, a, lda, work( itau ),
3303  $ work( iwork ), lwork-iwork+1, ierr )
3304  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3305 *
3306 * Generate Q in VT
3307 * (Workspace: need M+N, prefer M+N*NB)
3308 *
3309  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3310  $ work( iwork ), lwork-iwork+1, ierr )
3311 *
3312 * Copy L to U, zeroing out above it
3313 *
3314  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3315  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3316  $ ldu )
3317  ie = itau
3318  itauq = ie + m
3319  itaup = itauq + m
3320  iwork = itaup + m
3321 *
3322 * Bidiagonalize L in U
3323 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3324 *
3325  CALL sgebrd( m, m, u, ldu, s, work( ie ),
3326  $ work( itauq ), work( itaup ),
3327  $ work( iwork ), lwork-iwork+1, ierr )
3328 *
3329 * Multiply right bidiagonalizing vectors in U by Q
3330 * in VT
3331 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3332 *
3333  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3334  $ work( itaup ), vt, ldvt,
3335  $ work( iwork ), lwork-iwork+1, ierr )
3336 *
3337 * Generate left bidiagonalizing vectors in U
3338 * (Workspace: need 4*M, prefer 3*M+M*NB)
3339 *
3340  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3341  $ work( iwork ), lwork-iwork+1, ierr )
3342  iwork = ie + m
3343 *
3344 * Perform bidiagonal QR iteration, computing left
3345 * singular vectors of A in U and computing right
3346 * singular vectors of A in VT
3347 * (Workspace: need BDSPAC)
3348 *
3349  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3350  $ ldvt, u, ldu, dum, 1, work( iwork ),
3351  $ info )
3352 *
3353  END IF
3354 *
3355  END IF
3356 *
3357  END IF
3358 *
3359  ELSE
3360 *
3361 * N .LT. MNTHR
3362 *
3363 * Path 10t(N greater than M, but not much larger)
3364 * Reduce to bidiagonal form without LQ decomposition
3365 *
3366  ie = 1
3367  itauq = ie + m
3368  itaup = itauq + m
3369  iwork = itaup + m
3370 *
3371 * Bidiagonalize A
3372 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3373 *
3374  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3375  $ work( itaup ), work( iwork ), lwork-iwork+1,
3376  $ ierr )
3377  IF( wntuas ) THEN
3378 *
3379 * If left singular vectors desired in U, copy result to U
3380 * and generate left bidiagonalizing vectors in U
3381 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3382 *
3383  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3384  CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3385  $ work( iwork ), lwork-iwork+1, ierr )
3386  END IF
3387  IF( wntvas ) THEN
3388 *
3389 * If right singular vectors desired in VT, copy result to
3390 * VT and generate right bidiagonalizing vectors in VT
3391 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3392 *
3393  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3394  IF( wntva )
3395  $ nrvt = n
3396  IF( wntvs )
3397  $ nrvt = m
3398  CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3399  $ work( iwork ), lwork-iwork+1, ierr )
3400  END IF
3401  IF( wntuo ) THEN
3402 *
3403 * If left singular vectors desired in A, generate left
3404 * bidiagonalizing vectors in A
3405 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3406 *
3407  CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3408  $ work( iwork ), lwork-iwork+1, ierr )
3409  END IF
3410  IF( wntvo ) THEN
3411 *
3412 * If right singular vectors desired in A, generate right
3413 * bidiagonalizing vectors in A
3414 * (Workspace: need 4*M, prefer 3*M+M*NB)
3415 *
3416  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3417  $ work( iwork ), lwork-iwork+1, ierr )
3418  END IF
3419  iwork = ie + m
3420  IF( wntuas .OR. wntuo )
3421  $ nru = m
3422  IF( wntun )
3423  $ nru = 0
3424  IF( wntvas .OR. wntvo )
3425  $ ncvt = n
3426  IF( wntvn )
3427  $ ncvt = 0
3428  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3429 *
3430 * Perform bidiagonal QR iteration, if desired, computing
3431 * left singular vectors in U and computing right singular
3432 * vectors in VT
3433 * (Workspace: need BDSPAC)
3434 *
3435  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3436  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3437  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3438 *
3439 * Perform bidiagonal QR iteration, if desired, computing
3440 * left singular vectors in U and computing right singular
3441 * vectors in A
3442 * (Workspace: need BDSPAC)
3443 *
3444  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3445  $ u, ldu, dum, 1, work( iwork ), info )
3446  ELSE
3447 *
3448 * Perform bidiagonal QR iteration, if desired, computing
3449 * left singular vectors in A and computing right singular
3450 * vectors in VT
3451 * (Workspace: need BDSPAC)
3452 *
3453  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3454  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3455  END IF
3456 *
3457  END IF
3458 *
3459  END IF
3460 *
3461 * If SBDSQR failed to converge, copy unconverged superdiagonals
3462 * to WORK( 2:MINMN )
3463 *
3464  IF( info.NE.0 ) THEN
3465  IF( ie.GT.2 ) THEN
3466  DO 50 i = 1, minmn - 1
3467  work( i+1 ) = work( i+ie-1 )
3468  50 CONTINUE
3469  END IF
3470  IF( ie.LT.2 ) THEN
3471  DO 60 i = minmn - 1, 1, -1
3472  work( i+1 ) = work( i+ie-1 )
3473  60 CONTINUE
3474  END IF
3475  END IF
3476 *
3477 * Undo scaling if necessary
3478 *
3479  IF( iscl.EQ.1 ) THEN
3480  IF( anrm.GT.bignum )
3481  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3482  $ ierr )
3483  IF( info.NE.0 .AND. anrm.GT.bignum )
3484  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3485  $ minmn, ierr )
3486  IF( anrm.LT.smlnum )
3487  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3488  $ ierr )
3489  IF( info.NE.0 .AND. anrm.LT.smlnum )
3490  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3491  $ minmn, ierr )
3492  END IF
3493 *
3494 * Return optimal workspace in WORK(1)
3495 *
3496  work( 1 ) = maxwrk
3497 *
3498  RETURN
3499 *
3500 * End of SGESVD
3501 *
3502  END
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine sbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
SBDSQR
Definition: sbdsqr.f:240
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:157
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:145
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:205
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:143
subroutine sgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
SGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: sgesvd.f:211
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:196
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:127
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:128
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187