LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
sgesvd.f
Go to the documentation of this file.
1 *> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL A( LDA, * ), S( * ), U( LDU, * ),
30 * $ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> SGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is REAL array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is REAL array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is REAL array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is REAL array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is REAL array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if SBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \date April 2012
207 *
208 *> \ingroup realGEsing
209 *
210 * =====================================================================
211  SUBROUTINE sgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
212  $ work, lwork, info )
213 *
214 * -- LAPACK driver routine (version 3.4.1) --
215 * -- LAPACK is a software package provided by Univ. of Tennessee, --
216 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217 * April 2012
218 *
219 * .. Scalar Arguments ..
220  CHARACTER jobu, jobvt
221  INTEGER info, lda, ldu, ldvt, lwork, m, n
222 * ..
223 * .. Array Arguments ..
224  REAL a( lda, * ), s( * ), u( ldu, * ),
225  $ vt( ldvt, * ), work( * )
226 * ..
227 *
228 * =====================================================================
229 *
230 * .. Parameters ..
231  REAL zero, one
232  parameter( zero = 0.0e0, one = 1.0e0 )
233 * ..
234 * .. Local Scalars ..
235  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
236  $ wntva, wntvas, wntvn, wntvo, wntvs
237  INTEGER bdspac, blk, chunk, i, ie, ierr, ir, iscl,
238  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
239  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
240  $ nrvt, wrkbl
241  INTEGER lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m,
242  $ lwork_sgebrd, lwork_sorgbr_p, lwork_sorgbr_q,
243  $ lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
244  REAL anrm, bignum, eps, smlnum
245 * ..
246 * .. Local Arrays ..
247  REAL dum( 1 )
248 * ..
249 * .. External Subroutines ..
250  EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
252  $ xerbla
253 * ..
254 * .. External Functions ..
255  LOGICAL lsame
256  INTEGER ilaenv
257  REAL slamch, slange
258  EXTERNAL lsame, ilaenv, slamch, slange
259 * ..
260 * .. Intrinsic Functions ..
261  INTRINSIC max, min, sqrt
262 * ..
263 * .. Executable Statements ..
264 *
265 * Test the input arguments
266 *
267  info = 0
268  minmn = min( m, n )
269  wntua = lsame( jobu, 'A' )
270  wntus = lsame( jobu, 'S' )
271  wntuas = wntua .OR. wntus
272  wntuo = lsame( jobu, 'O' )
273  wntun = lsame( jobu, 'N' )
274  wntva = lsame( jobvt, 'A' )
275  wntvs = lsame( jobvt, 'S' )
276  wntvas = wntva .OR. wntvs
277  wntvo = lsame( jobvt, 'O' )
278  wntvn = lsame( jobvt, 'N' )
279  lquery = ( lwork.EQ.-1 )
280 *
281  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
282  info = -1
283  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
284  $ ( wntvo .AND. wntuo ) ) THEN
285  info = -2
286  ELSE IF( m.LT.0 ) THEN
287  info = -3
288  ELSE IF( n.LT.0 ) THEN
289  info = -4
290  ELSE IF( lda.LT.max( 1, m ) ) THEN
291  info = -6
292  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
293  info = -9
294  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
295  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
296  info = -11
297  END IF
298 *
299 * Compute workspace
300 * (Note: Comments in the code beginning "Workspace:" describe the
301 * minimal amount of workspace needed at that point in the code,
302 * as well as the preferred amount for good performance.
303 * NB refers to the optimal block size for the immediately
304 * following subroutine, as returned by ILAENV.)
305 *
306  IF( info.EQ.0 ) THEN
307  minwrk = 1
308  maxwrk = 1
309  IF( m.GE.n .AND. minmn.GT.0 ) THEN
310 *
311 * Compute space needed for SBDSQR
312 *
313  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
314  bdspac = 5*n
315 * Compute space needed for SGEQRF
316  CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_sgeqrf=dum(1)
318 * Compute space needed for SORGQR
319  CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
320  lwork_sorgqr_n=dum(1)
321  CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
322  lwork_sorgqr_m=dum(1)
323 * Compute space needed for SGEBRD
324  CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
325  $ dum(1), dum(1), -1, ierr )
326  lwork_sgebrd=dum(1)
327 * Compute space needed for SORGBR P
328  CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
329  $ dum(1), -1, ierr )
330  lwork_sorgbr_p=dum(1)
331 * Compute space needed for SORGBR Q
332  CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
333  $ dum(1), -1, ierr )
334  lwork_sorgbr_q=dum(1)
335 *
336  IF( m.GE.mnthr ) THEN
337  IF( wntun ) THEN
338 *
339 * Path 1 (M much larger than N, JOBU='N')
340 *
341  maxwrk = n + lwork_sgeqrf
342  maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
343  IF( wntvo .OR. wntvas )
344  $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
345  maxwrk = max( maxwrk, bdspac )
346  minwrk = max( 4*n, bdspac )
347  ELSE IF( wntuo .AND. wntvn ) THEN
348 *
349 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350 *
351  wrkbl = n + lwork_sgeqrf
352  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
353  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
354  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
355  wrkbl = max( wrkbl, bdspac )
356  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
357  minwrk = max( 3*n+m, bdspac )
358  ELSE IF( wntuo .AND. wntvas ) THEN
359 *
360 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361 * 'A')
362 *
363  wrkbl = n + lwork_sgeqrf
364  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
365  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
366  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
367  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
368  wrkbl = max( wrkbl, bdspac )
369  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
370  minwrk = max( 3*n+m, bdspac )
371  ELSE IF( wntus .AND. wntvn ) THEN
372 *
373 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374 *
375  wrkbl = n + lwork_sgeqrf
376  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
377  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
378  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
379  wrkbl = max( wrkbl, bdspac )
380  maxwrk = n*n + wrkbl
381  minwrk = max( 3*n+m, bdspac )
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_sgeqrf
387  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
388  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
389  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
390  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
391  wrkbl = max( wrkbl, bdspac )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = max( 3*n+m, bdspac )
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_sgeqrf
400  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
401  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
402  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
403  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
404  wrkbl = max( wrkbl, bdspac )
405  maxwrk = n*n + wrkbl
406  minwrk = max( 3*n+m, bdspac )
407  ELSE IF( wntua .AND. wntvn ) THEN
408 *
409 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410 *
411  wrkbl = n + lwork_sgeqrf
412  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
413  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
414  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
415  wrkbl = max( wrkbl, bdspac )
416  maxwrk = n*n + wrkbl
417  minwrk = max( 3*n+m, bdspac )
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_sgeqrf
423  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
424  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
425  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
426  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
427  wrkbl = max( wrkbl, bdspac )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = max( 3*n+m, bdspac )
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_sgeqrf
436  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
437  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
438  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
439  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
440  wrkbl = max( wrkbl, bdspac )
441  maxwrk = n*n + wrkbl
442  minwrk = max( 3*n+m, bdspac )
443  END IF
444  ELSE
445 *
446 * Path 10 (M at least N, but not much larger)
447 *
448  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
449  $ dum(1), dum(1), -1, ierr )
450  lwork_sgebrd=dum(1)
451  maxwrk = 3*n + lwork_sgebrd
452  IF( wntus .OR. wntuo ) THEN
453  CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
454  $ dum(1), -1, ierr )
455  lwork_sorgbr_q=dum(1)
456  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
457  END IF
458  IF( wntua ) THEN
459  CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
460  $ dum(1), -1, ierr )
461  lwork_sorgbr_q=dum(1)
462  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
463  END IF
464  IF( .NOT.wntvn ) THEN
465  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
466  END IF
467  maxwrk = max( maxwrk, bdspac )
468  minwrk = max( 3*n+m, bdspac )
469  END IF
470  ELSE IF( minmn.GT.0 ) THEN
471 *
472 * Compute space needed for SBDSQR
473 *
474  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
475  bdspac = 5*m
476 * Compute space needed for SGELQF
477  CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
478  lwork_sgelqf=dum(1)
479 * Compute space needed for SORGLQ
480  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
481  lwork_sorglq_n=dum(1)
482  CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
483  lwork_sorglq_m=dum(1)
484 * Compute space needed for SGEBRD
485  CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
486  $ dum(1), dum(1), -1, ierr )
487  lwork_sgebrd=dum(1)
488 * Compute space needed for SORGBR P
489  CALL sorgbr( 'P', m, m, m, a, n, dum(1),
490  $ dum(1), -1, ierr )
491  lwork_sorgbr_p=dum(1)
492 * Compute space needed for SORGBR Q
493  CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
494  $ dum(1), -1, ierr )
495  lwork_sorgbr_q=dum(1)
496  IF( n.GE.mnthr ) THEN
497  IF( wntvn ) THEN
498 *
499 * Path 1t(N much larger than M, JOBVT='N')
500 *
501  maxwrk = m + lwork_sgelqf
502  maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
503  IF( wntuo .OR. wntuas )
504  $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
505  maxwrk = max( maxwrk, bdspac )
506  minwrk = max( 4*m, bdspac )
507  ELSE IF( wntvo .AND. wntun ) THEN
508 *
509 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510 *
511  wrkbl = m + lwork_sgelqf
512  wrkbl = max( wrkbl, m+lwork_sorglq_m )
513  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
514  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
515  wrkbl = max( wrkbl, bdspac )
516  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
517  minwrk = max( 3*m+n, bdspac )
518  ELSE IF( wntvo .AND. wntuas ) THEN
519 *
520 * Path 3t(N much larger than M, JOBU='S' or 'A',
521 * JOBVT='O')
522 *
523  wrkbl = m + lwork_sgelqf
524  wrkbl = max( wrkbl, m+lwork_sorglq_m )
525  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
526  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
527  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
528  wrkbl = max( wrkbl, bdspac )
529  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
530  minwrk = max( 3*m+n, bdspac )
531  ELSE IF( wntvs .AND. wntun ) THEN
532 *
533 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534 *
535  wrkbl = m + lwork_sgelqf
536  wrkbl = max( wrkbl, m+lwork_sorglq_m )
537  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
538  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
539  wrkbl = max( wrkbl, bdspac )
540  maxwrk = m*m + wrkbl
541  minwrk = max( 3*m+n, bdspac )
542  ELSE IF( wntvs .AND. wntuo ) THEN
543 *
544 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545 *
546  wrkbl = m + lwork_sgelqf
547  wrkbl = max( wrkbl, m+lwork_sorglq_m )
548  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
549  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
550  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
551  wrkbl = max( wrkbl, bdspac )
552  maxwrk = 2*m*m + wrkbl
553  minwrk = max( 3*m+n, bdspac )
554  maxwrk = max( maxwrk, minwrk )
555  ELSE IF( wntvs .AND. wntuas ) THEN
556 *
557 * Path 6t(N much larger than M, JOBU='S' or 'A',
558 * JOBVT='S')
559 *
560  wrkbl = m + lwork_sgelqf
561  wrkbl = max( wrkbl, m+lwork_sorglq_m )
562  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
563  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
564  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
565  wrkbl = max( wrkbl, bdspac )
566  maxwrk = m*m + wrkbl
567  minwrk = max( 3*m+n, bdspac )
568  ELSE IF( wntva .AND. wntun ) THEN
569 *
570 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
571 *
572  wrkbl = m + lwork_sgelqf
573  wrkbl = max( wrkbl, m+lwork_sorglq_n )
574  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
575  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
576  wrkbl = max( wrkbl, bdspac )
577  maxwrk = m*m + wrkbl
578  minwrk = max( 3*m+n, bdspac )
579  ELSE IF( wntva .AND. wntuo ) THEN
580 *
581 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
582 *
583  wrkbl = m + lwork_sgelqf
584  wrkbl = max( wrkbl, m+lwork_sorglq_n )
585  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
586  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
587  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
588  wrkbl = max( wrkbl, bdspac )
589  maxwrk = 2*m*m + wrkbl
590  minwrk = max( 3*m+n, bdspac )
591  ELSE IF( wntva .AND. wntuas ) THEN
592 *
593 * Path 9t(N much larger than M, JOBU='S' or 'A',
594 * JOBVT='A')
595 *
596  wrkbl = m + lwork_sgelqf
597  wrkbl = max( wrkbl, m+lwork_sorglq_n )
598  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
599  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
600  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
601  wrkbl = max( wrkbl, bdspac )
602  maxwrk = m*m + wrkbl
603  minwrk = max( 3*m+n, bdspac )
604  END IF
605  ELSE
606 *
607 * Path 10t(N greater than M, but not much larger)
608 *
609  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
610  $ dum(1), dum(1), -1, ierr )
611  lwork_sgebrd=dum(1)
612  maxwrk = 3*m + lwork_sgebrd
613  IF( wntvs .OR. wntvo ) THEN
614 * Compute space needed for SORGBR P
615  CALL sorgbr( 'P', m, n, m, a, n, dum(1),
616  $ dum(1), -1, ierr )
617  lwork_sorgbr_p=dum(1)
618  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
619  END IF
620  IF( wntva ) THEN
621  CALL sorgbr( 'P', n, n, m, a, n, dum(1),
622  $ dum(1), -1, ierr )
623  lwork_sorgbr_p=dum(1)
624  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
625  END IF
626  IF( .NOT.wntun ) THEN
627  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
628  END IF
629  maxwrk = max( maxwrk, bdspac )
630  minwrk = max( 3*m+n, bdspac )
631  END IF
632  END IF
633  maxwrk = max( maxwrk, minwrk )
634  work( 1 ) = maxwrk
635 *
636  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
637  info = -13
638  END IF
639  END IF
640 *
641  IF( info.NE.0 ) THEN
642  CALL xerbla( 'SGESVD', -info )
643  return
644  ELSE IF( lquery ) THEN
645  return
646  END IF
647 *
648 * Quick return if possible
649 *
650  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
651  return
652  END IF
653 *
654 * Get machine constants
655 *
656  eps = slamch( 'P' )
657  smlnum = sqrt( slamch( 'S' ) ) / eps
658  bignum = one / smlnum
659 *
660 * Scale A if max element outside range [SMLNUM,BIGNUM]
661 *
662  anrm = slange( 'M', m, n, a, lda, dum )
663  iscl = 0
664  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
665  iscl = 1
666  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
667  ELSE IF( anrm.GT.bignum ) THEN
668  iscl = 1
669  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
670  END IF
671 *
672  IF( m.GE.n ) THEN
673 *
674 * A has at least as many rows as columns. If A has sufficiently
675 * more rows than columns, first reduce using the QR
676 * decomposition (if sufficient workspace available)
677 *
678  IF( m.GE.mnthr ) THEN
679 *
680  IF( wntun ) THEN
681 *
682 * Path 1 (M much larger than N, JOBU='N')
683 * No left singular vectors to be computed
684 *
685  itau = 1
686  iwork = itau + n
687 *
688 * Compute A=Q*R
689 * (Workspace: need 2*N, prefer N+N*NB)
690 *
691  CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
692  $ lwork-iwork+1, ierr )
693 *
694 * Zero out below R
695 *
696  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
697  ie = 1
698  itauq = ie + n
699  itaup = itauq + n
700  iwork = itaup + n
701 *
702 * Bidiagonalize R in A
703 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
704 *
705  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706  $ work( itaup ), work( iwork ), lwork-iwork+1,
707  $ ierr )
708  ncvt = 0
709  IF( wntvo .OR. wntvas ) THEN
710 *
711 * If right singular vectors desired, generate P'.
712 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713 *
714  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715  $ work( iwork ), lwork-iwork+1, ierr )
716  ncvt = n
717  END IF
718  iwork = ie + n
719 *
720 * Perform bidiagonal QR iteration, computing right
721 * singular vectors of A in A if desired
722 * (Workspace: need BDSPAC)
723 *
724  CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725  $ dum, 1, dum, 1, work( iwork ), info )
726 *
727 * If right singular vectors desired in VT, copy them there
728 *
729  IF( wntvas )
730  $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731 *
732  ELSE IF( wntuo .AND. wntvn ) THEN
733 *
734 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735 * N left singular vectors to be overwritten on A and
736 * no right singular vectors to be computed
737 *
738  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739 *
740 * Sufficient workspace for a fast algorithm
741 *
742  ir = 1
743  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744 *
745 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
746 *
747  ldwrku = lda
748  ldwrkr = lda
749  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750 *
751 * WORK(IU) is LDA by N, WORK(IR) is N by N
752 *
753  ldwrku = lda
754  ldwrkr = n
755  ELSE
756 *
757 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758 *
759  ldwrku = ( lwork-n*n-n ) / n
760  ldwrkr = n
761  END IF
762  itau = ir + ldwrkr*n
763  iwork = itau + n
764 *
765 * Compute A=Q*R
766 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767 *
768  CALL sgeqrf( m, n, a, lda, work( itau ),
769  $ work( iwork ), lwork-iwork+1, ierr )
770 *
771 * Copy R to WORK(IR) and zero out below it
772 *
773  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774  CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775  $ ldwrkr )
776 *
777 * Generate Q in A
778 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779 *
780  CALL sorgqr( m, n, n, a, lda, work( itau ),
781  $ work( iwork ), lwork-iwork+1, ierr )
782  ie = itau
783  itauq = ie + n
784  itaup = itauq + n
785  iwork = itaup + n
786 *
787 * Bidiagonalize R in WORK(IR)
788 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789 *
790  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791  $ work( itauq ), work( itaup ),
792  $ work( iwork ), lwork-iwork+1, ierr )
793 *
794 * Generate left vectors bidiagonalizing R
795 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796 *
797  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798  $ work( itauq ), work( iwork ),
799  $ lwork-iwork+1, ierr )
800  iwork = ie + n
801 *
802 * Perform bidiagonal QR iteration, computing left
803 * singular vectors of R in WORK(IR)
804 * (Workspace: need N*N+BDSPAC)
805 *
806  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807  $ work( ir ), ldwrkr, dum, 1,
808  $ work( iwork ), info )
809  iu = ie + n
810 *
811 * Multiply Q in A by left singular vectors of R in
812 * WORK(IR), storing result in WORK(IU) and copying to A
813 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814 *
815  DO 10 i = 1, m, ldwrku
816  chunk = min( m-i+1, ldwrku )
817  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818  $ lda, work( ir ), ldwrkr, zero,
819  $ work( iu ), ldwrku )
820  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821  $ a( i, 1 ), lda )
822  10 continue
823 *
824  ELSE
825 *
826 * Insufficient workspace for a fast algorithm
827 *
828  ie = 1
829  itauq = ie + n
830  itaup = itauq + n
831  iwork = itaup + n
832 *
833 * Bidiagonalize A
834 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835 *
836  CALL sgebrd( m, n, a, lda, s, work( ie ),
837  $ work( itauq ), work( itaup ),
838  $ work( iwork ), lwork-iwork+1, ierr )
839 *
840 * Generate left vectors bidiagonalizing A
841 * (Workspace: need 4*N, prefer 3*N+N*NB)
842 *
843  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844  $ work( iwork ), lwork-iwork+1, ierr )
845  iwork = ie + n
846 *
847 * Perform bidiagonal QR iteration, computing left
848 * singular vectors of A in A
849 * (Workspace: need BDSPAC)
850 *
851  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852  $ a, lda, dum, 1, work( iwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n-n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891 *
892  CALL sgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL slaset( 'L', n-1, n-1, zero, zero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904 *
905  CALL sorgqr( m, n, n, a, lda, work( itau ),
906  $ work( iwork ), lwork-iwork+1, ierr )
907  ie = itau
908  itauq = ie + n
909  itaup = itauq + n
910  iwork = itaup + n
911 *
912 * Bidiagonalize R in VT, copying result to WORK(IR)
913 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914 *
915  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916  $ work( itauq ), work( itaup ),
917  $ work( iwork ), lwork-iwork+1, ierr )
918  CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919 *
920 * Generate left vectors bidiagonalizing R in WORK(IR)
921 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922 *
923  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924  $ work( itauq ), work( iwork ),
925  $ lwork-iwork+1, ierr )
926 *
927 * Generate right vectors bidiagonalizing R in VT
928 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929 *
930  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931  $ work( iwork ), lwork-iwork+1, ierr )
932  iwork = ie + n
933 *
934 * Perform bidiagonal QR iteration, computing left
935 * singular vectors of R in WORK(IR) and computing right
936 * singular vectors of R in VT
937 * (Workspace: need N*N+BDSPAC)
938 *
939  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940  $ work( ir ), ldwrkr, dum, 1,
941  $ work( iwork ), info )
942  iu = ie + n
943 *
944 * Multiply Q in A by left singular vectors of R in
945 * WORK(IR), storing result in WORK(IU) and copying to A
946 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947 *
948  DO 20 i = 1, m, ldwrku
949  chunk = min( m-i+1, ldwrku )
950  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951  $ lda, work( ir ), ldwrkr, zero,
952  $ work( iu ), ldwrku )
953  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954  $ a( i, 1 ), lda )
955  20 continue
956 *
957  ELSE
958 *
959 * Insufficient workspace for a fast algorithm
960 *
961  itau = 1
962  iwork = itau + n
963 *
964 * Compute A=Q*R
965 * (Workspace: need 2*N, prefer N+N*NB)
966 *
967  CALL sgeqrf( m, n, a, lda, work( itau ),
968  $ work( iwork ), lwork-iwork+1, ierr )
969 *
970 * Copy R to VT, zeroing out below it
971 *
972  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973  IF( n.GT.1 )
974  $ CALL slaset( 'L', n-1, n-1, zero, zero,
975  $ vt( 2, 1 ), ldvt )
976 *
977 * Generate Q in A
978 * (Workspace: need 2*N, prefer N+N*NB)
979 *
980  CALL sorgqr( m, n, n, a, lda, work( itau ),
981  $ work( iwork ), lwork-iwork+1, ierr )
982  ie = itau
983  itauq = ie + n
984  itaup = itauq + n
985  iwork = itaup + n
986 *
987 * Bidiagonalize R in VT
988 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
989 *
990  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991  $ work( itauq ), work( itaup ),
992  $ work( iwork ), lwork-iwork+1, ierr )
993 *
994 * Multiply Q in A by left vectors bidiagonalizing R
995 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
996 *
997  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998  $ work( itauq ), a, lda, work( iwork ),
999  $ lwork-iwork+1, ierr )
1000 *
1001 * Generate right vectors bidiagonalizing R in VT
1002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003 *
1004  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005  $ work( iwork ), lwork-iwork+1, ierr )
1006  iwork = ie + n
1007 *
1008 * Perform bidiagonal QR iteration, computing left
1009 * singular vectors of A in A and computing right
1010 * singular vectors of A in VT
1011 * (Workspace: need BDSPAC)
1012 *
1013  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014  $ a, lda, dum, 1, work( iwork ), info )
1015 *
1016  END IF
1017 *
1018  ELSE IF( wntus ) THEN
1019 *
1020  IF( wntvn ) THEN
1021 *
1022 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023 * N left singular vectors to be computed in U and
1024 * no right singular vectors to be computed
1025 *
1026  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027 *
1028 * Sufficient workspace for a fast algorithm
1029 *
1030  ir = 1
1031  IF( lwork.GE.wrkbl+lda*n ) THEN
1032 *
1033 * WORK(IR) is LDA by N
1034 *
1035  ldwrkr = lda
1036  ELSE
1037 *
1038 * WORK(IR) is N by N
1039 *
1040  ldwrkr = n
1041  END IF
1042  itau = ir + ldwrkr*n
1043  iwork = itau + n
1044 *
1045 * Compute A=Q*R
1046 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047 *
1048  CALL sgeqrf( m, n, a, lda, work( itau ),
1049  $ work( iwork ), lwork-iwork+1, ierr )
1050 *
1051 * Copy R to WORK(IR), zeroing out below it
1052 *
1053  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054  $ ldwrkr )
1055  CALL slaset( 'L', n-1, n-1, zero, zero,
1056  $ work( ir+1 ), ldwrkr )
1057 *
1058 * Generate Q in A
1059 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 *
1061  CALL sorgqr( m, n, n, a, lda, work( itau ),
1062  $ work( iwork ), lwork-iwork+1, ierr )
1063  ie = itau
1064  itauq = ie + n
1065  itaup = itauq + n
1066  iwork = itaup + n
1067 *
1068 * Bidiagonalize R in WORK(IR)
1069 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070 *
1071  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072  $ work( ie ), work( itauq ),
1073  $ work( itaup ), work( iwork ),
1074  $ lwork-iwork+1, ierr )
1075 *
1076 * Generate left vectors bidiagonalizing R in WORK(IR)
1077 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078 *
1079  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080  $ work( itauq ), work( iwork ),
1081  $ lwork-iwork+1, ierr )
1082  iwork = ie + n
1083 *
1084 * Perform bidiagonal QR iteration, computing left
1085 * singular vectors of R in WORK(IR)
1086 * (Workspace: need N*N+BDSPAC)
1087 *
1088  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089  $ 1, work( ir ), ldwrkr, dum, 1,
1090  $ work( iwork ), info )
1091 *
1092 * Multiply Q in A by left singular vectors of R in
1093 * WORK(IR), storing result in U
1094 * (Workspace: need N*N)
1095 *
1096  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097  $ work( ir ), ldwrkr, zero, u, ldu )
1098 *
1099  ELSE
1100 *
1101 * Insufficient workspace for a fast algorithm
1102 *
1103  itau = 1
1104  iwork = itau + n
1105 *
1106 * Compute A=Q*R, copying result to U
1107 * (Workspace: need 2*N, prefer N+N*NB)
1108 *
1109  CALL sgeqrf( m, n, a, lda, work( itau ),
1110  $ work( iwork ), lwork-iwork+1, ierr )
1111  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112 *
1113 * Generate Q in U
1114 * (Workspace: need 2*N, prefer N+N*NB)
1115 *
1116  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117  $ work( iwork ), lwork-iwork+1, ierr )
1118  ie = itau
1119  itauq = ie + n
1120  itaup = itauq + n
1121  iwork = itaup + n
1122 *
1123 * Zero out below R in A
1124 *
1125  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1126  $ lda )
1127 *
1128 * Bidiagonalize R in A
1129 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1130 *
1131  CALL sgebrd( n, n, a, lda, s, work( ie ),
1132  $ work( itauq ), work( itaup ),
1133  $ work( iwork ), lwork-iwork+1, ierr )
1134 *
1135 * Multiply Q in U by left vectors bidiagonalizing R
1136 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1137 *
1138  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1139  $ work( itauq ), u, ldu, work( iwork ),
1140  $ lwork-iwork+1, ierr )
1141  iwork = ie + n
1142 *
1143 * Perform bidiagonal QR iteration, computing left
1144 * singular vectors of A in U
1145 * (Workspace: need BDSPAC)
1146 *
1147  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1148  $ 1, u, ldu, dum, 1, work( iwork ),
1149  $ info )
1150 *
1151  END IF
1152 *
1153  ELSE IF( wntvo ) THEN
1154 *
1155 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1156 * N left singular vectors to be computed in U and
1157 * N right singular vectors to be overwritten on A
1158 *
1159  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1160 *
1161 * Sufficient workspace for a fast algorithm
1162 *
1163  iu = 1
1164  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1165 *
1166 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1167 *
1168  ldwrku = lda
1169  ir = iu + ldwrku*n
1170  ldwrkr = lda
1171  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1172 *
1173 * WORK(IU) is LDA by N and WORK(IR) is N by N
1174 *
1175  ldwrku = lda
1176  ir = iu + ldwrku*n
1177  ldwrkr = n
1178  ELSE
1179 *
1180 * WORK(IU) is N by N and WORK(IR) is N by N
1181 *
1182  ldwrku = n
1183  ir = iu + ldwrku*n
1184  ldwrkr = n
1185  END IF
1186  itau = ir + ldwrkr*n
1187  iwork = itau + n
1188 *
1189 * Compute A=Q*R
1190 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1191 *
1192  CALL sgeqrf( m, n, a, lda, work( itau ),
1193  $ work( iwork ), lwork-iwork+1, ierr )
1194 *
1195 * Copy R to WORK(IU), zeroing out below it
1196 *
1197  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1198  $ ldwrku )
1199  CALL slaset( 'L', n-1, n-1, zero, zero,
1200  $ work( iu+1 ), ldwrku )
1201 *
1202 * Generate Q in A
1203 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1204 *
1205  CALL sorgqr( m, n, n, a, lda, work( itau ),
1206  $ work( iwork ), lwork-iwork+1, ierr )
1207  ie = itau
1208  itauq = ie + n
1209  itaup = itauq + n
1210  iwork = itaup + n
1211 *
1212 * Bidiagonalize R in WORK(IU), copying result to
1213 * WORK(IR)
1214 * (Workspace: need 2*N*N+4*N,
1215 * prefer 2*N*N+3*N+2*N*NB)
1216 *
1217  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1218  $ work( ie ), work( itauq ),
1219  $ work( itaup ), work( iwork ),
1220  $ lwork-iwork+1, ierr )
1221  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1222  $ work( ir ), ldwrkr )
1223 *
1224 * Generate left bidiagonalizing vectors in WORK(IU)
1225 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1226 *
1227  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1228  $ work( itauq ), work( iwork ),
1229  $ lwork-iwork+1, ierr )
1230 *
1231 * Generate right bidiagonalizing vectors in WORK(IR)
1232 * (Workspace: need 2*N*N+4*N-1,
1233 * prefer 2*N*N+3*N+(N-1)*NB)
1234 *
1235  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1236  $ work( itaup ), work( iwork ),
1237  $ lwork-iwork+1, ierr )
1238  iwork = ie + n
1239 *
1240 * Perform bidiagonal QR iteration, computing left
1241 * singular vectors of R in WORK(IU) and computing
1242 * right singular vectors of R in WORK(IR)
1243 * (Workspace: need 2*N*N+BDSPAC)
1244 *
1245  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1246  $ work( ir ), ldwrkr, work( iu ),
1247  $ ldwrku, dum, 1, work( iwork ), info )
1248 *
1249 * Multiply Q in A by left singular vectors of R in
1250 * WORK(IU), storing result in U
1251 * (Workspace: need N*N)
1252 *
1253  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1254  $ work( iu ), ldwrku, zero, u, ldu )
1255 *
1256 * Copy right singular vectors of R to A
1257 * (Workspace: need N*N)
1258 *
1259  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1260  $ lda )
1261 *
1262  ELSE
1263 *
1264 * Insufficient workspace for a fast algorithm
1265 *
1266  itau = 1
1267  iwork = itau + n
1268 *
1269 * Compute A=Q*R, copying result to U
1270 * (Workspace: need 2*N, prefer N+N*NB)
1271 *
1272  CALL sgeqrf( m, n, a, lda, work( itau ),
1273  $ work( iwork ), lwork-iwork+1, ierr )
1274  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1275 *
1276 * Generate Q in U
1277 * (Workspace: need 2*N, prefer N+N*NB)
1278 *
1279  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1280  $ work( iwork ), lwork-iwork+1, ierr )
1281  ie = itau
1282  itauq = ie + n
1283  itaup = itauq + n
1284  iwork = itaup + n
1285 *
1286 * Zero out below R in A
1287 *
1288  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1289  $ lda )
1290 *
1291 * Bidiagonalize R in A
1292 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1293 *
1294  CALL sgebrd( n, n, a, lda, s, work( ie ),
1295  $ work( itauq ), work( itaup ),
1296  $ work( iwork ), lwork-iwork+1, ierr )
1297 *
1298 * Multiply Q in U by left vectors bidiagonalizing R
1299 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1300 *
1301  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1302  $ work( itauq ), u, ldu, work( iwork ),
1303  $ lwork-iwork+1, ierr )
1304 *
1305 * Generate right vectors bidiagonalizing R in A
1306 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1307 *
1308  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1309  $ work( iwork ), lwork-iwork+1, ierr )
1310  iwork = ie + n
1311 *
1312 * Perform bidiagonal QR iteration, computing left
1313 * singular vectors of A in U and computing right
1314 * singular vectors of A in A
1315 * (Workspace: need BDSPAC)
1316 *
1317  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1318  $ lda, u, ldu, dum, 1, work( iwork ),
1319  $ info )
1320 *
1321  END IF
1322 *
1323  ELSE IF( wntvas ) THEN
1324 *
1325 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1326 * or 'A')
1327 * N left singular vectors to be computed in U and
1328 * N right singular vectors to be computed in VT
1329 *
1330  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1331 *
1332 * Sufficient workspace for a fast algorithm
1333 *
1334  iu = 1
1335  IF( lwork.GE.wrkbl+lda*n ) THEN
1336 *
1337 * WORK(IU) is LDA by N
1338 *
1339  ldwrku = lda
1340  ELSE
1341 *
1342 * WORK(IU) is N by N
1343 *
1344  ldwrku = n
1345  END IF
1346  itau = iu + ldwrku*n
1347  iwork = itau + n
1348 *
1349 * Compute A=Q*R
1350 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1351 *
1352  CALL sgeqrf( m, n, a, lda, work( itau ),
1353  $ work( iwork ), lwork-iwork+1, ierr )
1354 *
1355 * Copy R to WORK(IU), zeroing out below it
1356 *
1357  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1358  $ ldwrku )
1359  CALL slaset( 'L', n-1, n-1, zero, zero,
1360  $ work( iu+1 ), ldwrku )
1361 *
1362 * Generate Q in A
1363 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1364 *
1365  CALL sorgqr( m, n, n, a, lda, work( itau ),
1366  $ work( iwork ), lwork-iwork+1, ierr )
1367  ie = itau
1368  itauq = ie + n
1369  itaup = itauq + n
1370  iwork = itaup + n
1371 *
1372 * Bidiagonalize R in WORK(IU), copying result to VT
1373 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1374 *
1375  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1376  $ work( ie ), work( itauq ),
1377  $ work( itaup ), work( iwork ),
1378  $ lwork-iwork+1, ierr )
1379  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1380  $ ldvt )
1381 *
1382 * Generate left bidiagonalizing vectors in WORK(IU)
1383 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1384 *
1385  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1386  $ work( itauq ), work( iwork ),
1387  $ lwork-iwork+1, ierr )
1388 *
1389 * Generate right bidiagonalizing vectors in VT
1390 * (Workspace: need N*N+4*N-1,
1391 * prefer N*N+3*N+(N-1)*NB)
1392 *
1393  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1394  $ work( iwork ), lwork-iwork+1, ierr )
1395  iwork = ie + n
1396 *
1397 * Perform bidiagonal QR iteration, computing left
1398 * singular vectors of R in WORK(IU) and computing
1399 * right singular vectors of R in VT
1400 * (Workspace: need N*N+BDSPAC)
1401 *
1402  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1403  $ ldvt, work( iu ), ldwrku, dum, 1,
1404  $ work( iwork ), info )
1405 *
1406 * Multiply Q in A by left singular vectors of R in
1407 * WORK(IU), storing result in U
1408 * (Workspace: need N*N)
1409 *
1410  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1411  $ work( iu ), ldwrku, zero, u, ldu )
1412 *
1413  ELSE
1414 *
1415 * Insufficient workspace for a fast algorithm
1416 *
1417  itau = 1
1418  iwork = itau + n
1419 *
1420 * Compute A=Q*R, copying result to U
1421 * (Workspace: need 2*N, prefer N+N*NB)
1422 *
1423  CALL sgeqrf( m, n, a, lda, work( itau ),
1424  $ work( iwork ), lwork-iwork+1, ierr )
1425  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1426 *
1427 * Generate Q in U
1428 * (Workspace: need 2*N, prefer N+N*NB)
1429 *
1430  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1431  $ work( iwork ), lwork-iwork+1, ierr )
1432 *
1433 * Copy R to VT, zeroing out below it
1434 *
1435  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1436  IF( n.GT.1 )
1437  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1438  $ vt( 2, 1 ), ldvt )
1439  ie = itau
1440  itauq = ie + n
1441  itaup = itauq + n
1442  iwork = itaup + n
1443 *
1444 * Bidiagonalize R in VT
1445 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1446 *
1447  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1448  $ work( itauq ), work( itaup ),
1449  $ work( iwork ), lwork-iwork+1, ierr )
1450 *
1451 * Multiply Q in U by left bidiagonalizing vectors
1452 * in VT
1453 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1454 *
1455  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1456  $ work( itauq ), u, ldu, work( iwork ),
1457  $ lwork-iwork+1, ierr )
1458 *
1459 * Generate right bidiagonalizing vectors in VT
1460 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1461 *
1462  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1463  $ work( iwork ), lwork-iwork+1, ierr )
1464  iwork = ie + n
1465 *
1466 * Perform bidiagonal QR iteration, computing left
1467 * singular vectors of A in U and computing right
1468 * singular vectors of A in VT
1469 * (Workspace: need BDSPAC)
1470 *
1471  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1472  $ ldvt, u, ldu, dum, 1, work( iwork ),
1473  $ info )
1474 *
1475  END IF
1476 *
1477  END IF
1478 *
1479  ELSE IF( wntua ) THEN
1480 *
1481  IF( wntvn ) THEN
1482 *
1483 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1484 * M left singular vectors to be computed in U and
1485 * no right singular vectors to be computed
1486 *
1487  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1488 *
1489 * Sufficient workspace for a fast algorithm
1490 *
1491  ir = 1
1492  IF( lwork.GE.wrkbl+lda*n ) THEN
1493 *
1494 * WORK(IR) is LDA by N
1495 *
1496  ldwrkr = lda
1497  ELSE
1498 *
1499 * WORK(IR) is N by N
1500 *
1501  ldwrkr = n
1502  END IF
1503  itau = ir + ldwrkr*n
1504  iwork = itau + n
1505 *
1506 * Compute A=Q*R, copying result to U
1507 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1508 *
1509  CALL sgeqrf( m, n, a, lda, work( itau ),
1510  $ work( iwork ), lwork-iwork+1, ierr )
1511  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1512 *
1513 * Copy R to WORK(IR), zeroing out below it
1514 *
1515  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1516  $ ldwrkr )
1517  CALL slaset( 'L', n-1, n-1, zero, zero,
1518  $ work( ir+1 ), ldwrkr )
1519 *
1520 * Generate Q in U
1521 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1522 *
1523  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1524  $ work( iwork ), lwork-iwork+1, ierr )
1525  ie = itau
1526  itauq = ie + n
1527  itaup = itauq + n
1528  iwork = itaup + n
1529 *
1530 * Bidiagonalize R in WORK(IR)
1531 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1532 *
1533  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1534  $ work( ie ), work( itauq ),
1535  $ work( itaup ), work( iwork ),
1536  $ lwork-iwork+1, ierr )
1537 *
1538 * Generate left bidiagonalizing vectors in WORK(IR)
1539 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1540 *
1541  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1542  $ work( itauq ), work( iwork ),
1543  $ lwork-iwork+1, ierr )
1544  iwork = ie + n
1545 *
1546 * Perform bidiagonal QR iteration, computing left
1547 * singular vectors of R in WORK(IR)
1548 * (Workspace: need N*N+BDSPAC)
1549 *
1550  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1551  $ 1, work( ir ), ldwrkr, dum, 1,
1552  $ work( iwork ), info )
1553 *
1554 * Multiply Q in U by left singular vectors of R in
1555 * WORK(IR), storing result in A
1556 * (Workspace: need N*N)
1557 *
1558  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1559  $ work( ir ), ldwrkr, zero, a, lda )
1560 *
1561 * Copy left singular vectors of A from A to U
1562 *
1563  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1564 *
1565  ELSE
1566 *
1567 * Insufficient workspace for a fast algorithm
1568 *
1569  itau = 1
1570  iwork = itau + n
1571 *
1572 * Compute A=Q*R, copying result to U
1573 * (Workspace: need 2*N, prefer N+N*NB)
1574 *
1575  CALL sgeqrf( m, n, a, lda, work( itau ),
1576  $ work( iwork ), lwork-iwork+1, ierr )
1577  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1578 *
1579 * Generate Q in U
1580 * (Workspace: need N+M, prefer N+M*NB)
1581 *
1582  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1583  $ work( iwork ), lwork-iwork+1, ierr )
1584  ie = itau
1585  itauq = ie + n
1586  itaup = itauq + n
1587  iwork = itaup + n
1588 *
1589 * Zero out below R in A
1590 *
1591  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1592  $ lda )
1593 *
1594 * Bidiagonalize R in A
1595 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1596 *
1597  CALL sgebrd( n, n, a, lda, s, work( ie ),
1598  $ work( itauq ), work( itaup ),
1599  $ work( iwork ), lwork-iwork+1, ierr )
1600 *
1601 * Multiply Q in U by left bidiagonalizing vectors
1602 * in A
1603 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1604 *
1605  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1606  $ work( itauq ), u, ldu, work( iwork ),
1607  $ lwork-iwork+1, ierr )
1608  iwork = ie + n
1609 *
1610 * Perform bidiagonal QR iteration, computing left
1611 * singular vectors of A in U
1612 * (Workspace: need BDSPAC)
1613 *
1614  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1615  $ 1, u, ldu, dum, 1, work( iwork ),
1616  $ info )
1617 *
1618  END IF
1619 *
1620  ELSE IF( wntvo ) THEN
1621 *
1622 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1623 * M left singular vectors to be computed in U and
1624 * N right singular vectors to be overwritten on A
1625 *
1626  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1627 *
1628 * Sufficient workspace for a fast algorithm
1629 *
1630  iu = 1
1631  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1632 *
1633 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1634 *
1635  ldwrku = lda
1636  ir = iu + ldwrku*n
1637  ldwrkr = lda
1638  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1639 *
1640 * WORK(IU) is LDA by N and WORK(IR) is N by N
1641 *
1642  ldwrku = lda
1643  ir = iu + ldwrku*n
1644  ldwrkr = n
1645  ELSE
1646 *
1647 * WORK(IU) is N by N and WORK(IR) is N by N
1648 *
1649  ldwrku = n
1650  ir = iu + ldwrku*n
1651  ldwrkr = n
1652  END IF
1653  itau = ir + ldwrkr*n
1654  iwork = itau + n
1655 *
1656 * Compute A=Q*R, copying result to U
1657 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1658 *
1659  CALL sgeqrf( m, n, a, lda, work( itau ),
1660  $ work( iwork ), lwork-iwork+1, ierr )
1661  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1662 *
1663 * Generate Q in U
1664 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1665 *
1666  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1667  $ work( iwork ), lwork-iwork+1, ierr )
1668 *
1669 * Copy R to WORK(IU), zeroing out below it
1670 *
1671  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1672  $ ldwrku )
1673  CALL slaset( 'L', n-1, n-1, zero, zero,
1674  $ work( iu+1 ), ldwrku )
1675  ie = itau
1676  itauq = ie + n
1677  itaup = itauq + n
1678  iwork = itaup + n
1679 *
1680 * Bidiagonalize R in WORK(IU), copying result to
1681 * WORK(IR)
1682 * (Workspace: need 2*N*N+4*N,
1683 * prefer 2*N*N+3*N+2*N*NB)
1684 *
1685  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1686  $ work( ie ), work( itauq ),
1687  $ work( itaup ), work( iwork ),
1688  $ lwork-iwork+1, ierr )
1689  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1690  $ work( ir ), ldwrkr )
1691 *
1692 * Generate left bidiagonalizing vectors in WORK(IU)
1693 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1694 *
1695  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1696  $ work( itauq ), work( iwork ),
1697  $ lwork-iwork+1, ierr )
1698 *
1699 * Generate right bidiagonalizing vectors in WORK(IR)
1700 * (Workspace: need 2*N*N+4*N-1,
1701 * prefer 2*N*N+3*N+(N-1)*NB)
1702 *
1703  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1704  $ work( itaup ), work( iwork ),
1705  $ lwork-iwork+1, ierr )
1706  iwork = ie + n
1707 *
1708 * Perform bidiagonal QR iteration, computing left
1709 * singular vectors of R in WORK(IU) and computing
1710 * right singular vectors of R in WORK(IR)
1711 * (Workspace: need 2*N*N+BDSPAC)
1712 *
1713  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1714  $ work( ir ), ldwrkr, work( iu ),
1715  $ ldwrku, dum, 1, work( iwork ), info )
1716 *
1717 * Multiply Q in U by left singular vectors of R in
1718 * WORK(IU), storing result in A
1719 * (Workspace: need N*N)
1720 *
1721  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1722  $ work( iu ), ldwrku, zero, a, lda )
1723 *
1724 * Copy left singular vectors of A from A to U
1725 *
1726  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1727 *
1728 * Copy right singular vectors of R from WORK(IR) to A
1729 *
1730  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1731  $ lda )
1732 *
1733  ELSE
1734 *
1735 * Insufficient workspace for a fast algorithm
1736 *
1737  itau = 1
1738  iwork = itau + n
1739 *
1740 * Compute A=Q*R, copying result to U
1741 * (Workspace: need 2*N, prefer N+N*NB)
1742 *
1743  CALL sgeqrf( m, n, a, lda, work( itau ),
1744  $ work( iwork ), lwork-iwork+1, ierr )
1745  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1746 *
1747 * Generate Q in U
1748 * (Workspace: need N+M, prefer N+M*NB)
1749 *
1750  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1751  $ work( iwork ), lwork-iwork+1, ierr )
1752  ie = itau
1753  itauq = ie + n
1754  itaup = itauq + n
1755  iwork = itaup + n
1756 *
1757 * Zero out below R in A
1758 *
1759  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1760  $ lda )
1761 *
1762 * Bidiagonalize R in A
1763 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1764 *
1765  CALL sgebrd( n, n, a, lda, s, work( ie ),
1766  $ work( itauq ), work( itaup ),
1767  $ work( iwork ), lwork-iwork+1, ierr )
1768 *
1769 * Multiply Q in U by left bidiagonalizing vectors
1770 * in A
1771 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1772 *
1773  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1774  $ work( itauq ), u, ldu, work( iwork ),
1775  $ lwork-iwork+1, ierr )
1776 *
1777 * Generate right bidiagonalizing vectors in A
1778 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1779 *
1780  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1781  $ work( iwork ), lwork-iwork+1, ierr )
1782  iwork = ie + n
1783 *
1784 * Perform bidiagonal QR iteration, computing left
1785 * singular vectors of A in U and computing right
1786 * singular vectors of A in A
1787 * (Workspace: need BDSPAC)
1788 *
1789  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1790  $ lda, u, ldu, dum, 1, work( iwork ),
1791  $ info )
1792 *
1793  END IF
1794 *
1795  ELSE IF( wntvas ) THEN
1796 *
1797 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1798 * or 'A')
1799 * M left singular vectors to be computed in U and
1800 * N right singular vectors to be computed in VT
1801 *
1802  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1803 *
1804 * Sufficient workspace for a fast algorithm
1805 *
1806  iu = 1
1807  IF( lwork.GE.wrkbl+lda*n ) THEN
1808 *
1809 * WORK(IU) is LDA by N
1810 *
1811  ldwrku = lda
1812  ELSE
1813 *
1814 * WORK(IU) is N by N
1815 *
1816  ldwrku = n
1817  END IF
1818  itau = iu + ldwrku*n
1819  iwork = itau + n
1820 *
1821 * Compute A=Q*R, copying result to U
1822 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1823 *
1824  CALL sgeqrf( m, n, a, lda, work( itau ),
1825  $ work( iwork ), lwork-iwork+1, ierr )
1826  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1827 *
1828 * Generate Q in U
1829 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1830 *
1831  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1832  $ work( iwork ), lwork-iwork+1, ierr )
1833 *
1834 * Copy R to WORK(IU), zeroing out below it
1835 *
1836  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1837  $ ldwrku )
1838  CALL slaset( 'L', n-1, n-1, zero, zero,
1839  $ work( iu+1 ), ldwrku )
1840  ie = itau
1841  itauq = ie + n
1842  itaup = itauq + n
1843  iwork = itaup + n
1844 *
1845 * Bidiagonalize R in WORK(IU), copying result to VT
1846 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1847 *
1848  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1849  $ work( ie ), work( itauq ),
1850  $ work( itaup ), work( iwork ),
1851  $ lwork-iwork+1, ierr )
1852  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1853  $ ldvt )
1854 *
1855 * Generate left bidiagonalizing vectors in WORK(IU)
1856 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1857 *
1858  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1859  $ work( itauq ), work( iwork ),
1860  $ lwork-iwork+1, ierr )
1861 *
1862 * Generate right bidiagonalizing vectors in VT
1863 * (Workspace: need N*N+4*N-1,
1864 * prefer N*N+3*N+(N-1)*NB)
1865 *
1866  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1867  $ work( iwork ), lwork-iwork+1, ierr )
1868  iwork = ie + n
1869 *
1870 * Perform bidiagonal QR iteration, computing left
1871 * singular vectors of R in WORK(IU) and computing
1872 * right singular vectors of R in VT
1873 * (Workspace: need N*N+BDSPAC)
1874 *
1875  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1876  $ ldvt, work( iu ), ldwrku, dum, 1,
1877  $ work( iwork ), info )
1878 *
1879 * Multiply Q in U by left singular vectors of R in
1880 * WORK(IU), storing result in A
1881 * (Workspace: need N*N)
1882 *
1883  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1884  $ work( iu ), ldwrku, zero, a, lda )
1885 *
1886 * Copy left singular vectors of A from A to U
1887 *
1888  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1889 *
1890  ELSE
1891 *
1892 * Insufficient workspace for a fast algorithm
1893 *
1894  itau = 1
1895  iwork = itau + n
1896 *
1897 * Compute A=Q*R, copying result to U
1898 * (Workspace: need 2*N, prefer N+N*NB)
1899 *
1900  CALL sgeqrf( m, n, a, lda, work( itau ),
1901  $ work( iwork ), lwork-iwork+1, ierr )
1902  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1903 *
1904 * Generate Q in U
1905 * (Workspace: need N+M, prefer N+M*NB)
1906 *
1907  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1908  $ work( iwork ), lwork-iwork+1, ierr )
1909 *
1910 * Copy R from A to VT, zeroing out below it
1911 *
1912  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1913  IF( n.GT.1 )
1914  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1915  $ vt( 2, 1 ), ldvt )
1916  ie = itau
1917  itauq = ie + n
1918  itaup = itauq + n
1919  iwork = itaup + n
1920 *
1921 * Bidiagonalize R in VT
1922 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1923 *
1924  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1925  $ work( itauq ), work( itaup ),
1926  $ work( iwork ), lwork-iwork+1, ierr )
1927 *
1928 * Multiply Q in U by left bidiagonalizing vectors
1929 * in VT
1930 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1931 *
1932  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1933  $ work( itauq ), u, ldu, work( iwork ),
1934  $ lwork-iwork+1, ierr )
1935 *
1936 * Generate right bidiagonalizing vectors in VT
1937 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1938 *
1939  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1940  $ work( iwork ), lwork-iwork+1, ierr )
1941  iwork = ie + n
1942 *
1943 * Perform bidiagonal QR iteration, computing left
1944 * singular vectors of A in U and computing right
1945 * singular vectors of A in VT
1946 * (Workspace: need BDSPAC)
1947 *
1948  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1949  $ ldvt, u, ldu, dum, 1, work( iwork ),
1950  $ info )
1951 *
1952  END IF
1953 *
1954  END IF
1955 *
1956  END IF
1957 *
1958  ELSE
1959 *
1960 * M .LT. MNTHR
1961 *
1962 * Path 10 (M at least N, but not much larger)
1963 * Reduce to bidiagonal form without QR decomposition
1964 *
1965  ie = 1
1966  itauq = ie + n
1967  itaup = itauq + n
1968  iwork = itaup + n
1969 *
1970 * Bidiagonalize A
1971 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1972 *
1973  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1974  $ work( itaup ), work( iwork ), lwork-iwork+1,
1975  $ ierr )
1976  IF( wntuas ) THEN
1977 *
1978 * If left singular vectors desired in U, copy result to U
1979 * and generate left bidiagonalizing vectors in U
1980 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1981 *
1982  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1983  IF( wntus )
1984  $ ncu = n
1985  IF( wntua )
1986  $ ncu = m
1987  CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1988  $ work( iwork ), lwork-iwork+1, ierr )
1989  END IF
1990  IF( wntvas ) THEN
1991 *
1992 * If right singular vectors desired in VT, copy result to
1993 * VT and generate right bidiagonalizing vectors in VT
1994 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1995 *
1996  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1997  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1998  $ work( iwork ), lwork-iwork+1, ierr )
1999  END IF
2000  IF( wntuo ) THEN
2001 *
2002 * If left singular vectors desired in A, generate left
2003 * bidiagonalizing vectors in A
2004 * (Workspace: need 4*N, prefer 3*N+N*NB)
2005 *
2006  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2007  $ work( iwork ), lwork-iwork+1, ierr )
2008  END IF
2009  IF( wntvo ) THEN
2010 *
2011 * If right singular vectors desired in A, generate right
2012 * bidiagonalizing vectors in A
2013 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2014 *
2015  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2016  $ work( iwork ), lwork-iwork+1, ierr )
2017  END IF
2018  iwork = ie + n
2019  IF( wntuas .OR. wntuo )
2020  $ nru = m
2021  IF( wntun )
2022  $ nru = 0
2023  IF( wntvas .OR. wntvo )
2024  $ ncvt = n
2025  IF( wntvn )
2026  $ ncvt = 0
2027  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2028 *
2029 * Perform bidiagonal QR iteration, if desired, computing
2030 * left singular vectors in U and computing right singular
2031 * vectors in VT
2032 * (Workspace: need BDSPAC)
2033 *
2034  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2035  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2036  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2037 *
2038 * Perform bidiagonal QR iteration, if desired, computing
2039 * left singular vectors in U and computing right singular
2040 * vectors in A
2041 * (Workspace: need BDSPAC)
2042 *
2043  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2044  $ u, ldu, dum, 1, work( iwork ), info )
2045  ELSE
2046 *
2047 * Perform bidiagonal QR iteration, if desired, computing
2048 * left singular vectors in A and computing right singular
2049 * vectors in VT
2050 * (Workspace: need BDSPAC)
2051 *
2052  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2053  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2054  END IF
2055 *
2056  END IF
2057 *
2058  ELSE
2059 *
2060 * A has more columns than rows. If A has sufficiently more
2061 * columns than rows, first reduce using the LQ decomposition (if
2062 * sufficient workspace available)
2063 *
2064  IF( n.GE.mnthr ) THEN
2065 *
2066  IF( wntvn ) THEN
2067 *
2068 * Path 1t(N much larger than M, JOBVT='N')
2069 * No right singular vectors to be computed
2070 *
2071  itau = 1
2072  iwork = itau + m
2073 *
2074 * Compute A=L*Q
2075 * (Workspace: need 2*M, prefer M+M*NB)
2076 *
2077  CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2078  $ lwork-iwork+1, ierr )
2079 *
2080 * Zero out above L
2081 *
2082  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2083  ie = 1
2084  itauq = ie + m
2085  itaup = itauq + m
2086  iwork = itaup + m
2087 *
2088 * Bidiagonalize L in A
2089 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2090 *
2091  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2092  $ work( itaup ), work( iwork ), lwork-iwork+1,
2093  $ ierr )
2094  IF( wntuo .OR. wntuas ) THEN
2095 *
2096 * If left singular vectors desired, generate Q
2097 * (Workspace: need 4*M, prefer 3*M+M*NB)
2098 *
2099  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2100  $ work( iwork ), lwork-iwork+1, ierr )
2101  END IF
2102  iwork = ie + m
2103  nru = 0
2104  IF( wntuo .OR. wntuas )
2105  $ nru = m
2106 *
2107 * Perform bidiagonal QR iteration, computing left singular
2108 * vectors of A in A if desired
2109 * (Workspace: need BDSPAC)
2110 *
2111  CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2112  $ lda, dum, 1, work( iwork ), info )
2113 *
2114 * If left singular vectors desired in U, copy them there
2115 *
2116  IF( wntuas )
2117  $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2118 *
2119  ELSE IF( wntvo .AND. wntun ) THEN
2120 *
2121 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2122 * M right singular vectors to be overwritten on A and
2123 * no left singular vectors to be computed
2124 *
2125  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2126 *
2127 * Sufficient workspace for a fast algorithm
2128 *
2129  ir = 1
2130  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2131 *
2132 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2133 *
2134  ldwrku = lda
2135  chunk = n
2136  ldwrkr = lda
2137  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2138 *
2139 * WORK(IU) is LDA by N and WORK(IR) is M by M
2140 *
2141  ldwrku = lda
2142  chunk = n
2143  ldwrkr = m
2144  ELSE
2145 *
2146 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2147 *
2148  ldwrku = m
2149  chunk = ( lwork-m*m-m ) / m
2150  ldwrkr = m
2151  END IF
2152  itau = ir + ldwrkr*m
2153  iwork = itau + m
2154 *
2155 * Compute A=L*Q
2156 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2157 *
2158  CALL sgelqf( m, n, a, lda, work( itau ),
2159  $ work( iwork ), lwork-iwork+1, ierr )
2160 *
2161 * Copy L to WORK(IR) and zero out above it
2162 *
2163  CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2164  CALL slaset( 'U', m-1, m-1, zero, zero,
2165  $ work( ir+ldwrkr ), ldwrkr )
2166 *
2167 * Generate Q in A
2168 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2169 *
2170  CALL sorglq( m, n, m, a, lda, work( itau ),
2171  $ work( iwork ), lwork-iwork+1, ierr )
2172  ie = itau
2173  itauq = ie + m
2174  itaup = itauq + m
2175  iwork = itaup + m
2176 *
2177 * Bidiagonalize L in WORK(IR)
2178 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2179 *
2180  CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2181  $ work( itauq ), work( itaup ),
2182  $ work( iwork ), lwork-iwork+1, ierr )
2183 *
2184 * Generate right vectors bidiagonalizing L
2185 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2186 *
2187  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2188  $ work( itaup ), work( iwork ),
2189  $ lwork-iwork+1, ierr )
2190  iwork = ie + m
2191 *
2192 * Perform bidiagonal QR iteration, computing right
2193 * singular vectors of L in WORK(IR)
2194 * (Workspace: need M*M+BDSPAC)
2195 *
2196  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2197  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2198  $ work( iwork ), info )
2199  iu = ie + m
2200 *
2201 * Multiply right singular vectors of L in WORK(IR) by Q
2202 * in A, storing result in WORK(IU) and copying to A
2203 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2204 *
2205  DO 30 i = 1, n, chunk
2206  blk = min( n-i+1, chunk )
2207  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2208  $ ldwrkr, a( 1, i ), lda, zero,
2209  $ work( iu ), ldwrku )
2210  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2211  $ a( 1, i ), lda )
2212  30 continue
2213 *
2214  ELSE
2215 *
2216 * Insufficient workspace for a fast algorithm
2217 *
2218  ie = 1
2219  itauq = ie + m
2220  itaup = itauq + m
2221  iwork = itaup + m
2222 *
2223 * Bidiagonalize A
2224 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2225 *
2226  CALL sgebrd( m, n, a, lda, s, work( ie ),
2227  $ work( itauq ), work( itaup ),
2228  $ work( iwork ), lwork-iwork+1, ierr )
2229 *
2230 * Generate right vectors bidiagonalizing A
2231 * (Workspace: need 4*M, prefer 3*M+M*NB)
2232 *
2233  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2234  $ work( iwork ), lwork-iwork+1, ierr )
2235  iwork = ie + m
2236 *
2237 * Perform bidiagonal QR iteration, computing right
2238 * singular vectors of A in A
2239 * (Workspace: need BDSPAC)
2240 *
2241  CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2242  $ dum, 1, dum, 1, work( iwork ), info )
2243 *
2244  END IF
2245 *
2246  ELSE IF( wntvo .AND. wntuas ) THEN
2247 *
2248 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2249 * M right singular vectors to be overwritten on A and
2250 * M left singular vectors to be computed in U
2251 *
2252  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2253 *
2254 * Sufficient workspace for a fast algorithm
2255 *
2256  ir = 1
2257  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2258 *
2259 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2260 *
2261  ldwrku = lda
2262  chunk = n
2263  ldwrkr = lda
2264  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2265 *
2266 * WORK(IU) is LDA by N and WORK(IR) is M by M
2267 *
2268  ldwrku = lda
2269  chunk = n
2270  ldwrkr = m
2271  ELSE
2272 *
2273 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2274 *
2275  ldwrku = m
2276  chunk = ( lwork-m*m-m ) / m
2277  ldwrkr = m
2278  END IF
2279  itau = ir + ldwrkr*m
2280  iwork = itau + m
2281 *
2282 * Compute A=L*Q
2283 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2284 *
2285  CALL sgelqf( m, n, a, lda, work( itau ),
2286  $ work( iwork ), lwork-iwork+1, ierr )
2287 *
2288 * Copy L to U, zeroing about above it
2289 *
2290  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2291  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2292  $ ldu )
2293 *
2294 * Generate Q in A
2295 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2296 *
2297  CALL sorglq( m, n, m, a, lda, work( itau ),
2298  $ work( iwork ), lwork-iwork+1, ierr )
2299  ie = itau
2300  itauq = ie + m
2301  itaup = itauq + m
2302  iwork = itaup + m
2303 *
2304 * Bidiagonalize L in U, copying result to WORK(IR)
2305 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2306 *
2307  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2308  $ work( itauq ), work( itaup ),
2309  $ work( iwork ), lwork-iwork+1, ierr )
2310  CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2311 *
2312 * Generate right vectors bidiagonalizing L in WORK(IR)
2313 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2314 *
2315  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2316  $ work( itaup ), work( iwork ),
2317  $ lwork-iwork+1, ierr )
2318 *
2319 * Generate left vectors bidiagonalizing L in U
2320 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2321 *
2322  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2323  $ work( iwork ), lwork-iwork+1, ierr )
2324  iwork = ie + m
2325 *
2326 * Perform bidiagonal QR iteration, computing left
2327 * singular vectors of L in U, and computing right
2328 * singular vectors of L in WORK(IR)
2329 * (Workspace: need M*M+BDSPAC)
2330 *
2331  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2332  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2333  $ work( iwork ), info )
2334  iu = ie + m
2335 *
2336 * Multiply right singular vectors of L in WORK(IR) by Q
2337 * in A, storing result in WORK(IU) and copying to A
2338 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2339 *
2340  DO 40 i = 1, n, chunk
2341  blk = min( n-i+1, chunk )
2342  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2343  $ ldwrkr, a( 1, i ), lda, zero,
2344  $ work( iu ), ldwrku )
2345  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2346  $ a( 1, i ), lda )
2347  40 continue
2348 *
2349  ELSE
2350 *
2351 * Insufficient workspace for a fast algorithm
2352 *
2353  itau = 1
2354  iwork = itau + m
2355 *
2356 * Compute A=L*Q
2357 * (Workspace: need 2*M, prefer M+M*NB)
2358 *
2359  CALL sgelqf( m, n, a, lda, work( itau ),
2360  $ work( iwork ), lwork-iwork+1, ierr )
2361 *
2362 * Copy L to U, zeroing out above it
2363 *
2364  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2365  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2366  $ ldu )
2367 *
2368 * Generate Q in A
2369 * (Workspace: need 2*M, prefer M+M*NB)
2370 *
2371  CALL sorglq( m, n, m, a, lda, work( itau ),
2372  $ work( iwork ), lwork-iwork+1, ierr )
2373  ie = itau
2374  itauq = ie + m
2375  itaup = itauq + m
2376  iwork = itaup + m
2377 *
2378 * Bidiagonalize L in U
2379 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2380 *
2381  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2382  $ work( itauq ), work( itaup ),
2383  $ work( iwork ), lwork-iwork+1, ierr )
2384 *
2385 * Multiply right vectors bidiagonalizing L by Q in A
2386 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2387 *
2388  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2389  $ work( itaup ), a, lda, work( iwork ),
2390  $ lwork-iwork+1, ierr )
2391 *
2392 * Generate left vectors bidiagonalizing L in U
2393 * (Workspace: need 4*M, prefer 3*M+M*NB)
2394 *
2395  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2396  $ work( iwork ), lwork-iwork+1, ierr )
2397  iwork = ie + m
2398 *
2399 * Perform bidiagonal QR iteration, computing left
2400 * singular vectors of A in U and computing right
2401 * singular vectors of A in A
2402 * (Workspace: need BDSPAC)
2403 *
2404  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2405  $ u, ldu, dum, 1, work( iwork ), info )
2406 *
2407  END IF
2408 *
2409  ELSE IF( wntvs ) THEN
2410 *
2411  IF( wntun ) THEN
2412 *
2413 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2414 * M right singular vectors to be computed in VT and
2415 * no left singular vectors to be computed
2416 *
2417  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2418 *
2419 * Sufficient workspace for a fast algorithm
2420 *
2421  ir = 1
2422  IF( lwork.GE.wrkbl+lda*m ) THEN
2423 *
2424 * WORK(IR) is LDA by M
2425 *
2426  ldwrkr = lda
2427  ELSE
2428 *
2429 * WORK(IR) is M by M
2430 *
2431  ldwrkr = m
2432  END IF
2433  itau = ir + ldwrkr*m
2434  iwork = itau + m
2435 *
2436 * Compute A=L*Q
2437 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2438 *
2439  CALL sgelqf( m, n, a, lda, work( itau ),
2440  $ work( iwork ), lwork-iwork+1, ierr )
2441 *
2442 * Copy L to WORK(IR), zeroing out above it
2443 *
2444  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2445  $ ldwrkr )
2446  CALL slaset( 'U', m-1, m-1, zero, zero,
2447  $ work( ir+ldwrkr ), ldwrkr )
2448 *
2449 * Generate Q in A
2450 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2451 *
2452  CALL sorglq( m, n, m, a, lda, work( itau ),
2453  $ work( iwork ), lwork-iwork+1, ierr )
2454  ie = itau
2455  itauq = ie + m
2456  itaup = itauq + m
2457  iwork = itaup + m
2458 *
2459 * Bidiagonalize L in WORK(IR)
2460 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2461 *
2462  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2463  $ work( ie ), work( itauq ),
2464  $ work( itaup ), work( iwork ),
2465  $ lwork-iwork+1, ierr )
2466 *
2467 * Generate right vectors bidiagonalizing L in
2468 * WORK(IR)
2469 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2470 *
2471  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2472  $ work( itaup ), work( iwork ),
2473  $ lwork-iwork+1, ierr )
2474  iwork = ie + m
2475 *
2476 * Perform bidiagonal QR iteration, computing right
2477 * singular vectors of L in WORK(IR)
2478 * (Workspace: need M*M+BDSPAC)
2479 *
2480  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2481  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2482  $ work( iwork ), info )
2483 *
2484 * Multiply right singular vectors of L in WORK(IR) by
2485 * Q in A, storing result in VT
2486 * (Workspace: need M*M)
2487 *
2488  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2489  $ ldwrkr, a, lda, zero, vt, ldvt )
2490 *
2491  ELSE
2492 *
2493 * Insufficient workspace for a fast algorithm
2494 *
2495  itau = 1
2496  iwork = itau + m
2497 *
2498 * Compute A=L*Q
2499 * (Workspace: need 2*M, prefer M+M*NB)
2500 *
2501  CALL sgelqf( m, n, a, lda, work( itau ),
2502  $ work( iwork ), lwork-iwork+1, ierr )
2503 *
2504 * Copy result to VT
2505 *
2506  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2507 *
2508 * Generate Q in VT
2509 * (Workspace: need 2*M, prefer M+M*NB)
2510 *
2511  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2512  $ work( iwork ), lwork-iwork+1, ierr )
2513  ie = itau
2514  itauq = ie + m
2515  itaup = itauq + m
2516  iwork = itaup + m
2517 *
2518 * Zero out above L in A
2519 *
2520  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2521  $ lda )
2522 *
2523 * Bidiagonalize L in A
2524 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2525 *
2526  CALL sgebrd( m, m, a, lda, s, work( ie ),
2527  $ work( itauq ), work( itaup ),
2528  $ work( iwork ), lwork-iwork+1, ierr )
2529 *
2530 * Multiply right vectors bidiagonalizing L by Q in VT
2531 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2532 *
2533  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2534  $ work( itaup ), vt, ldvt,
2535  $ work( iwork ), lwork-iwork+1, ierr )
2536  iwork = ie + m
2537 *
2538 * Perform bidiagonal QR iteration, computing right
2539 * singular vectors of A in VT
2540 * (Workspace: need BDSPAC)
2541 *
2542  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2543  $ ldvt, dum, 1, dum, 1, work( iwork ),
2544  $ info )
2545 *
2546  END IF
2547 *
2548  ELSE IF( wntuo ) THEN
2549 *
2550 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2551 * M right singular vectors to be computed in VT and
2552 * M left singular vectors to be overwritten on A
2553 *
2554  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2555 *
2556 * Sufficient workspace for a fast algorithm
2557 *
2558  iu = 1
2559  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2560 *
2561 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2562 *
2563  ldwrku = lda
2564  ir = iu + ldwrku*m
2565  ldwrkr = lda
2566  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2567 *
2568 * WORK(IU) is LDA by M and WORK(IR) is M by M
2569 *
2570  ldwrku = lda
2571  ir = iu + ldwrku*m
2572  ldwrkr = m
2573  ELSE
2574 *
2575 * WORK(IU) is M by M and WORK(IR) is M by M
2576 *
2577  ldwrku = m
2578  ir = iu + ldwrku*m
2579  ldwrkr = m
2580  END IF
2581  itau = ir + ldwrkr*m
2582  iwork = itau + m
2583 *
2584 * Compute A=L*Q
2585 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2586 *
2587  CALL sgelqf( m, n, a, lda, work( itau ),
2588  $ work( iwork ), lwork-iwork+1, ierr )
2589 *
2590 * Copy L to WORK(IU), zeroing out below it
2591 *
2592  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2593  $ ldwrku )
2594  CALL slaset( 'U', m-1, m-1, zero, zero,
2595  $ work( iu+ldwrku ), ldwrku )
2596 *
2597 * Generate Q in A
2598 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2599 *
2600  CALL sorglq( m, n, m, a, lda, work( itau ),
2601  $ work( iwork ), lwork-iwork+1, ierr )
2602  ie = itau
2603  itauq = ie + m
2604  itaup = itauq + m
2605  iwork = itaup + m
2606 *
2607 * Bidiagonalize L in WORK(IU), copying result to
2608 * WORK(IR)
2609 * (Workspace: need 2*M*M+4*M,
2610 * prefer 2*M*M+3*M+2*M*NB)
2611 *
2612  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2613  $ work( ie ), work( itauq ),
2614  $ work( itaup ), work( iwork ),
2615  $ lwork-iwork+1, ierr )
2616  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2617  $ work( ir ), ldwrkr )
2618 *
2619 * Generate right bidiagonalizing vectors in WORK(IU)
2620 * (Workspace: need 2*M*M+4*M-1,
2621 * prefer 2*M*M+3*M+(M-1)*NB)
2622 *
2623  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2624  $ work( itaup ), work( iwork ),
2625  $ lwork-iwork+1, ierr )
2626 *
2627 * Generate left bidiagonalizing vectors in WORK(IR)
2628 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2629 *
2630  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2631  $ work( itauq ), work( iwork ),
2632  $ lwork-iwork+1, ierr )
2633  iwork = ie + m
2634 *
2635 * Perform bidiagonal QR iteration, computing left
2636 * singular vectors of L in WORK(IR) and computing
2637 * right singular vectors of L in WORK(IU)
2638 * (Workspace: need 2*M*M+BDSPAC)
2639 *
2640  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2641  $ work( iu ), ldwrku, work( ir ),
2642  $ ldwrkr, dum, 1, work( iwork ), info )
2643 *
2644 * Multiply right singular vectors of L in WORK(IU) by
2645 * Q in A, storing result in VT
2646 * (Workspace: need M*M)
2647 *
2648  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2649  $ ldwrku, a, lda, zero, vt, ldvt )
2650 *
2651 * Copy left singular vectors of L to A
2652 * (Workspace: need M*M)
2653 *
2654  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2655  $ lda )
2656 *
2657  ELSE
2658 *
2659 * Insufficient workspace for a fast algorithm
2660 *
2661  itau = 1
2662  iwork = itau + m
2663 *
2664 * Compute A=L*Q, copying result to VT
2665 * (Workspace: need 2*M, prefer M+M*NB)
2666 *
2667  CALL sgelqf( m, n, a, lda, work( itau ),
2668  $ work( iwork ), lwork-iwork+1, ierr )
2669  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2670 *
2671 * Generate Q in VT
2672 * (Workspace: need 2*M, prefer M+M*NB)
2673 *
2674  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2675  $ work( iwork ), lwork-iwork+1, ierr )
2676  ie = itau
2677  itauq = ie + m
2678  itaup = itauq + m
2679  iwork = itaup + m
2680 *
2681 * Zero out above L in A
2682 *
2683  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2684  $ lda )
2685 *
2686 * Bidiagonalize L in A
2687 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2688 *
2689  CALL sgebrd( m, m, a, lda, s, work( ie ),
2690  $ work( itauq ), work( itaup ),
2691  $ work( iwork ), lwork-iwork+1, ierr )
2692 *
2693 * Multiply right vectors bidiagonalizing L by Q in VT
2694 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2695 *
2696  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2697  $ work( itaup ), vt, ldvt,
2698  $ work( iwork ), lwork-iwork+1, ierr )
2699 *
2700 * Generate left bidiagonalizing vectors of L in A
2701 * (Workspace: need 4*M, prefer 3*M+M*NB)
2702 *
2703  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2704  $ work( iwork ), lwork-iwork+1, ierr )
2705  iwork = ie + m
2706 *
2707 * Perform bidiagonal QR iteration, compute left
2708 * singular vectors of A in A and compute right
2709 * singular vectors of A in VT
2710 * (Workspace: need BDSPAC)
2711 *
2712  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2713  $ ldvt, a, lda, dum, 1, work( iwork ),
2714  $ info )
2715 *
2716  END IF
2717 *
2718  ELSE IF( wntuas ) THEN
2719 *
2720 * Path 6t(N much larger than M, JOBU='S' or 'A',
2721 * JOBVT='S')
2722 * M right singular vectors to be computed in VT and
2723 * M left singular vectors to be computed in U
2724 *
2725  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2726 *
2727 * Sufficient workspace for a fast algorithm
2728 *
2729  iu = 1
2730  IF( lwork.GE.wrkbl+lda*m ) THEN
2731 *
2732 * WORK(IU) is LDA by N
2733 *
2734  ldwrku = lda
2735  ELSE
2736 *
2737 * WORK(IU) is LDA by M
2738 *
2739  ldwrku = m
2740  END IF
2741  itau = iu + ldwrku*m
2742  iwork = itau + m
2743 *
2744 * Compute A=L*Q
2745 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2746 *
2747  CALL sgelqf( m, n, a, lda, work( itau ),
2748  $ work( iwork ), lwork-iwork+1, ierr )
2749 *
2750 * Copy L to WORK(IU), zeroing out above it
2751 *
2752  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2753  $ ldwrku )
2754  CALL slaset( 'U', m-1, m-1, zero, zero,
2755  $ work( iu+ldwrku ), ldwrku )
2756 *
2757 * Generate Q in A
2758 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2759 *
2760  CALL sorglq( m, n, m, a, lda, work( itau ),
2761  $ work( iwork ), lwork-iwork+1, ierr )
2762  ie = itau
2763  itauq = ie + m
2764  itaup = itauq + m
2765  iwork = itaup + m
2766 *
2767 * Bidiagonalize L in WORK(IU), copying result to U
2768 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2769 *
2770  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2771  $ work( ie ), work( itauq ),
2772  $ work( itaup ), work( iwork ),
2773  $ lwork-iwork+1, ierr )
2774  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2775  $ ldu )
2776 *
2777 * Generate right bidiagonalizing vectors in WORK(IU)
2778 * (Workspace: need M*M+4*M-1,
2779 * prefer M*M+3*M+(M-1)*NB)
2780 *
2781  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2782  $ work( itaup ), work( iwork ),
2783  $ lwork-iwork+1, ierr )
2784 *
2785 * Generate left bidiagonalizing vectors in U
2786 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2787 *
2788  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2789  $ work( iwork ), lwork-iwork+1, ierr )
2790  iwork = ie + m
2791 *
2792 * Perform bidiagonal QR iteration, computing left
2793 * singular vectors of L in U and computing right
2794 * singular vectors of L in WORK(IU)
2795 * (Workspace: need M*M+BDSPAC)
2796 *
2797  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2798  $ work( iu ), ldwrku, u, ldu, dum, 1,
2799  $ work( iwork ), info )
2800 *
2801 * Multiply right singular vectors of L in WORK(IU) by
2802 * Q in A, storing result in VT
2803 * (Workspace: need M*M)
2804 *
2805  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2806  $ ldwrku, a, lda, zero, vt, ldvt )
2807 *
2808  ELSE
2809 *
2810 * Insufficient workspace for a fast algorithm
2811 *
2812  itau = 1
2813  iwork = itau + m
2814 *
2815 * Compute A=L*Q, copying result to VT
2816 * (Workspace: need 2*M, prefer M+M*NB)
2817 *
2818  CALL sgelqf( m, n, a, lda, work( itau ),
2819  $ work( iwork ), lwork-iwork+1, ierr )
2820  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2821 *
2822 * Generate Q in VT
2823 * (Workspace: need 2*M, prefer M+M*NB)
2824 *
2825  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827 *
2828 * Copy L to U, zeroing out above it
2829 *
2830  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2831  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2832  $ ldu )
2833  ie = itau
2834  itauq = ie + m
2835  itaup = itauq + m
2836  iwork = itaup + m
2837 *
2838 * Bidiagonalize L in U
2839 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2840 *
2841  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2842  $ work( itauq ), work( itaup ),
2843  $ work( iwork ), lwork-iwork+1, ierr )
2844 *
2845 * Multiply right bidiagonalizing vectors in U by Q
2846 * in VT
2847 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2848 *
2849  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2850  $ work( itaup ), vt, ldvt,
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Generate left bidiagonalizing vectors in U
2854 * (Workspace: need 4*M, prefer 3*M+M*NB)
2855 *
2856  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2857  $ work( iwork ), lwork-iwork+1, ierr )
2858  iwork = ie + m
2859 *
2860 * Perform bidiagonal QR iteration, computing left
2861 * singular vectors of A in U and computing right
2862 * singular vectors of A in VT
2863 * (Workspace: need BDSPAC)
2864 *
2865  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2866  $ ldvt, u, ldu, dum, 1, work( iwork ),
2867  $ info )
2868 *
2869  END IF
2870 *
2871  END IF
2872 *
2873  ELSE IF( wntva ) THEN
2874 *
2875  IF( wntun ) THEN
2876 *
2877 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2878 * N right singular vectors to be computed in VT and
2879 * no left singular vectors to be computed
2880 *
2881  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2882 *
2883 * Sufficient workspace for a fast algorithm
2884 *
2885  ir = 1
2886  IF( lwork.GE.wrkbl+lda*m ) THEN
2887 *
2888 * WORK(IR) is LDA by M
2889 *
2890  ldwrkr = lda
2891  ELSE
2892 *
2893 * WORK(IR) is M by M
2894 *
2895  ldwrkr = m
2896  END IF
2897  itau = ir + ldwrkr*m
2898  iwork = itau + m
2899 *
2900 * Compute A=L*Q, copying result to VT
2901 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2902 *
2903  CALL sgelqf( m, n, a, lda, work( itau ),
2904  $ work( iwork ), lwork-iwork+1, ierr )
2905  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2906 *
2907 * Copy L to WORK(IR), zeroing out above it
2908 *
2909  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2910  $ ldwrkr )
2911  CALL slaset( 'U', m-1, m-1, zero, zero,
2912  $ work( ir+ldwrkr ), ldwrkr )
2913 *
2914 * Generate Q in VT
2915 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2916 *
2917  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2918  $ work( iwork ), lwork-iwork+1, ierr )
2919  ie = itau
2920  itauq = ie + m
2921  itaup = itauq + m
2922  iwork = itaup + m
2923 *
2924 * Bidiagonalize L in WORK(IR)
2925 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2926 *
2927  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2928  $ work( ie ), work( itauq ),
2929  $ work( itaup ), work( iwork ),
2930  $ lwork-iwork+1, ierr )
2931 *
2932 * Generate right bidiagonalizing vectors in WORK(IR)
2933 * (Workspace: need M*M+4*M-1,
2934 * prefer M*M+3*M+(M-1)*NB)
2935 *
2936  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2937  $ work( itaup ), work( iwork ),
2938  $ lwork-iwork+1, ierr )
2939  iwork = ie + m
2940 *
2941 * Perform bidiagonal QR iteration, computing right
2942 * singular vectors of L in WORK(IR)
2943 * (Workspace: need M*M+BDSPAC)
2944 *
2945  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2946  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2947  $ work( iwork ), info )
2948 *
2949 * Multiply right singular vectors of L in WORK(IR) by
2950 * Q in VT, storing result in A
2951 * (Workspace: need M*M)
2952 *
2953  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2954  $ ldwrkr, vt, ldvt, zero, a, lda )
2955 *
2956 * Copy right singular vectors of A from A to VT
2957 *
2958  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2959 *
2960  ELSE
2961 *
2962 * Insufficient workspace for a fast algorithm
2963 *
2964  itau = 1
2965  iwork = itau + m
2966 *
2967 * Compute A=L*Q, copying result to VT
2968 * (Workspace: need 2*M, prefer M+M*NB)
2969 *
2970  CALL sgelqf( m, n, a, lda, work( itau ),
2971  $ work( iwork ), lwork-iwork+1, ierr )
2972  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2973 *
2974 * Generate Q in VT
2975 * (Workspace: need M+N, prefer M+N*NB)
2976 *
2977  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2978  $ work( iwork ), lwork-iwork+1, ierr )
2979  ie = itau
2980  itauq = ie + m
2981  itaup = itauq + m
2982  iwork = itaup + m
2983 *
2984 * Zero out above L in A
2985 *
2986  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2987  $ lda )
2988 *
2989 * Bidiagonalize L in A
2990 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2991 *
2992  CALL sgebrd( m, m, a, lda, s, work( ie ),
2993  $ work( itauq ), work( itaup ),
2994  $ work( iwork ), lwork-iwork+1, ierr )
2995 *
2996 * Multiply right bidiagonalizing vectors in A by Q
2997 * in VT
2998 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2999 *
3000  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3001  $ work( itaup ), vt, ldvt,
3002  $ work( iwork ), lwork-iwork+1, ierr )
3003  iwork = ie + m
3004 *
3005 * Perform bidiagonal QR iteration, computing right
3006 * singular vectors of A in VT
3007 * (Workspace: need BDSPAC)
3008 *
3009  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3010  $ ldvt, dum, 1, dum, 1, work( iwork ),
3011  $ info )
3012 *
3013  END IF
3014 *
3015  ELSE IF( wntuo ) THEN
3016 *
3017 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3018 * N right singular vectors to be computed in VT and
3019 * M left singular vectors to be overwritten on A
3020 *
3021  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3022 *
3023 * Sufficient workspace for a fast algorithm
3024 *
3025  iu = 1
3026  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3027 *
3028 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3029 *
3030  ldwrku = lda
3031  ir = iu + ldwrku*m
3032  ldwrkr = lda
3033  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3034 *
3035 * WORK(IU) is LDA by M and WORK(IR) is M by M
3036 *
3037  ldwrku = lda
3038  ir = iu + ldwrku*m
3039  ldwrkr = m
3040  ELSE
3041 *
3042 * WORK(IU) is M by M and WORK(IR) is M by M
3043 *
3044  ldwrku = m
3045  ir = iu + ldwrku*m
3046  ldwrkr = m
3047  END IF
3048  itau = ir + ldwrkr*m
3049  iwork = itau + m
3050 *
3051 * Compute A=L*Q, copying result to VT
3052 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3053 *
3054  CALL sgelqf( m, n, a, lda, work( itau ),
3055  $ work( iwork ), lwork-iwork+1, ierr )
3056  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3057 *
3058 * Generate Q in VT
3059 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3060 *
3061  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3062  $ work( iwork ), lwork-iwork+1, ierr )
3063 *
3064 * Copy L to WORK(IU), zeroing out above it
3065 *
3066  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3067  $ ldwrku )
3068  CALL slaset( 'U', m-1, m-1, zero, zero,
3069  $ work( iu+ldwrku ), ldwrku )
3070  ie = itau
3071  itauq = ie + m
3072  itaup = itauq + m
3073  iwork = itaup + m
3074 *
3075 * Bidiagonalize L in WORK(IU), copying result to
3076 * WORK(IR)
3077 * (Workspace: need 2*M*M+4*M,
3078 * prefer 2*M*M+3*M+2*M*NB)
3079 *
3080  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3081  $ work( ie ), work( itauq ),
3082  $ work( itaup ), work( iwork ),
3083  $ lwork-iwork+1, ierr )
3084  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3085  $ work( ir ), ldwrkr )
3086 *
3087 * Generate right bidiagonalizing vectors in WORK(IU)
3088 * (Workspace: need 2*M*M+4*M-1,
3089 * prefer 2*M*M+3*M+(M-1)*NB)
3090 *
3091  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3092  $ work( itaup ), work( iwork ),
3093  $ lwork-iwork+1, ierr )
3094 *
3095 * Generate left bidiagonalizing vectors in WORK(IR)
3096 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3097 *
3098  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3099  $ work( itauq ), work( iwork ),
3100  $ lwork-iwork+1, ierr )
3101  iwork = ie + m
3102 *
3103 * Perform bidiagonal QR iteration, computing left
3104 * singular vectors of L in WORK(IR) and computing
3105 * right singular vectors of L in WORK(IU)
3106 * (Workspace: need 2*M*M+BDSPAC)
3107 *
3108  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3109  $ work( iu ), ldwrku, work( ir ),
3110  $ ldwrkr, dum, 1, work( iwork ), info )
3111 *
3112 * Multiply right singular vectors of L in WORK(IU) by
3113 * Q in VT, storing result in A
3114 * (Workspace: need M*M)
3115 *
3116  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3117  $ ldwrku, vt, ldvt, zero, a, lda )
3118 *
3119 * Copy right singular vectors of A from A to VT
3120 *
3121  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3122 *
3123 * Copy left singular vectors of A from WORK(IR) to A
3124 *
3125  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3126  $ lda )
3127 *
3128  ELSE
3129 *
3130 * Insufficient workspace for a fast algorithm
3131 *
3132  itau = 1
3133  iwork = itau + m
3134 *
3135 * Compute A=L*Q, copying result to VT
3136 * (Workspace: need 2*M, prefer M+M*NB)
3137 *
3138  CALL sgelqf( m, n, a, lda, work( itau ),
3139  $ work( iwork ), lwork-iwork+1, ierr )
3140  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3141 *
3142 * Generate Q in VT
3143 * (Workspace: need M+N, prefer M+N*NB)
3144 *
3145  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3146  $ work( iwork ), lwork-iwork+1, ierr )
3147  ie = itau
3148  itauq = ie + m
3149  itaup = itauq + m
3150  iwork = itaup + m
3151 *
3152 * Zero out above L in A
3153 *
3154  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3155  $ lda )
3156 *
3157 * Bidiagonalize L in A
3158 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3159 *
3160  CALL sgebrd( m, m, a, lda, s, work( ie ),
3161  $ work( itauq ), work( itaup ),
3162  $ work( iwork ), lwork-iwork+1, ierr )
3163 *
3164 * Multiply right bidiagonalizing vectors in A by Q
3165 * in VT
3166 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3167 *
3168  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3169  $ work( itaup ), vt, ldvt,
3170  $ work( iwork ), lwork-iwork+1, ierr )
3171 *
3172 * Generate left bidiagonalizing vectors in A
3173 * (Workspace: need 4*M, prefer 3*M+M*NB)
3174 *
3175  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3176  $ work( iwork ), lwork-iwork+1, ierr )
3177  iwork = ie + m
3178 *
3179 * Perform bidiagonal QR iteration, computing left
3180 * singular vectors of A in A and computing right
3181 * singular vectors of A in VT
3182 * (Workspace: need BDSPAC)
3183 *
3184  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3185  $ ldvt, a, lda, dum, 1, work( iwork ),
3186  $ info )
3187 *
3188  END IF
3189 *
3190  ELSE IF( wntuas ) THEN
3191 *
3192 * Path 9t(N much larger than M, JOBU='S' or 'A',
3193 * JOBVT='A')
3194 * N right singular vectors to be computed in VT and
3195 * M left singular vectors to be computed in U
3196 *
3197  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3198 *
3199 * Sufficient workspace for a fast algorithm
3200 *
3201  iu = 1
3202  IF( lwork.GE.wrkbl+lda*m ) THEN
3203 *
3204 * WORK(IU) is LDA by M
3205 *
3206  ldwrku = lda
3207  ELSE
3208 *
3209 * WORK(IU) is M by M
3210 *
3211  ldwrku = m
3212  END IF
3213  itau = iu + ldwrku*m
3214  iwork = itau + m
3215 *
3216 * Compute A=L*Q, copying result to VT
3217 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3218 *
3219  CALL sgelqf( m, n, a, lda, work( itau ),
3220  $ work( iwork ), lwork-iwork+1, ierr )
3221  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3222 *
3223 * Generate Q in VT
3224 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3225 *
3226  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3227  $ work( iwork ), lwork-iwork+1, ierr )
3228 *
3229 * Copy L to WORK(IU), zeroing out above it
3230 *
3231  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3232  $ ldwrku )
3233  CALL slaset( 'U', m-1, m-1, zero, zero,
3234  $ work( iu+ldwrku ), ldwrku )
3235  ie = itau
3236  itauq = ie + m
3237  itaup = itauq + m
3238  iwork = itaup + m
3239 *
3240 * Bidiagonalize L in WORK(IU), copying result to U
3241 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3242 *
3243  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3244  $ work( ie ), work( itauq ),
3245  $ work( itaup ), work( iwork ),
3246  $ lwork-iwork+1, ierr )
3247  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3248  $ ldu )
3249 *
3250 * Generate right bidiagonalizing vectors in WORK(IU)
3251 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3252 *
3253  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3254  $ work( itaup ), work( iwork ),
3255  $ lwork-iwork+1, ierr )
3256 *
3257 * Generate left bidiagonalizing vectors in U
3258 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3259 *
3260  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3261  $ work( iwork ), lwork-iwork+1, ierr )
3262  iwork = ie + m
3263 *
3264 * Perform bidiagonal QR iteration, computing left
3265 * singular vectors of L in U and computing right
3266 * singular vectors of L in WORK(IU)
3267 * (Workspace: need M*M+BDSPAC)
3268 *
3269  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3270  $ work( iu ), ldwrku, u, ldu, dum, 1,
3271  $ work( iwork ), info )
3272 *
3273 * Multiply right singular vectors of L in WORK(IU) by
3274 * Q in VT, storing result in A
3275 * (Workspace: need M*M)
3276 *
3277  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3278  $ ldwrku, vt, ldvt, zero, a, lda )
3279 *
3280 * Copy right singular vectors of A from A to VT
3281 *
3282  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3283 *
3284  ELSE
3285 *
3286 * Insufficient workspace for a fast algorithm
3287 *
3288  itau = 1
3289  iwork = itau + m
3290 *
3291 * Compute A=L*Q, copying result to VT
3292 * (Workspace: need 2*M, prefer M+M*NB)
3293 *
3294  CALL sgelqf( m, n, a, lda, work( itau ),
3295  $ work( iwork ), lwork-iwork+1, ierr )
3296  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3297 *
3298 * Generate Q in VT
3299 * (Workspace: need M+N, prefer M+N*NB)
3300 *
3301  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3302  $ work( iwork ), lwork-iwork+1, ierr )
3303 *
3304 * Copy L to U, zeroing out above it
3305 *
3306  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3307  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3308  $ ldu )
3309  ie = itau
3310  itauq = ie + m
3311  itaup = itauq + m
3312  iwork = itaup + m
3313 *
3314 * Bidiagonalize L in U
3315 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3316 *
3317  CALL sgebrd( m, m, u, ldu, s, work( ie ),
3318  $ work( itauq ), work( itaup ),
3319  $ work( iwork ), lwork-iwork+1, ierr )
3320 *
3321 * Multiply right bidiagonalizing vectors in U by Q
3322 * in VT
3323 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3324 *
3325  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3326  $ work( itaup ), vt, ldvt,
3327  $ work( iwork ), lwork-iwork+1, ierr )
3328 *
3329 * Generate left bidiagonalizing vectors in U
3330 * (Workspace: need 4*M, prefer 3*M+M*NB)
3331 *
3332  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3333  $ work( iwork ), lwork-iwork+1, ierr )
3334  iwork = ie + m
3335 *
3336 * Perform bidiagonal QR iteration, computing left
3337 * singular vectors of A in U and computing right
3338 * singular vectors of A in VT
3339 * (Workspace: need BDSPAC)
3340 *
3341  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3342  $ ldvt, u, ldu, dum, 1, work( iwork ),
3343  $ info )
3344 *
3345  END IF
3346 *
3347  END IF
3348 *
3349  END IF
3350 *
3351  ELSE
3352 *
3353 * N .LT. MNTHR
3354 *
3355 * Path 10t(N greater than M, but not much larger)
3356 * Reduce to bidiagonal form without LQ decomposition
3357 *
3358  ie = 1
3359  itauq = ie + m
3360  itaup = itauq + m
3361  iwork = itaup + m
3362 *
3363 * Bidiagonalize A
3364 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3365 *
3366  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3367  $ work( itaup ), work( iwork ), lwork-iwork+1,
3368  $ ierr )
3369  IF( wntuas ) THEN
3370 *
3371 * If left singular vectors desired in U, copy result to U
3372 * and generate left bidiagonalizing vectors in U
3373 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3374 *
3375  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3376  CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3377  $ work( iwork ), lwork-iwork+1, ierr )
3378  END IF
3379  IF( wntvas ) THEN
3380 *
3381 * If right singular vectors desired in VT, copy result to
3382 * VT and generate right bidiagonalizing vectors in VT
3383 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3384 *
3385  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3386  IF( wntva )
3387  $ nrvt = n
3388  IF( wntvs )
3389  $ nrvt = m
3390  CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3391  $ work( iwork ), lwork-iwork+1, ierr )
3392  END IF
3393  IF( wntuo ) THEN
3394 *
3395 * If left singular vectors desired in A, generate left
3396 * bidiagonalizing vectors in A
3397 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3398 *
3399  CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3400  $ work( iwork ), lwork-iwork+1, ierr )
3401  END IF
3402  IF( wntvo ) THEN
3403 *
3404 * If right singular vectors desired in A, generate right
3405 * bidiagonalizing vectors in A
3406 * (Workspace: need 4*M, prefer 3*M+M*NB)
3407 *
3408  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3409  $ work( iwork ), lwork-iwork+1, ierr )
3410  END IF
3411  iwork = ie + m
3412  IF( wntuas .OR. wntuo )
3413  $ nru = m
3414  IF( wntun )
3415  $ nru = 0
3416  IF( wntvas .OR. wntvo )
3417  $ ncvt = n
3418  IF( wntvn )
3419  $ ncvt = 0
3420  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3421 *
3422 * Perform bidiagonal QR iteration, if desired, computing
3423 * left singular vectors in U and computing right singular
3424 * vectors in VT
3425 * (Workspace: need BDSPAC)
3426 *
3427  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3428  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3429  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3430 *
3431 * Perform bidiagonal QR iteration, if desired, computing
3432 * left singular vectors in U and computing right singular
3433 * vectors in A
3434 * (Workspace: need BDSPAC)
3435 *
3436  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3437  $ u, ldu, dum, 1, work( iwork ), info )
3438  ELSE
3439 *
3440 * Perform bidiagonal QR iteration, if desired, computing
3441 * left singular vectors in A and computing right singular
3442 * vectors in VT
3443 * (Workspace: need BDSPAC)
3444 *
3445  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3446  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3447  END IF
3448 *
3449  END IF
3450 *
3451  END IF
3452 *
3453 * If SBDSQR failed to converge, copy unconverged superdiagonals
3454 * to WORK( 2:MINMN )
3455 *
3456  IF( info.NE.0 ) THEN
3457  IF( ie.GT.2 ) THEN
3458  DO 50 i = 1, minmn - 1
3459  work( i+1 ) = work( i+ie-1 )
3460  50 continue
3461  END IF
3462  IF( ie.LT.2 ) THEN
3463  DO 60 i = minmn - 1, 1, -1
3464  work( i+1 ) = work( i+ie-1 )
3465  60 continue
3466  END IF
3467  END IF
3468 *
3469 * Undo scaling if necessary
3470 *
3471  IF( iscl.EQ.1 ) THEN
3472  IF( anrm.GT.bignum )
3473  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3474  $ ierr )
3475  IF( info.NE.0 .AND. anrm.GT.bignum )
3476  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3477  $ minmn, ierr )
3478  IF( anrm.LT.smlnum )
3479  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3480  $ ierr )
3481  IF( info.NE.0 .AND. anrm.LT.smlnum )
3482  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3483  $ minmn, ierr )
3484  END IF
3485 *
3486 * Return optimal workspace in WORK(1)
3487 *
3488  work( 1 ) = maxwrk
3489 *
3490  return
3491 *
3492 * End of SGESVD
3493 *
3494  END