LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ sgesvd()

subroutine sgesvd ( character  JOBU,
character  JOBVT,
integer  M,
integer  N,
real, dimension( lda, * )  A,
integer  LDA,
real, dimension( * )  S,
real, dimension( ldu, * )  U,
integer  LDU,
real, dimension( ldvt, * )  VT,
integer  LDVT,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SGESVD computes the singular value decomposition (SVD) for GE matrices

Download SGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SGESVD computes the singular value decomposition (SVD) of a real
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**T, not V.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U are returned in array U:
          = 'S':  the first min(m,n) columns of U (the left singular
                  vectors) are returned in the array U;
          = 'O':  the first min(m,n) columns of U (the left singular
                  vectors) are overwritten on the array A;
          = 'N':  no columns of U (no left singular vectors) are
                  computed.
[in]JOBVT
          JOBVT is CHARACTER*1
          Specifies options for computing all or part of the matrix
          V**T:
          = 'A':  all N rows of V**T are returned in the array VT;
          = 'S':  the first min(m,n) rows of V**T (the right singular
                  vectors) are returned in the array VT;
          = 'O':  the first min(m,n) rows of V**T (the right singular
                  vectors) are overwritten on the array A;
          = 'N':  no rows of V**T (no right singular vectors) are
                  computed.

          JOBVT and JOBU cannot both be 'O'.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBU = 'O',  A is overwritten with the first min(m,n)
                          columns of U (the left singular vectors,
                          stored columnwise);
          if JOBVT = 'O', A is overwritten with the first min(m,n)
                          rows of V**T (the right singular vectors,
                          stored rowwise);
          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
                          are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is REAL array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is REAL array, dimension (LDU,UCOL)
          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
          if JOBU = 'S', U contains the first min(m,n) columns of U
          (the left singular vectors, stored columnwise);
          if JOBU = 'N' or 'O', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBU = 'S' or 'A', LDU >= M.
[out]VT
          VT is REAL array, dimension (LDVT,N)
          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
          V**T;
          if JOBVT = 'S', VT contains the first min(m,n) rows of
          V**T (the right singular vectors, stored rowwise);
          if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
          superdiagonal elements of an upper bidiagonal matrix B
          whose diagonal is in S (not necessarily sorted). B
          satisfies A = U * B * VT, so it has the same singular values
          as A, and singular vectors related by U and VT.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK.
          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
             - PATH 1  (M much larger than N, JOBU='N')
             - PATH 1t (N much larger than M, JOBVT='N')
          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
          For good performance, LWORK should generally be larger.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if SBDSQR did not converge, INFO specifies how many
                superdiagonals of an intermediate bidiagonal form B
                did not converge to zero. See the description of WORK
                above for details.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 209 of file sgesvd.f.

211 *
212 * -- LAPACK driver routine --
213 * -- LAPACK is a software package provided by Univ. of Tennessee, --
214 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215 *
216 * .. Scalar Arguments ..
217  CHARACTER JOBU, JOBVT
218  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219 * ..
220 * .. Array Arguments ..
221  REAL A( LDA, * ), S( * ), U( LDU, * ),
222  $ VT( LDVT, * ), WORK( * )
223 * ..
224 *
225 * =====================================================================
226 *
227 * .. Parameters ..
228  REAL ZERO, ONE
229  parameter( zero = 0.0e0, one = 1.0e0 )
230 * ..
231 * .. Local Scalars ..
232  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233  $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235  $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236  $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237  $ NRVT, WRKBL
238  INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
239  $ LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
240  $ LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
241  REAL ANRM, BIGNUM, EPS, SMLNUM
242 * ..
243 * .. Local Arrays ..
244  REAL DUM( 1 )
245 * ..
246 * .. External Subroutines ..
247  EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
249  $ xerbla
250 * ..
251 * .. External Functions ..
252  LOGICAL LSAME
253  INTEGER ILAENV
254  REAL SLAMCH, SLANGE
255  EXTERNAL lsame, ilaenv, slamch, slange
256 * ..
257 * .. Intrinsic Functions ..
258  INTRINSIC max, min, sqrt
259 * ..
260 * .. Executable Statements ..
261 *
262 * Test the input arguments
263 *
264  info = 0
265  minmn = min( m, n )
266  wntua = lsame( jobu, 'A' )
267  wntus = lsame( jobu, 'S' )
268  wntuas = wntua .OR. wntus
269  wntuo = lsame( jobu, 'O' )
270  wntun = lsame( jobu, 'N' )
271  wntva = lsame( jobvt, 'A' )
272  wntvs = lsame( jobvt, 'S' )
273  wntvas = wntva .OR. wntvs
274  wntvo = lsame( jobvt, 'O' )
275  wntvn = lsame( jobvt, 'N' )
276  lquery = ( lwork.EQ.-1 )
277 *
278  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279  info = -1
280  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281  $ ( wntvo .AND. wntuo ) ) THEN
282  info = -2
283  ELSE IF( m.LT.0 ) THEN
284  info = -3
285  ELSE IF( n.LT.0 ) THEN
286  info = -4
287  ELSE IF( lda.LT.max( 1, m ) ) THEN
288  info = -6
289  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290  info = -9
291  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293  info = -11
294  END IF
295 *
296 * Compute workspace
297 * (Note: Comments in the code beginning "Workspace:" describe the
298 * minimal amount of workspace needed at that point in the code,
299 * as well as the preferred amount for good performance.
300 * NB refers to the optimal block size for the immediately
301 * following subroutine, as returned by ILAENV.)
302 *
303  IF( info.EQ.0 ) THEN
304  minwrk = 1
305  maxwrk = 1
306  IF( m.GE.n .AND. minmn.GT.0 ) THEN
307 *
308 * Compute space needed for SBDSQR
309 *
310  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
311  bdspac = 5*n
312 * Compute space needed for SGEQRF
313  CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314  lwork_sgeqrf = int( dum(1) )
315 * Compute space needed for SORGQR
316  CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_sorgqr_n = int( dum(1) )
318  CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319  lwork_sorgqr_m = int( dum(1) )
320 * Compute space needed for SGEBRD
321  CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
322  $ dum(1), dum(1), -1, ierr )
323  lwork_sgebrd = int( dum(1) )
324 * Compute space needed for SORGBR P
325  CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
326  $ dum(1), -1, ierr )
327  lwork_sorgbr_p = int( dum(1) )
328 * Compute space needed for SORGBR Q
329  CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
330  $ dum(1), -1, ierr )
331  lwork_sorgbr_q = int( dum(1) )
332 *
333  IF( m.GE.mnthr ) THEN
334  IF( wntun ) THEN
335 *
336 * Path 1 (M much larger than N, JOBU='N')
337 *
338  maxwrk = n + lwork_sgeqrf
339  maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
340  IF( wntvo .OR. wntvas )
341  $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
342  maxwrk = max( maxwrk, bdspac )
343  minwrk = max( 4*n, bdspac )
344  ELSE IF( wntuo .AND. wntvn ) THEN
345 *
346 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347 *
348  wrkbl = n + lwork_sgeqrf
349  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
350  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
351  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
352  wrkbl = max( wrkbl, bdspac )
353  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
354  minwrk = max( 3*n+m, bdspac )
355  ELSE IF( wntuo .AND. wntvas ) THEN
356 *
357 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358 * 'A')
359 *
360  wrkbl = n + lwork_sgeqrf
361  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
362  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
363  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
364  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
365  wrkbl = max( wrkbl, bdspac )
366  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
367  minwrk = max( 3*n+m, bdspac )
368  ELSE IF( wntus .AND. wntvn ) THEN
369 *
370 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371 *
372  wrkbl = n + lwork_sgeqrf
373  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
374  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
375  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
376  wrkbl = max( wrkbl, bdspac )
377  maxwrk = n*n + wrkbl
378  minwrk = max( 3*n+m, bdspac )
379  ELSE IF( wntus .AND. wntvo ) THEN
380 *
381 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382 *
383  wrkbl = n + lwork_sgeqrf
384  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
385  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
386  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
387  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
388  wrkbl = max( wrkbl, bdspac )
389  maxwrk = 2*n*n + wrkbl
390  minwrk = max( 3*n+m, bdspac )
391  ELSE IF( wntus .AND. wntvas ) THEN
392 *
393 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394 * 'A')
395 *
396  wrkbl = n + lwork_sgeqrf
397  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
398  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
399  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
400  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
401  wrkbl = max( wrkbl, bdspac )
402  maxwrk = n*n + wrkbl
403  minwrk = max( 3*n+m, bdspac )
404  ELSE IF( wntua .AND. wntvn ) THEN
405 *
406 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407 *
408  wrkbl = n + lwork_sgeqrf
409  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
410  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
411  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
412  wrkbl = max( wrkbl, bdspac )
413  maxwrk = n*n + wrkbl
414  minwrk = max( 3*n+m, bdspac )
415  ELSE IF( wntua .AND. wntvo ) THEN
416 *
417 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418 *
419  wrkbl = n + lwork_sgeqrf
420  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
421  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
422  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
423  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
424  wrkbl = max( wrkbl, bdspac )
425  maxwrk = 2*n*n + wrkbl
426  minwrk = max( 3*n+m, bdspac )
427  ELSE IF( wntua .AND. wntvas ) THEN
428 *
429 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430 * 'A')
431 *
432  wrkbl = n + lwork_sgeqrf
433  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
434  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
435  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
436  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
437  wrkbl = max( wrkbl, bdspac )
438  maxwrk = n*n + wrkbl
439  minwrk = max( 3*n+m, bdspac )
440  END IF
441  ELSE
442 *
443 * Path 10 (M at least N, but not much larger)
444 *
445  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
446  $ dum(1), dum(1), -1, ierr )
447  lwork_sgebrd = int( dum(1) )
448  maxwrk = 3*n + lwork_sgebrd
449  IF( wntus .OR. wntuo ) THEN
450  CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
451  $ dum(1), -1, ierr )
452  lwork_sorgbr_q = int( dum(1) )
453  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
454  END IF
455  IF( wntua ) THEN
456  CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
457  $ dum(1), -1, ierr )
458  lwork_sorgbr_q = int( dum(1) )
459  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
460  END IF
461  IF( .NOT.wntvn ) THEN
462  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
463  END IF
464  maxwrk = max( maxwrk, bdspac )
465  minwrk = max( 3*n+m, bdspac )
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Compute space needed for SBDSQR
470 *
471  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
472  bdspac = 5*m
473 * Compute space needed for SGELQF
474  CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475  lwork_sgelqf = int( dum(1) )
476 * Compute space needed for SORGLQ
477  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478  lwork_sorglq_n = int( dum(1) )
479  CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480  lwork_sorglq_m = int( dum(1) )
481 * Compute space needed for SGEBRD
482  CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
483  $ dum(1), dum(1), -1, ierr )
484  lwork_sgebrd = int( dum(1) )
485 * Compute space needed for SORGBR P
486  CALL sorgbr( 'P', m, m, m, a, n, dum(1),
487  $ dum(1), -1, ierr )
488  lwork_sorgbr_p = int( dum(1) )
489 * Compute space needed for SORGBR Q
490  CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
491  $ dum(1), -1, ierr )
492  lwork_sorgbr_q = int( dum(1) )
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_sgelqf
499  maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
502  maxwrk = max( maxwrk, bdspac )
503  minwrk = max( 4*m, bdspac )
504  ELSE IF( wntvo .AND. wntun ) THEN
505 *
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 *
508  wrkbl = m + lwork_sgelqf
509  wrkbl = max( wrkbl, m+lwork_sorglq_m )
510  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
511  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
512  wrkbl = max( wrkbl, bdspac )
513  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
514  minwrk = max( 3*m+n, bdspac )
515  ELSE IF( wntvo .AND. wntuas ) THEN
516 *
517 * Path 3t(N much larger than M, JOBU='S' or 'A',
518 * JOBVT='O')
519 *
520  wrkbl = m + lwork_sgelqf
521  wrkbl = max( wrkbl, m+lwork_sorglq_m )
522  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
523  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
524  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
525  wrkbl = max( wrkbl, bdspac )
526  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
527  minwrk = max( 3*m+n, bdspac )
528  ELSE IF( wntvs .AND. wntun ) THEN
529 *
530 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531 *
532  wrkbl = m + lwork_sgelqf
533  wrkbl = max( wrkbl, m+lwork_sorglq_m )
534  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
535  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
536  wrkbl = max( wrkbl, bdspac )
537  maxwrk = m*m + wrkbl
538  minwrk = max( 3*m+n, bdspac )
539  ELSE IF( wntvs .AND. wntuo ) THEN
540 *
541 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542 *
543  wrkbl = m + lwork_sgelqf
544  wrkbl = max( wrkbl, m+lwork_sorglq_m )
545  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
546  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
547  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
548  wrkbl = max( wrkbl, bdspac )
549  maxwrk = 2*m*m + wrkbl
550  minwrk = max( 3*m+n, bdspac )
551  maxwrk = max( maxwrk, minwrk )
552  ELSE IF( wntvs .AND. wntuas ) THEN
553 *
554 * Path 6t(N much larger than M, JOBU='S' or 'A',
555 * JOBVT='S')
556 *
557  wrkbl = m + lwork_sgelqf
558  wrkbl = max( wrkbl, m+lwork_sorglq_m )
559  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
560  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
561  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
562  wrkbl = max( wrkbl, bdspac )
563  maxwrk = m*m + wrkbl
564  minwrk = max( 3*m+n, bdspac )
565  ELSE IF( wntva .AND. wntun ) THEN
566 *
567 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
568 *
569  wrkbl = m + lwork_sgelqf
570  wrkbl = max( wrkbl, m+lwork_sorglq_n )
571  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
572  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
573  wrkbl = max( wrkbl, bdspac )
574  maxwrk = m*m + wrkbl
575  minwrk = max( 3*m+n, bdspac )
576  ELSE IF( wntva .AND. wntuo ) THEN
577 *
578 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
579 *
580  wrkbl = m + lwork_sgelqf
581  wrkbl = max( wrkbl, m+lwork_sorglq_n )
582  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
583  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
584  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
585  wrkbl = max( wrkbl, bdspac )
586  maxwrk = 2*m*m + wrkbl
587  minwrk = max( 3*m+n, bdspac )
588  ELSE IF( wntva .AND. wntuas ) THEN
589 *
590 * Path 9t(N much larger than M, JOBU='S' or 'A',
591 * JOBVT='A')
592 *
593  wrkbl = m + lwork_sgelqf
594  wrkbl = max( wrkbl, m+lwork_sorglq_n )
595  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
596  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
597  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
598  wrkbl = max( wrkbl, bdspac )
599  maxwrk = m*m + wrkbl
600  minwrk = max( 3*m+n, bdspac )
601  END IF
602  ELSE
603 *
604 * Path 10t(N greater than M, but not much larger)
605 *
606  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
607  $ dum(1), dum(1), -1, ierr )
608  lwork_sgebrd = int( dum(1) )
609  maxwrk = 3*m + lwork_sgebrd
610  IF( wntvs .OR. wntvo ) THEN
611 * Compute space needed for SORGBR P
612  CALL sorgbr( 'P', m, n, m, a, n, dum(1),
613  $ dum(1), -1, ierr )
614  lwork_sorgbr_p = int( dum(1) )
615  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
616  END IF
617  IF( wntva ) THEN
618  CALL sorgbr( 'P', n, n, m, a, n, dum(1),
619  $ dum(1), -1, ierr )
620  lwork_sorgbr_p = int( dum(1) )
621  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
622  END IF
623  IF( .NOT.wntun ) THEN
624  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
625  END IF
626  maxwrk = max( maxwrk, bdspac )
627  minwrk = max( 3*m+n, bdspac )
628  END IF
629  END IF
630  maxwrk = max( maxwrk, minwrk )
631  work( 1 ) = maxwrk
632 *
633  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
634  info = -13
635  END IF
636  END IF
637 *
638  IF( info.NE.0 ) THEN
639  CALL xerbla( 'SGESVD', -info )
640  RETURN
641  ELSE IF( lquery ) THEN
642  RETURN
643  END IF
644 *
645 * Quick return if possible
646 *
647  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
648  RETURN
649  END IF
650 *
651 * Get machine constants
652 *
653  eps = slamch( 'P' )
654  smlnum = sqrt( slamch( 'S' ) ) / eps
655  bignum = one / smlnum
656 *
657 * Scale A if max element outside range [SMLNUM,BIGNUM]
658 *
659  anrm = slange( 'M', m, n, a, lda, dum )
660  iscl = 0
661  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
662  iscl = 1
663  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
664  ELSE IF( anrm.GT.bignum ) THEN
665  iscl = 1
666  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
667  END IF
668 *
669  IF( m.GE.n ) THEN
670 *
671 * A has at least as many rows as columns. If A has sufficiently
672 * more rows than columns, first reduce using the QR
673 * decomposition (if sufficient workspace available)
674 *
675  IF( m.GE.mnthr ) THEN
676 *
677  IF( wntun ) THEN
678 *
679 * Path 1 (M much larger than N, JOBU='N')
680 * No left singular vectors to be computed
681 *
682  itau = 1
683  iwork = itau + n
684 *
685 * Compute A=Q*R
686 * (Workspace: need 2*N, prefer N+N*NB)
687 *
688  CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
689  $ lwork-iwork+1, ierr )
690 *
691 * Zero out below R
692 *
693  IF( n .GT. 1 ) THEN
694  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695  $ lda )
696  END IF
697  ie = 1
698  itauq = ie + n
699  itaup = itauq + n
700  iwork = itaup + n
701 *
702 * Bidiagonalize R in A
703 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
704 *
705  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706  $ work( itaup ), work( iwork ), lwork-iwork+1,
707  $ ierr )
708  ncvt = 0
709  IF( wntvo .OR. wntvas ) THEN
710 *
711 * If right singular vectors desired, generate P'.
712 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713 *
714  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715  $ work( iwork ), lwork-iwork+1, ierr )
716  ncvt = n
717  END IF
718  iwork = ie + n
719 *
720 * Perform bidiagonal QR iteration, computing right
721 * singular vectors of A in A if desired
722 * (Workspace: need BDSPAC)
723 *
724  CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725  $ dum, 1, dum, 1, work( iwork ), info )
726 *
727 * If right singular vectors desired in VT, copy them there
728 *
729  IF( wntvas )
730  $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731 *
732  ELSE IF( wntuo .AND. wntvn ) THEN
733 *
734 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735 * N left singular vectors to be overwritten on A and
736 * no right singular vectors to be computed
737 *
738  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739 *
740 * Sufficient workspace for a fast algorithm
741 *
742  ir = 1
743  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744 *
745 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
746 *
747  ldwrku = lda
748  ldwrkr = lda
749  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750 *
751 * WORK(IU) is LDA by N, WORK(IR) is N by N
752 *
753  ldwrku = lda
754  ldwrkr = n
755  ELSE
756 *
757 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758 *
759  ldwrku = ( lwork-n*n-n ) / n
760  ldwrkr = n
761  END IF
762  itau = ir + ldwrkr*n
763  iwork = itau + n
764 *
765 * Compute A=Q*R
766 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767 *
768  CALL sgeqrf( m, n, a, lda, work( itau ),
769  $ work( iwork ), lwork-iwork+1, ierr )
770 *
771 * Copy R to WORK(IR) and zero out below it
772 *
773  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774  CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775  $ ldwrkr )
776 *
777 * Generate Q in A
778 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779 *
780  CALL sorgqr( m, n, n, a, lda, work( itau ),
781  $ work( iwork ), lwork-iwork+1, ierr )
782  ie = itau
783  itauq = ie + n
784  itaup = itauq + n
785  iwork = itaup + n
786 *
787 * Bidiagonalize R in WORK(IR)
788 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789 *
790  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791  $ work( itauq ), work( itaup ),
792  $ work( iwork ), lwork-iwork+1, ierr )
793 *
794 * Generate left vectors bidiagonalizing R
795 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796 *
797  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798  $ work( itauq ), work( iwork ),
799  $ lwork-iwork+1, ierr )
800  iwork = ie + n
801 *
802 * Perform bidiagonal QR iteration, computing left
803 * singular vectors of R in WORK(IR)
804 * (Workspace: need N*N+BDSPAC)
805 *
806  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807  $ work( ir ), ldwrkr, dum, 1,
808  $ work( iwork ), info )
809  iu = ie + n
810 *
811 * Multiply Q in A by left singular vectors of R in
812 * WORK(IR), storing result in WORK(IU) and copying to A
813 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814 *
815  DO 10 i = 1, m, ldwrku
816  chunk = min( m-i+1, ldwrku )
817  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818  $ lda, work( ir ), ldwrkr, zero,
819  $ work( iu ), ldwrku )
820  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821  $ a( i, 1 ), lda )
822  10 CONTINUE
823 *
824  ELSE
825 *
826 * Insufficient workspace for a fast algorithm
827 *
828  ie = 1
829  itauq = ie + n
830  itaup = itauq + n
831  iwork = itaup + n
832 *
833 * Bidiagonalize A
834 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835 *
836  CALL sgebrd( m, n, a, lda, s, work( ie ),
837  $ work( itauq ), work( itaup ),
838  $ work( iwork ), lwork-iwork+1, ierr )
839 *
840 * Generate left vectors bidiagonalizing A
841 * (Workspace: need 4*N, prefer 3*N+N*NB)
842 *
843  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844  $ work( iwork ), lwork-iwork+1, ierr )
845  iwork = ie + n
846 *
847 * Perform bidiagonal QR iteration, computing left
848 * singular vectors of A in A
849 * (Workspace: need BDSPAC)
850 *
851  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852  $ a, lda, dum, 1, work( iwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n-n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891 *
892  CALL sgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL slaset( 'L', n-1, n-1, zero, zero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904 *
905  CALL sorgqr( m, n, n, a, lda, work( itau ),
906  $ work( iwork ), lwork-iwork+1, ierr )
907  ie = itau
908  itauq = ie + n
909  itaup = itauq + n
910  iwork = itaup + n
911 *
912 * Bidiagonalize R in VT, copying result to WORK(IR)
913 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914 *
915  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916  $ work( itauq ), work( itaup ),
917  $ work( iwork ), lwork-iwork+1, ierr )
918  CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919 *
920 * Generate left vectors bidiagonalizing R in WORK(IR)
921 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922 *
923  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924  $ work( itauq ), work( iwork ),
925  $ lwork-iwork+1, ierr )
926 *
927 * Generate right vectors bidiagonalizing R in VT
928 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929 *
930  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931  $ work( iwork ), lwork-iwork+1, ierr )
932  iwork = ie + n
933 *
934 * Perform bidiagonal QR iteration, computing left
935 * singular vectors of R in WORK(IR) and computing right
936 * singular vectors of R in VT
937 * (Workspace: need N*N+BDSPAC)
938 *
939  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940  $ work( ir ), ldwrkr, dum, 1,
941  $ work( iwork ), info )
942  iu = ie + n
943 *
944 * Multiply Q in A by left singular vectors of R in
945 * WORK(IR), storing result in WORK(IU) and copying to A
946 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947 *
948  DO 20 i = 1, m, ldwrku
949  chunk = min( m-i+1, ldwrku )
950  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951  $ lda, work( ir ), ldwrkr, zero,
952  $ work( iu ), ldwrku )
953  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954  $ a( i, 1 ), lda )
955  20 CONTINUE
956 *
957  ELSE
958 *
959 * Insufficient workspace for a fast algorithm
960 *
961  itau = 1
962  iwork = itau + n
963 *
964 * Compute A=Q*R
965 * (Workspace: need 2*N, prefer N+N*NB)
966 *
967  CALL sgeqrf( m, n, a, lda, work( itau ),
968  $ work( iwork ), lwork-iwork+1, ierr )
969 *
970 * Copy R to VT, zeroing out below it
971 *
972  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973  IF( n.GT.1 )
974  $ CALL slaset( 'L', n-1, n-1, zero, zero,
975  $ vt( 2, 1 ), ldvt )
976 *
977 * Generate Q in A
978 * (Workspace: need 2*N, prefer N+N*NB)
979 *
980  CALL sorgqr( m, n, n, a, lda, work( itau ),
981  $ work( iwork ), lwork-iwork+1, ierr )
982  ie = itau
983  itauq = ie + n
984  itaup = itauq + n
985  iwork = itaup + n
986 *
987 * Bidiagonalize R in VT
988 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
989 *
990  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991  $ work( itauq ), work( itaup ),
992  $ work( iwork ), lwork-iwork+1, ierr )
993 *
994 * Multiply Q in A by left vectors bidiagonalizing R
995 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
996 *
997  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998  $ work( itauq ), a, lda, work( iwork ),
999  $ lwork-iwork+1, ierr )
1000 *
1001 * Generate right vectors bidiagonalizing R in VT
1002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003 *
1004  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005  $ work( iwork ), lwork-iwork+1, ierr )
1006  iwork = ie + n
1007 *
1008 * Perform bidiagonal QR iteration, computing left
1009 * singular vectors of A in A and computing right
1010 * singular vectors of A in VT
1011 * (Workspace: need BDSPAC)
1012 *
1013  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014  $ a, lda, dum, 1, work( iwork ), info )
1015 *
1016  END IF
1017 *
1018  ELSE IF( wntus ) THEN
1019 *
1020  IF( wntvn ) THEN
1021 *
1022 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023 * N left singular vectors to be computed in U and
1024 * no right singular vectors to be computed
1025 *
1026  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027 *
1028 * Sufficient workspace for a fast algorithm
1029 *
1030  ir = 1
1031  IF( lwork.GE.wrkbl+lda*n ) THEN
1032 *
1033 * WORK(IR) is LDA by N
1034 *
1035  ldwrkr = lda
1036  ELSE
1037 *
1038 * WORK(IR) is N by N
1039 *
1040  ldwrkr = n
1041  END IF
1042  itau = ir + ldwrkr*n
1043  iwork = itau + n
1044 *
1045 * Compute A=Q*R
1046 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047 *
1048  CALL sgeqrf( m, n, a, lda, work( itau ),
1049  $ work( iwork ), lwork-iwork+1, ierr )
1050 *
1051 * Copy R to WORK(IR), zeroing out below it
1052 *
1053  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054  $ ldwrkr )
1055  CALL slaset( 'L', n-1, n-1, zero, zero,
1056  $ work( ir+1 ), ldwrkr )
1057 *
1058 * Generate Q in A
1059 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 *
1061  CALL sorgqr( m, n, n, a, lda, work( itau ),
1062  $ work( iwork ), lwork-iwork+1, ierr )
1063  ie = itau
1064  itauq = ie + n
1065  itaup = itauq + n
1066  iwork = itaup + n
1067 *
1068 * Bidiagonalize R in WORK(IR)
1069 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070 *
1071  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072  $ work( ie ), work( itauq ),
1073  $ work( itaup ), work( iwork ),
1074  $ lwork-iwork+1, ierr )
1075 *
1076 * Generate left vectors bidiagonalizing R in WORK(IR)
1077 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078 *
1079  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080  $ work( itauq ), work( iwork ),
1081  $ lwork-iwork+1, ierr )
1082  iwork = ie + n
1083 *
1084 * Perform bidiagonal QR iteration, computing left
1085 * singular vectors of R in WORK(IR)
1086 * (Workspace: need N*N+BDSPAC)
1087 *
1088  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089  $ 1, work( ir ), ldwrkr, dum, 1,
1090  $ work( iwork ), info )
1091 *
1092 * Multiply Q in A by left singular vectors of R in
1093 * WORK(IR), storing result in U
1094 * (Workspace: need N*N)
1095 *
1096  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097  $ work( ir ), ldwrkr, zero, u, ldu )
1098 *
1099  ELSE
1100 *
1101 * Insufficient workspace for a fast algorithm
1102 *
1103  itau = 1
1104  iwork = itau + n
1105 *
1106 * Compute A=Q*R, copying result to U
1107 * (Workspace: need 2*N, prefer N+N*NB)
1108 *
1109  CALL sgeqrf( m, n, a, lda, work( itau ),
1110  $ work( iwork ), lwork-iwork+1, ierr )
1111  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112 *
1113 * Generate Q in U
1114 * (Workspace: need 2*N, prefer N+N*NB)
1115 *
1116  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117  $ work( iwork ), lwork-iwork+1, ierr )
1118  ie = itau
1119  itauq = ie + n
1120  itaup = itauq + n
1121  iwork = itaup + n
1122 *
1123 * Zero out below R in A
1124 *
1125  IF( n .GT. 1 ) THEN
1126  CALL slaset( 'L', n-1, n-1, zero, zero,
1127  $ a( 2, 1 ), lda )
1128  END IF
1129 *
1130 * Bidiagonalize R in A
1131 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1132 *
1133  CALL sgebrd( n, n, a, lda, s, work( ie ),
1134  $ work( itauq ), work( itaup ),
1135  $ work( iwork ), lwork-iwork+1, ierr )
1136 *
1137 * Multiply Q in U by left vectors bidiagonalizing R
1138 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1139 *
1140  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1141  $ work( itauq ), u, ldu, work( iwork ),
1142  $ lwork-iwork+1, ierr )
1143  iwork = ie + n
1144 *
1145 * Perform bidiagonal QR iteration, computing left
1146 * singular vectors of A in U
1147 * (Workspace: need BDSPAC)
1148 *
1149  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1150  $ 1, u, ldu, dum, 1, work( iwork ),
1151  $ info )
1152 *
1153  END IF
1154 *
1155  ELSE IF( wntvo ) THEN
1156 *
1157 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1158 * N left singular vectors to be computed in U and
1159 * N right singular vectors to be overwritten on A
1160 *
1161  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1162 *
1163 * Sufficient workspace for a fast algorithm
1164 *
1165  iu = 1
1166  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1167 *
1168 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1169 *
1170  ldwrku = lda
1171  ir = iu + ldwrku*n
1172  ldwrkr = lda
1173  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1174 *
1175 * WORK(IU) is LDA by N and WORK(IR) is N by N
1176 *
1177  ldwrku = lda
1178  ir = iu + ldwrku*n
1179  ldwrkr = n
1180  ELSE
1181 *
1182 * WORK(IU) is N by N and WORK(IR) is N by N
1183 *
1184  ldwrku = n
1185  ir = iu + ldwrku*n
1186  ldwrkr = n
1187  END IF
1188  itau = ir + ldwrkr*n
1189  iwork = itau + n
1190 *
1191 * Compute A=Q*R
1192 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1193 *
1194  CALL sgeqrf( m, n, a, lda, work( itau ),
1195  $ work( iwork ), lwork-iwork+1, ierr )
1196 *
1197 * Copy R to WORK(IU), zeroing out below it
1198 *
1199  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1200  $ ldwrku )
1201  CALL slaset( 'L', n-1, n-1, zero, zero,
1202  $ work( iu+1 ), ldwrku )
1203 *
1204 * Generate Q in A
1205 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1206 *
1207  CALL sorgqr( m, n, n, a, lda, work( itau ),
1208  $ work( iwork ), lwork-iwork+1, ierr )
1209  ie = itau
1210  itauq = ie + n
1211  itaup = itauq + n
1212  iwork = itaup + n
1213 *
1214 * Bidiagonalize R in WORK(IU), copying result to
1215 * WORK(IR)
1216 * (Workspace: need 2*N*N+4*N,
1217 * prefer 2*N*N+3*N+2*N*NB)
1218 *
1219  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1220  $ work( ie ), work( itauq ),
1221  $ work( itaup ), work( iwork ),
1222  $ lwork-iwork+1, ierr )
1223  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1224  $ work( ir ), ldwrkr )
1225 *
1226 * Generate left bidiagonalizing vectors in WORK(IU)
1227 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1228 *
1229  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1230  $ work( itauq ), work( iwork ),
1231  $ lwork-iwork+1, ierr )
1232 *
1233 * Generate right bidiagonalizing vectors in WORK(IR)
1234 * (Workspace: need 2*N*N+4*N-1,
1235 * prefer 2*N*N+3*N+(N-1)*NB)
1236 *
1237  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1238  $ work( itaup ), work( iwork ),
1239  $ lwork-iwork+1, ierr )
1240  iwork = ie + n
1241 *
1242 * Perform bidiagonal QR iteration, computing left
1243 * singular vectors of R in WORK(IU) and computing
1244 * right singular vectors of R in WORK(IR)
1245 * (Workspace: need 2*N*N+BDSPAC)
1246 *
1247  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1248  $ work( ir ), ldwrkr, work( iu ),
1249  $ ldwrku, dum, 1, work( iwork ), info )
1250 *
1251 * Multiply Q in A by left singular vectors of R in
1252 * WORK(IU), storing result in U
1253 * (Workspace: need N*N)
1254 *
1255  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1256  $ work( iu ), ldwrku, zero, u, ldu )
1257 *
1258 * Copy right singular vectors of R to A
1259 * (Workspace: need N*N)
1260 *
1261  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1262  $ lda )
1263 *
1264  ELSE
1265 *
1266 * Insufficient workspace for a fast algorithm
1267 *
1268  itau = 1
1269  iwork = itau + n
1270 *
1271 * Compute A=Q*R, copying result to U
1272 * (Workspace: need 2*N, prefer N+N*NB)
1273 *
1274  CALL sgeqrf( m, n, a, lda, work( itau ),
1275  $ work( iwork ), lwork-iwork+1, ierr )
1276  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1277 *
1278 * Generate Q in U
1279 * (Workspace: need 2*N, prefer N+N*NB)
1280 *
1281  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1282  $ work( iwork ), lwork-iwork+1, ierr )
1283  ie = itau
1284  itauq = ie + n
1285  itaup = itauq + n
1286  iwork = itaup + n
1287 *
1288 * Zero out below R in A
1289 *
1290  IF( n .GT. 1 ) THEN
1291  CALL slaset( 'L', n-1, n-1, zero, zero,
1292  $ a( 2, 1 ), lda )
1293  END IF
1294 *
1295 * Bidiagonalize R in A
1296 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1297 *
1298  CALL sgebrd( n, n, a, lda, s, work( ie ),
1299  $ work( itauq ), work( itaup ),
1300  $ work( iwork ), lwork-iwork+1, ierr )
1301 *
1302 * Multiply Q in U by left vectors bidiagonalizing R
1303 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1304 *
1305  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1306  $ work( itauq ), u, ldu, work( iwork ),
1307  $ lwork-iwork+1, ierr )
1308 *
1309 * Generate right vectors bidiagonalizing R in A
1310 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1311 *
1312  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1313  $ work( iwork ), lwork-iwork+1, ierr )
1314  iwork = ie + n
1315 *
1316 * Perform bidiagonal QR iteration, computing left
1317 * singular vectors of A in U and computing right
1318 * singular vectors of A in A
1319 * (Workspace: need BDSPAC)
1320 *
1321  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1322  $ lda, u, ldu, dum, 1, work( iwork ),
1323  $ info )
1324 *
1325  END IF
1326 *
1327  ELSE IF( wntvas ) THEN
1328 *
1329 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1330 * or 'A')
1331 * N left singular vectors to be computed in U and
1332 * N right singular vectors to be computed in VT
1333 *
1334  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1335 *
1336 * Sufficient workspace for a fast algorithm
1337 *
1338  iu = 1
1339  IF( lwork.GE.wrkbl+lda*n ) THEN
1340 *
1341 * WORK(IU) is LDA by N
1342 *
1343  ldwrku = lda
1344  ELSE
1345 *
1346 * WORK(IU) is N by N
1347 *
1348  ldwrku = n
1349  END IF
1350  itau = iu + ldwrku*n
1351  iwork = itau + n
1352 *
1353 * Compute A=Q*R
1354 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1355 *
1356  CALL sgeqrf( m, n, a, lda, work( itau ),
1357  $ work( iwork ), lwork-iwork+1, ierr )
1358 *
1359 * Copy R to WORK(IU), zeroing out below it
1360 *
1361  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1362  $ ldwrku )
1363  CALL slaset( 'L', n-1, n-1, zero, zero,
1364  $ work( iu+1 ), ldwrku )
1365 *
1366 * Generate Q in A
1367 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1368 *
1369  CALL sorgqr( m, n, n, a, lda, work( itau ),
1370  $ work( iwork ), lwork-iwork+1, ierr )
1371  ie = itau
1372  itauq = ie + n
1373  itaup = itauq + n
1374  iwork = itaup + n
1375 *
1376 * Bidiagonalize R in WORK(IU), copying result to VT
1377 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1378 *
1379  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1380  $ work( ie ), work( itauq ),
1381  $ work( itaup ), work( iwork ),
1382  $ lwork-iwork+1, ierr )
1383  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1384  $ ldvt )
1385 *
1386 * Generate left bidiagonalizing vectors in WORK(IU)
1387 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1388 *
1389  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1390  $ work( itauq ), work( iwork ),
1391  $ lwork-iwork+1, ierr )
1392 *
1393 * Generate right bidiagonalizing vectors in VT
1394 * (Workspace: need N*N+4*N-1,
1395 * prefer N*N+3*N+(N-1)*NB)
1396 *
1397  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1398  $ work( iwork ), lwork-iwork+1, ierr )
1399  iwork = ie + n
1400 *
1401 * Perform bidiagonal QR iteration, computing left
1402 * singular vectors of R in WORK(IU) and computing
1403 * right singular vectors of R in VT
1404 * (Workspace: need N*N+BDSPAC)
1405 *
1406  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1407  $ ldvt, work( iu ), ldwrku, dum, 1,
1408  $ work( iwork ), info )
1409 *
1410 * Multiply Q in A by left singular vectors of R in
1411 * WORK(IU), storing result in U
1412 * (Workspace: need N*N)
1413 *
1414  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1415  $ work( iu ), ldwrku, zero, u, ldu )
1416 *
1417  ELSE
1418 *
1419 * Insufficient workspace for a fast algorithm
1420 *
1421  itau = 1
1422  iwork = itau + n
1423 *
1424 * Compute A=Q*R, copying result to U
1425 * (Workspace: need 2*N, prefer N+N*NB)
1426 *
1427  CALL sgeqrf( m, n, a, lda, work( itau ),
1428  $ work( iwork ), lwork-iwork+1, ierr )
1429  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1430 *
1431 * Generate Q in U
1432 * (Workspace: need 2*N, prefer N+N*NB)
1433 *
1434  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1435  $ work( iwork ), lwork-iwork+1, ierr )
1436 *
1437 * Copy R to VT, zeroing out below it
1438 *
1439  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1440  IF( n.GT.1 )
1441  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1442  $ vt( 2, 1 ), ldvt )
1443  ie = itau
1444  itauq = ie + n
1445  itaup = itauq + n
1446  iwork = itaup + n
1447 *
1448 * Bidiagonalize R in VT
1449 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1450 *
1451  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1452  $ work( itauq ), work( itaup ),
1453  $ work( iwork ), lwork-iwork+1, ierr )
1454 *
1455 * Multiply Q in U by left bidiagonalizing vectors
1456 * in VT
1457 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1458 *
1459  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1460  $ work( itauq ), u, ldu, work( iwork ),
1461  $ lwork-iwork+1, ierr )
1462 *
1463 * Generate right bidiagonalizing vectors in VT
1464 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1465 *
1466  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1467  $ work( iwork ), lwork-iwork+1, ierr )
1468  iwork = ie + n
1469 *
1470 * Perform bidiagonal QR iteration, computing left
1471 * singular vectors of A in U and computing right
1472 * singular vectors of A in VT
1473 * (Workspace: need BDSPAC)
1474 *
1475  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1476  $ ldvt, u, ldu, dum, 1, work( iwork ),
1477  $ info )
1478 *
1479  END IF
1480 *
1481  END IF
1482 *
1483  ELSE IF( wntua ) THEN
1484 *
1485  IF( wntvn ) THEN
1486 *
1487 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1488 * M left singular vectors to be computed in U and
1489 * no right singular vectors to be computed
1490 *
1491  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1492 *
1493 * Sufficient workspace for a fast algorithm
1494 *
1495  ir = 1
1496  IF( lwork.GE.wrkbl+lda*n ) THEN
1497 *
1498 * WORK(IR) is LDA by N
1499 *
1500  ldwrkr = lda
1501  ELSE
1502 *
1503 * WORK(IR) is N by N
1504 *
1505  ldwrkr = n
1506  END IF
1507  itau = ir + ldwrkr*n
1508  iwork = itau + n
1509 *
1510 * Compute A=Q*R, copying result to U
1511 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1512 *
1513  CALL sgeqrf( m, n, a, lda, work( itau ),
1514  $ work( iwork ), lwork-iwork+1, ierr )
1515  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1516 *
1517 * Copy R to WORK(IR), zeroing out below it
1518 *
1519  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1520  $ ldwrkr )
1521  CALL slaset( 'L', n-1, n-1, zero, zero,
1522  $ work( ir+1 ), ldwrkr )
1523 *
1524 * Generate Q in U
1525 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1526 *
1527  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1528  $ work( iwork ), lwork-iwork+1, ierr )
1529  ie = itau
1530  itauq = ie + n
1531  itaup = itauq + n
1532  iwork = itaup + n
1533 *
1534 * Bidiagonalize R in WORK(IR)
1535 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1536 *
1537  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1538  $ work( ie ), work( itauq ),
1539  $ work( itaup ), work( iwork ),
1540  $ lwork-iwork+1, ierr )
1541 *
1542 * Generate left bidiagonalizing vectors in WORK(IR)
1543 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1544 *
1545  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1546  $ work( itauq ), work( iwork ),
1547  $ lwork-iwork+1, ierr )
1548  iwork = ie + n
1549 *
1550 * Perform bidiagonal QR iteration, computing left
1551 * singular vectors of R in WORK(IR)
1552 * (Workspace: need N*N+BDSPAC)
1553 *
1554  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1555  $ 1, work( ir ), ldwrkr, dum, 1,
1556  $ work( iwork ), info )
1557 *
1558 * Multiply Q in U by left singular vectors of R in
1559 * WORK(IR), storing result in A
1560 * (Workspace: need N*N)
1561 *
1562  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1563  $ work( ir ), ldwrkr, zero, a, lda )
1564 *
1565 * Copy left singular vectors of A from A to U
1566 *
1567  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1568 *
1569  ELSE
1570 *
1571 * Insufficient workspace for a fast algorithm
1572 *
1573  itau = 1
1574  iwork = itau + n
1575 *
1576 * Compute A=Q*R, copying result to U
1577 * (Workspace: need 2*N, prefer N+N*NB)
1578 *
1579  CALL sgeqrf( m, n, a, lda, work( itau ),
1580  $ work( iwork ), lwork-iwork+1, ierr )
1581  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1582 *
1583 * Generate Q in U
1584 * (Workspace: need N+M, prefer N+M*NB)
1585 *
1586  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1587  $ work( iwork ), lwork-iwork+1, ierr )
1588  ie = itau
1589  itauq = ie + n
1590  itaup = itauq + n
1591  iwork = itaup + n
1592 *
1593 * Zero out below R in A
1594 *
1595  IF( n .GT. 1 ) THEN
1596  CALL slaset( 'L', n-1, n-1, zero, zero,
1597  $ a( 2, 1 ), lda )
1598  END IF
1599 *
1600 * Bidiagonalize R in A
1601 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1602 *
1603  CALL sgebrd( n, n, a, lda, s, work( ie ),
1604  $ work( itauq ), work( itaup ),
1605  $ work( iwork ), lwork-iwork+1, ierr )
1606 *
1607 * Multiply Q in U by left bidiagonalizing vectors
1608 * in A
1609 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1610 *
1611  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1612  $ work( itauq ), u, ldu, work( iwork ),
1613  $ lwork-iwork+1, ierr )
1614  iwork = ie + n
1615 *
1616 * Perform bidiagonal QR iteration, computing left
1617 * singular vectors of A in U
1618 * (Workspace: need BDSPAC)
1619 *
1620  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1621  $ 1, u, ldu, dum, 1, work( iwork ),
1622  $ info )
1623 *
1624  END IF
1625 *
1626  ELSE IF( wntvo ) THEN
1627 *
1628 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1629 * M left singular vectors to be computed in U and
1630 * N right singular vectors to be overwritten on A
1631 *
1632  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1633 *
1634 * Sufficient workspace for a fast algorithm
1635 *
1636  iu = 1
1637  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1638 *
1639 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1640 *
1641  ldwrku = lda
1642  ir = iu + ldwrku*n
1643  ldwrkr = lda
1644  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1645 *
1646 * WORK(IU) is LDA by N and WORK(IR) is N by N
1647 *
1648  ldwrku = lda
1649  ir = iu + ldwrku*n
1650  ldwrkr = n
1651  ELSE
1652 *
1653 * WORK(IU) is N by N and WORK(IR) is N by N
1654 *
1655  ldwrku = n
1656  ir = iu + ldwrku*n
1657  ldwrkr = n
1658  END IF
1659  itau = ir + ldwrkr*n
1660  iwork = itau + n
1661 *
1662 * Compute A=Q*R, copying result to U
1663 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1664 *
1665  CALL sgeqrf( m, n, a, lda, work( itau ),
1666  $ work( iwork ), lwork-iwork+1, ierr )
1667  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1668 *
1669 * Generate Q in U
1670 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1671 *
1672  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1673  $ work( iwork ), lwork-iwork+1, ierr )
1674 *
1675 * Copy R to WORK(IU), zeroing out below it
1676 *
1677  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1678  $ ldwrku )
1679  CALL slaset( 'L', n-1, n-1, zero, zero,
1680  $ work( iu+1 ), ldwrku )
1681  ie = itau
1682  itauq = ie + n
1683  itaup = itauq + n
1684  iwork = itaup + n
1685 *
1686 * Bidiagonalize R in WORK(IU), copying result to
1687 * WORK(IR)
1688 * (Workspace: need 2*N*N+4*N,
1689 * prefer 2*N*N+3*N+2*N*NB)
1690 *
1691  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1692  $ work( ie ), work( itauq ),
1693  $ work( itaup ), work( iwork ),
1694  $ lwork-iwork+1, ierr )
1695  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1696  $ work( ir ), ldwrkr )
1697 *
1698 * Generate left bidiagonalizing vectors in WORK(IU)
1699 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1700 *
1701  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1702  $ work( itauq ), work( iwork ),
1703  $ lwork-iwork+1, ierr )
1704 *
1705 * Generate right bidiagonalizing vectors in WORK(IR)
1706 * (Workspace: need 2*N*N+4*N-1,
1707 * prefer 2*N*N+3*N+(N-1)*NB)
1708 *
1709  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1710  $ work( itaup ), work( iwork ),
1711  $ lwork-iwork+1, ierr )
1712  iwork = ie + n
1713 *
1714 * Perform bidiagonal QR iteration, computing left
1715 * singular vectors of R in WORK(IU) and computing
1716 * right singular vectors of R in WORK(IR)
1717 * (Workspace: need 2*N*N+BDSPAC)
1718 *
1719  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1720  $ work( ir ), ldwrkr, work( iu ),
1721  $ ldwrku, dum, 1, work( iwork ), info )
1722 *
1723 * Multiply Q in U by left singular vectors of R in
1724 * WORK(IU), storing result in A
1725 * (Workspace: need N*N)
1726 *
1727  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1728  $ work( iu ), ldwrku, zero, a, lda )
1729 *
1730 * Copy left singular vectors of A from A to U
1731 *
1732  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1733 *
1734 * Copy right singular vectors of R from WORK(IR) to A
1735 *
1736  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1737  $ lda )
1738 *
1739  ELSE
1740 *
1741 * Insufficient workspace for a fast algorithm
1742 *
1743  itau = 1
1744  iwork = itau + n
1745 *
1746 * Compute A=Q*R, copying result to U
1747 * (Workspace: need 2*N, prefer N+N*NB)
1748 *
1749  CALL sgeqrf( m, n, a, lda, work( itau ),
1750  $ work( iwork ), lwork-iwork+1, ierr )
1751  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1752 *
1753 * Generate Q in U
1754 * (Workspace: need N+M, prefer N+M*NB)
1755 *
1756  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1757  $ work( iwork ), lwork-iwork+1, ierr )
1758  ie = itau
1759  itauq = ie + n
1760  itaup = itauq + n
1761  iwork = itaup + n
1762 *
1763 * Zero out below R in A
1764 *
1765  IF( n .GT. 1 ) THEN
1766  CALL slaset( 'L', n-1, n-1, zero, zero,
1767  $ a( 2, 1 ), lda )
1768  END IF
1769 *
1770 * Bidiagonalize R in A
1771 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1772 *
1773  CALL sgebrd( n, n, a, lda, s, work( ie ),
1774  $ work( itauq ), work( itaup ),
1775  $ work( iwork ), lwork-iwork+1, ierr )
1776 *
1777 * Multiply Q in U by left bidiagonalizing vectors
1778 * in A
1779 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1780 *
1781  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1782  $ work( itauq ), u, ldu, work( iwork ),
1783  $ lwork-iwork+1, ierr )
1784 *
1785 * Generate right bidiagonalizing vectors in A
1786 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1787 *
1788  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1789  $ work( iwork ), lwork-iwork+1, ierr )
1790  iwork = ie + n
1791 *
1792 * Perform bidiagonal QR iteration, computing left
1793 * singular vectors of A in U and computing right
1794 * singular vectors of A in A
1795 * (Workspace: need BDSPAC)
1796 *
1797  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1798  $ lda, u, ldu, dum, 1, work( iwork ),
1799  $ info )
1800 *
1801  END IF
1802 *
1803  ELSE IF( wntvas ) THEN
1804 *
1805 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1806 * or 'A')
1807 * M left singular vectors to be computed in U and
1808 * N right singular vectors to be computed in VT
1809 *
1810  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1811 *
1812 * Sufficient workspace for a fast algorithm
1813 *
1814  iu = 1
1815  IF( lwork.GE.wrkbl+lda*n ) THEN
1816 *
1817 * WORK(IU) is LDA by N
1818 *
1819  ldwrku = lda
1820  ELSE
1821 *
1822 * WORK(IU) is N by N
1823 *
1824  ldwrku = n
1825  END IF
1826  itau = iu + ldwrku*n
1827  iwork = itau + n
1828 *
1829 * Compute A=Q*R, copying result to U
1830 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1831 *
1832  CALL sgeqrf( m, n, a, lda, work( itau ),
1833  $ work( iwork ), lwork-iwork+1, ierr )
1834  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1835 *
1836 * Generate Q in U
1837 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1838 *
1839  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1840  $ work( iwork ), lwork-iwork+1, ierr )
1841 *
1842 * Copy R to WORK(IU), zeroing out below it
1843 *
1844  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1845  $ ldwrku )
1846  CALL slaset( 'L', n-1, n-1, zero, zero,
1847  $ work( iu+1 ), ldwrku )
1848  ie = itau
1849  itauq = ie + n
1850  itaup = itauq + n
1851  iwork = itaup + n
1852 *
1853 * Bidiagonalize R in WORK(IU), copying result to VT
1854 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1855 *
1856  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1857  $ work( ie ), work( itauq ),
1858  $ work( itaup ), work( iwork ),
1859  $ lwork-iwork+1, ierr )
1860  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1861  $ ldvt )
1862 *
1863 * Generate left bidiagonalizing vectors in WORK(IU)
1864 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1865 *
1866  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1867  $ work( itauq ), work( iwork ),
1868  $ lwork-iwork+1, ierr )
1869 *
1870 * Generate right bidiagonalizing vectors in VT
1871 * (Workspace: need N*N+4*N-1,
1872 * prefer N*N+3*N+(N-1)*NB)
1873 *
1874  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1875  $ work( iwork ), lwork-iwork+1, ierr )
1876  iwork = ie + n
1877 *
1878 * Perform bidiagonal QR iteration, computing left
1879 * singular vectors of R in WORK(IU) and computing
1880 * right singular vectors of R in VT
1881 * (Workspace: need N*N+BDSPAC)
1882 *
1883  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1884  $ ldvt, work( iu ), ldwrku, dum, 1,
1885  $ work( iwork ), info )
1886 *
1887 * Multiply Q in U by left singular vectors of R in
1888 * WORK(IU), storing result in A
1889 * (Workspace: need N*N)
1890 *
1891  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1892  $ work( iu ), ldwrku, zero, a, lda )
1893 *
1894 * Copy left singular vectors of A from A to U
1895 *
1896  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1897 *
1898  ELSE
1899 *
1900 * Insufficient workspace for a fast algorithm
1901 *
1902  itau = 1
1903  iwork = itau + n
1904 *
1905 * Compute A=Q*R, copying result to U
1906 * (Workspace: need 2*N, prefer N+N*NB)
1907 *
1908  CALL sgeqrf( m, n, a, lda, work( itau ),
1909  $ work( iwork ), lwork-iwork+1, ierr )
1910  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1911 *
1912 * Generate Q in U
1913 * (Workspace: need N+M, prefer N+M*NB)
1914 *
1915  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1916  $ work( iwork ), lwork-iwork+1, ierr )
1917 *
1918 * Copy R from A to VT, zeroing out below it
1919 *
1920  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1921  IF( n.GT.1 )
1922  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1923  $ vt( 2, 1 ), ldvt )
1924  ie = itau
1925  itauq = ie + n
1926  itaup = itauq + n
1927  iwork = itaup + n
1928 *
1929 * Bidiagonalize R in VT
1930 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1931 *
1932  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1933  $ work( itauq ), work( itaup ),
1934  $ work( iwork ), lwork-iwork+1, ierr )
1935 *
1936 * Multiply Q in U by left bidiagonalizing vectors
1937 * in VT
1938 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1939 *
1940  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1941  $ work( itauq ), u, ldu, work( iwork ),
1942  $ lwork-iwork+1, ierr )
1943 *
1944 * Generate right bidiagonalizing vectors in VT
1945 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1946 *
1947  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1948  $ work( iwork ), lwork-iwork+1, ierr )
1949  iwork = ie + n
1950 *
1951 * Perform bidiagonal QR iteration, computing left
1952 * singular vectors of A in U and computing right
1953 * singular vectors of A in VT
1954 * (Workspace: need BDSPAC)
1955 *
1956  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1957  $ ldvt, u, ldu, dum, 1, work( iwork ),
1958  $ info )
1959 *
1960  END IF
1961 *
1962  END IF
1963 *
1964  END IF
1965 *
1966  ELSE
1967 *
1968 * M .LT. MNTHR
1969 *
1970 * Path 10 (M at least N, but not much larger)
1971 * Reduce to bidiagonal form without QR decomposition
1972 *
1973  ie = 1
1974  itauq = ie + n
1975  itaup = itauq + n
1976  iwork = itaup + n
1977 *
1978 * Bidiagonalize A
1979 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1980 *
1981  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1982  $ work( itaup ), work( iwork ), lwork-iwork+1,
1983  $ ierr )
1984  IF( wntuas ) THEN
1985 *
1986 * If left singular vectors desired in U, copy result to U
1987 * and generate left bidiagonalizing vectors in U
1988 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1989 *
1990  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1991  IF( wntus )
1992  $ ncu = n
1993  IF( wntua )
1994  $ ncu = m
1995  CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1996  $ work( iwork ), lwork-iwork+1, ierr )
1997  END IF
1998  IF( wntvas ) THEN
1999 *
2000 * If right singular vectors desired in VT, copy result to
2001 * VT and generate right bidiagonalizing vectors in VT
2002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2003 *
2004  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2005  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2006  $ work( iwork ), lwork-iwork+1, ierr )
2007  END IF
2008  IF( wntuo ) THEN
2009 *
2010 * If left singular vectors desired in A, generate left
2011 * bidiagonalizing vectors in A
2012 * (Workspace: need 4*N, prefer 3*N+N*NB)
2013 *
2014  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2015  $ work( iwork ), lwork-iwork+1, ierr )
2016  END IF
2017  IF( wntvo ) THEN
2018 *
2019 * If right singular vectors desired in A, generate right
2020 * bidiagonalizing vectors in A
2021 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2022 *
2023  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2024  $ work( iwork ), lwork-iwork+1, ierr )
2025  END IF
2026  iwork = ie + n
2027  IF( wntuas .OR. wntuo )
2028  $ nru = m
2029  IF( wntun )
2030  $ nru = 0
2031  IF( wntvas .OR. wntvo )
2032  $ ncvt = n
2033  IF( wntvn )
2034  $ ncvt = 0
2035  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2036 *
2037 * Perform bidiagonal QR iteration, if desired, computing
2038 * left singular vectors in U and computing right singular
2039 * vectors in VT
2040 * (Workspace: need BDSPAC)
2041 *
2042  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2043  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2044  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2045 *
2046 * Perform bidiagonal QR iteration, if desired, computing
2047 * left singular vectors in U and computing right singular
2048 * vectors in A
2049 * (Workspace: need BDSPAC)
2050 *
2051  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2052  $ u, ldu, dum, 1, work( iwork ), info )
2053  ELSE
2054 *
2055 * Perform bidiagonal QR iteration, if desired, computing
2056 * left singular vectors in A and computing right singular
2057 * vectors in VT
2058 * (Workspace: need BDSPAC)
2059 *
2060  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2061  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2062  END IF
2063 *
2064  END IF
2065 *
2066  ELSE
2067 *
2068 * A has more columns than rows. If A has sufficiently more
2069 * columns than rows, first reduce using the LQ decomposition (if
2070 * sufficient workspace available)
2071 *
2072  IF( n.GE.mnthr ) THEN
2073 *
2074  IF( wntvn ) THEN
2075 *
2076 * Path 1t(N much larger than M, JOBVT='N')
2077 * No right singular vectors to be computed
2078 *
2079  itau = 1
2080  iwork = itau + m
2081 *
2082 * Compute A=L*Q
2083 * (Workspace: need 2*M, prefer M+M*NB)
2084 *
2085  CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2086  $ lwork-iwork+1, ierr )
2087 *
2088 * Zero out above L
2089 *
2090  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2091  ie = 1
2092  itauq = ie + m
2093  itaup = itauq + m
2094  iwork = itaup + m
2095 *
2096 * Bidiagonalize L in A
2097 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2098 *
2099  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2100  $ work( itaup ), work( iwork ), lwork-iwork+1,
2101  $ ierr )
2102  IF( wntuo .OR. wntuas ) THEN
2103 *
2104 * If left singular vectors desired, generate Q
2105 * (Workspace: need 4*M, prefer 3*M+M*NB)
2106 *
2107  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2108  $ work( iwork ), lwork-iwork+1, ierr )
2109  END IF
2110  iwork = ie + m
2111  nru = 0
2112  IF( wntuo .OR. wntuas )
2113  $ nru = m
2114 *
2115 * Perform bidiagonal QR iteration, computing left singular
2116 * vectors of A in A if desired
2117 * (Workspace: need BDSPAC)
2118 *
2119  CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2120  $ lda, dum, 1, work( iwork ), info )
2121 *
2122 * If left singular vectors desired in U, copy them there
2123 *
2124  IF( wntuas )
2125  $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2126 *
2127  ELSE IF( wntvo .AND. wntun ) THEN
2128 *
2129 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2130 * M right singular vectors to be overwritten on A and
2131 * no left singular vectors to be computed
2132 *
2133  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2134 *
2135 * Sufficient workspace for a fast algorithm
2136 *
2137  ir = 1
2138  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2139 *
2140 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2141 *
2142  ldwrku = lda
2143  chunk = n
2144  ldwrkr = lda
2145  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2146 *
2147 * WORK(IU) is LDA by N and WORK(IR) is M by M
2148 *
2149  ldwrku = lda
2150  chunk = n
2151  ldwrkr = m
2152  ELSE
2153 *
2154 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2155 *
2156  ldwrku = m
2157  chunk = ( lwork-m*m-m ) / m
2158  ldwrkr = m
2159  END IF
2160  itau = ir + ldwrkr*m
2161  iwork = itau + m
2162 *
2163 * Compute A=L*Q
2164 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2165 *
2166  CALL sgelqf( m, n, a, lda, work( itau ),
2167  $ work( iwork ), lwork-iwork+1, ierr )
2168 *
2169 * Copy L to WORK(IR) and zero out above it
2170 *
2171  CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2172  CALL slaset( 'U', m-1, m-1, zero, zero,
2173  $ work( ir+ldwrkr ), ldwrkr )
2174 *
2175 * Generate Q in A
2176 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2177 *
2178  CALL sorglq( m, n, m, a, lda, work( itau ),
2179  $ work( iwork ), lwork-iwork+1, ierr )
2180  ie = itau
2181  itauq = ie + m
2182  itaup = itauq + m
2183  iwork = itaup + m
2184 *
2185 * Bidiagonalize L in WORK(IR)
2186 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2187 *
2188  CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2189  $ work( itauq ), work( itaup ),
2190  $ work( iwork ), lwork-iwork+1, ierr )
2191 *
2192 * Generate right vectors bidiagonalizing L
2193 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2194 *
2195  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2196  $ work( itaup ), work( iwork ),
2197  $ lwork-iwork+1, ierr )
2198  iwork = ie + m
2199 *
2200 * Perform bidiagonal QR iteration, computing right
2201 * singular vectors of L in WORK(IR)
2202 * (Workspace: need M*M+BDSPAC)
2203 *
2204  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2205  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2206  $ work( iwork ), info )
2207  iu = ie + m
2208 *
2209 * Multiply right singular vectors of L in WORK(IR) by Q
2210 * in A, storing result in WORK(IU) and copying to A
2211 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2212 *
2213  DO 30 i = 1, n, chunk
2214  blk = min( n-i+1, chunk )
2215  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2216  $ ldwrkr, a( 1, i ), lda, zero,
2217  $ work( iu ), ldwrku )
2218  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2219  $ a( 1, i ), lda )
2220  30 CONTINUE
2221 *
2222  ELSE
2223 *
2224 * Insufficient workspace for a fast algorithm
2225 *
2226  ie = 1
2227  itauq = ie + m
2228  itaup = itauq + m
2229  iwork = itaup + m
2230 *
2231 * Bidiagonalize A
2232 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2233 *
2234  CALL sgebrd( m, n, a, lda, s, work( ie ),
2235  $ work( itauq ), work( itaup ),
2236  $ work( iwork ), lwork-iwork+1, ierr )
2237 *
2238 * Generate right vectors bidiagonalizing A
2239 * (Workspace: need 4*M, prefer 3*M+M*NB)
2240 *
2241  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2242  $ work( iwork ), lwork-iwork+1, ierr )
2243  iwork = ie + m
2244 *
2245 * Perform bidiagonal QR iteration, computing right
2246 * singular vectors of A in A
2247 * (Workspace: need BDSPAC)
2248 *
2249  CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2250  $ dum, 1, dum, 1, work( iwork ), info )
2251 *
2252  END IF
2253 *
2254  ELSE IF( wntvo .AND. wntuas ) THEN
2255 *
2256 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2257 * M right singular vectors to be overwritten on A and
2258 * M left singular vectors to be computed in U
2259 *
2260  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2261 *
2262 * Sufficient workspace for a fast algorithm
2263 *
2264  ir = 1
2265  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2266 *
2267 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2268 *
2269  ldwrku = lda
2270  chunk = n
2271  ldwrkr = lda
2272  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2273 *
2274 * WORK(IU) is LDA by N and WORK(IR) is M by M
2275 *
2276  ldwrku = lda
2277  chunk = n
2278  ldwrkr = m
2279  ELSE
2280 *
2281 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2282 *
2283  ldwrku = m
2284  chunk = ( lwork-m*m-m ) / m
2285  ldwrkr = m
2286  END IF
2287  itau = ir + ldwrkr*m
2288  iwork = itau + m
2289 *
2290 * Compute A=L*Q
2291 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2292 *
2293  CALL sgelqf( m, n, a, lda, work( itau ),
2294  $ work( iwork ), lwork-iwork+1, ierr )
2295 *
2296 * Copy L to U, zeroing about above it
2297 *
2298  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2299  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2300  $ ldu )
2301 *
2302 * Generate Q in A
2303 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2304 *
2305  CALL sorglq( m, n, m, a, lda, work( itau ),
2306  $ work( iwork ), lwork-iwork+1, ierr )
2307  ie = itau
2308  itauq = ie + m
2309  itaup = itauq + m
2310  iwork = itaup + m
2311 *
2312 * Bidiagonalize L in U, copying result to WORK(IR)
2313 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2314 *
2315  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2316  $ work( itauq ), work( itaup ),
2317  $ work( iwork ), lwork-iwork+1, ierr )
2318  CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2319 *
2320 * Generate right vectors bidiagonalizing L in WORK(IR)
2321 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2322 *
2323  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2324  $ work( itaup ), work( iwork ),
2325  $ lwork-iwork+1, ierr )
2326 *
2327 * Generate left vectors bidiagonalizing L in U
2328 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2329 *
2330  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2331  $ work( iwork ), lwork-iwork+1, ierr )
2332  iwork = ie + m
2333 *
2334 * Perform bidiagonal QR iteration, computing left
2335 * singular vectors of L in U, and computing right
2336 * singular vectors of L in WORK(IR)
2337 * (Workspace: need M*M+BDSPAC)
2338 *
2339  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2340  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2341  $ work( iwork ), info )
2342  iu = ie + m
2343 *
2344 * Multiply right singular vectors of L in WORK(IR) by Q
2345 * in A, storing result in WORK(IU) and copying to A
2346 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2347 *
2348  DO 40 i = 1, n, chunk
2349  blk = min( n-i+1, chunk )
2350  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2351  $ ldwrkr, a( 1, i ), lda, zero,
2352  $ work( iu ), ldwrku )
2353  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2354  $ a( 1, i ), lda )
2355  40 CONTINUE
2356 *
2357  ELSE
2358 *
2359 * Insufficient workspace for a fast algorithm
2360 *
2361  itau = 1
2362  iwork = itau + m
2363 *
2364 * Compute A=L*Q
2365 * (Workspace: need 2*M, prefer M+M*NB)
2366 *
2367  CALL sgelqf( m, n, a, lda, work( itau ),
2368  $ work( iwork ), lwork-iwork+1, ierr )
2369 *
2370 * Copy L to U, zeroing out above it
2371 *
2372  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2373  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2374  $ ldu )
2375 *
2376 * Generate Q in A
2377 * (Workspace: need 2*M, prefer M+M*NB)
2378 *
2379  CALL sorglq( m, n, m, a, lda, work( itau ),
2380  $ work( iwork ), lwork-iwork+1, ierr )
2381  ie = itau
2382  itauq = ie + m
2383  itaup = itauq + m
2384  iwork = itaup + m
2385 *
2386 * Bidiagonalize L in U
2387 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2388 *
2389  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2390  $ work( itauq ), work( itaup ),
2391  $ work( iwork ), lwork-iwork+1, ierr )
2392 *
2393 * Multiply right vectors bidiagonalizing L by Q in A
2394 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2395 *
2396  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2397  $ work( itaup ), a, lda, work( iwork ),
2398  $ lwork-iwork+1, ierr )
2399 *
2400 * Generate left vectors bidiagonalizing L in U
2401 * (Workspace: need 4*M, prefer 3*M+M*NB)
2402 *
2403  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2404  $ work( iwork ), lwork-iwork+1, ierr )
2405  iwork = ie + m
2406 *
2407 * Perform bidiagonal QR iteration, computing left
2408 * singular vectors of A in U and computing right
2409 * singular vectors of A in A
2410 * (Workspace: need BDSPAC)
2411 *
2412  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2413  $ u, ldu, dum, 1, work( iwork ), info )
2414 *
2415  END IF
2416 *
2417  ELSE IF( wntvs ) THEN
2418 *
2419  IF( wntun ) THEN
2420 *
2421 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2422 * M right singular vectors to be computed in VT and
2423 * no left singular vectors to be computed
2424 *
2425  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2426 *
2427 * Sufficient workspace for a fast algorithm
2428 *
2429  ir = 1
2430  IF( lwork.GE.wrkbl+lda*m ) THEN
2431 *
2432 * WORK(IR) is LDA by M
2433 *
2434  ldwrkr = lda
2435  ELSE
2436 *
2437 * WORK(IR) is M by M
2438 *
2439  ldwrkr = m
2440  END IF
2441  itau = ir + ldwrkr*m
2442  iwork = itau + m
2443 *
2444 * Compute A=L*Q
2445 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2446 *
2447  CALL sgelqf( m, n, a, lda, work( itau ),
2448  $ work( iwork ), lwork-iwork+1, ierr )
2449 *
2450 * Copy L to WORK(IR), zeroing out above it
2451 *
2452  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2453  $ ldwrkr )
2454  CALL slaset( 'U', m-1, m-1, zero, zero,
2455  $ work( ir+ldwrkr ), ldwrkr )
2456 *
2457 * Generate Q in A
2458 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2459 *
2460  CALL sorglq( m, n, m, a, lda, work( itau ),
2461  $ work( iwork ), lwork-iwork+1, ierr )
2462  ie = itau
2463  itauq = ie + m
2464  itaup = itauq + m
2465  iwork = itaup + m
2466 *
2467 * Bidiagonalize L in WORK(IR)
2468 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2469 *
2470  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2471  $ work( ie ), work( itauq ),
2472  $ work( itaup ), work( iwork ),
2473  $ lwork-iwork+1, ierr )
2474 *
2475 * Generate right vectors bidiagonalizing L in
2476 * WORK(IR)
2477 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2478 *
2479  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2480  $ work( itaup ), work( iwork ),
2481  $ lwork-iwork+1, ierr )
2482  iwork = ie + m
2483 *
2484 * Perform bidiagonal QR iteration, computing right
2485 * singular vectors of L in WORK(IR)
2486 * (Workspace: need M*M+BDSPAC)
2487 *
2488  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2489  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2490  $ work( iwork ), info )
2491 *
2492 * Multiply right singular vectors of L in WORK(IR) by
2493 * Q in A, storing result in VT
2494 * (Workspace: need M*M)
2495 *
2496  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2497  $ ldwrkr, a, lda, zero, vt, ldvt )
2498 *
2499  ELSE
2500 *
2501 * Insufficient workspace for a fast algorithm
2502 *
2503  itau = 1
2504  iwork = itau + m
2505 *
2506 * Compute A=L*Q
2507 * (Workspace: need 2*M, prefer M+M*NB)
2508 *
2509  CALL sgelqf( m, n, a, lda, work( itau ),
2510  $ work( iwork ), lwork-iwork+1, ierr )
2511 *
2512 * Copy result to VT
2513 *
2514  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2515 *
2516 * Generate Q in VT
2517 * (Workspace: need 2*M, prefer M+M*NB)
2518 *
2519  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2520  $ work( iwork ), lwork-iwork+1, ierr )
2521  ie = itau
2522  itauq = ie + m
2523  itaup = itauq + m
2524  iwork = itaup + m
2525 *
2526 * Zero out above L in A
2527 *
2528  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2529  $ lda )
2530 *
2531 * Bidiagonalize L in A
2532 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2533 *
2534  CALL sgebrd( m, m, a, lda, s, work( ie ),
2535  $ work( itauq ), work( itaup ),
2536  $ work( iwork ), lwork-iwork+1, ierr )
2537 *
2538 * Multiply right vectors bidiagonalizing L by Q in VT
2539 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2540 *
2541  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2542  $ work( itaup ), vt, ldvt,
2543  $ work( iwork ), lwork-iwork+1, ierr )
2544  iwork = ie + m
2545 *
2546 * Perform bidiagonal QR iteration, computing right
2547 * singular vectors of A in VT
2548 * (Workspace: need BDSPAC)
2549 *
2550  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2551  $ ldvt, dum, 1, dum, 1, work( iwork ),
2552  $ info )
2553 *
2554  END IF
2555 *
2556  ELSE IF( wntuo ) THEN
2557 *
2558 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2559 * M right singular vectors to be computed in VT and
2560 * M left singular vectors to be overwritten on A
2561 *
2562  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2563 *
2564 * Sufficient workspace for a fast algorithm
2565 *
2566  iu = 1
2567  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2568 *
2569 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2570 *
2571  ldwrku = lda
2572  ir = iu + ldwrku*m
2573  ldwrkr = lda
2574  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2575 *
2576 * WORK(IU) is LDA by M and WORK(IR) is M by M
2577 *
2578  ldwrku = lda
2579  ir = iu + ldwrku*m
2580  ldwrkr = m
2581  ELSE
2582 *
2583 * WORK(IU) is M by M and WORK(IR) is M by M
2584 *
2585  ldwrku = m
2586  ir = iu + ldwrku*m
2587  ldwrkr = m
2588  END IF
2589  itau = ir + ldwrkr*m
2590  iwork = itau + m
2591 *
2592 * Compute A=L*Q
2593 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2594 *
2595  CALL sgelqf( m, n, a, lda, work( itau ),
2596  $ work( iwork ), lwork-iwork+1, ierr )
2597 *
2598 * Copy L to WORK(IU), zeroing out below it
2599 *
2600  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2601  $ ldwrku )
2602  CALL slaset( 'U', m-1, m-1, zero, zero,
2603  $ work( iu+ldwrku ), ldwrku )
2604 *
2605 * Generate Q in A
2606 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2607 *
2608  CALL sorglq( m, n, m, a, lda, work( itau ),
2609  $ work( iwork ), lwork-iwork+1, ierr )
2610  ie = itau
2611  itauq = ie + m
2612  itaup = itauq + m
2613  iwork = itaup + m
2614 *
2615 * Bidiagonalize L in WORK(IU), copying result to
2616 * WORK(IR)
2617 * (Workspace: need 2*M*M+4*M,
2618 * prefer 2*M*M+3*M+2*M*NB)
2619 *
2620  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2621  $ work( ie ), work( itauq ),
2622  $ work( itaup ), work( iwork ),
2623  $ lwork-iwork+1, ierr )
2624  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2625  $ work( ir ), ldwrkr )
2626 *
2627 * Generate right bidiagonalizing vectors in WORK(IU)
2628 * (Workspace: need 2*M*M+4*M-1,
2629 * prefer 2*M*M+3*M+(M-1)*NB)
2630 *
2631  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2632  $ work( itaup ), work( iwork ),
2633  $ lwork-iwork+1, ierr )
2634 *
2635 * Generate left bidiagonalizing vectors in WORK(IR)
2636 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2637 *
2638  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2639  $ work( itauq ), work( iwork ),
2640  $ lwork-iwork+1, ierr )
2641  iwork = ie + m
2642 *
2643 * Perform bidiagonal QR iteration, computing left
2644 * singular vectors of L in WORK(IR) and computing
2645 * right singular vectors of L in WORK(IU)
2646 * (Workspace: need 2*M*M+BDSPAC)
2647 *
2648  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2649  $ work( iu ), ldwrku, work( ir ),
2650  $ ldwrkr, dum, 1, work( iwork ), info )
2651 *
2652 * Multiply right singular vectors of L in WORK(IU) by
2653 * Q in A, storing result in VT
2654 * (Workspace: need M*M)
2655 *
2656  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2657  $ ldwrku, a, lda, zero, vt, ldvt )
2658 *
2659 * Copy left singular vectors of L to A
2660 * (Workspace: need M*M)
2661 *
2662  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2663  $ lda )
2664 *
2665  ELSE
2666 *
2667 * Insufficient workspace for a fast algorithm
2668 *
2669  itau = 1
2670  iwork = itau + m
2671 *
2672 * Compute A=L*Q, copying result to VT
2673 * (Workspace: need 2*M, prefer M+M*NB)
2674 *
2675  CALL sgelqf( m, n, a, lda, work( itau ),
2676  $ work( iwork ), lwork-iwork+1, ierr )
2677  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2678 *
2679 * Generate Q in VT
2680 * (Workspace: need 2*M, prefer M+M*NB)
2681 *
2682  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2683  $ work( iwork ), lwork-iwork+1, ierr )
2684  ie = itau
2685  itauq = ie + m
2686  itaup = itauq + m
2687  iwork = itaup + m
2688 *
2689 * Zero out above L in A
2690 *
2691  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2692  $ lda )
2693 *
2694 * Bidiagonalize L in A
2695 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2696 *
2697  CALL sgebrd( m, m, a, lda, s, work( ie ),
2698  $ work( itauq ), work( itaup ),
2699  $ work( iwork ), lwork-iwork+1, ierr )
2700 *
2701 * Multiply right vectors bidiagonalizing L by Q in VT
2702 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2703 *
2704  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2705  $ work( itaup ), vt, ldvt,
2706  $ work( iwork ), lwork-iwork+1, ierr )
2707 *
2708 * Generate left bidiagonalizing vectors of L in A
2709 * (Workspace: need 4*M, prefer 3*M+M*NB)
2710 *
2711  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2712  $ work( iwork ), lwork-iwork+1, ierr )
2713  iwork = ie + m
2714 *
2715 * Perform bidiagonal QR iteration, compute left
2716 * singular vectors of A in A and compute right
2717 * singular vectors of A in VT
2718 * (Workspace: need BDSPAC)
2719 *
2720  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2721  $ ldvt, a, lda, dum, 1, work( iwork ),
2722  $ info )
2723 *
2724  END IF
2725 *
2726  ELSE IF( wntuas ) THEN
2727 *
2728 * Path 6t(N much larger than M, JOBU='S' or 'A',
2729 * JOBVT='S')
2730 * M right singular vectors to be computed in VT and
2731 * M left singular vectors to be computed in U
2732 *
2733  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2734 *
2735 * Sufficient workspace for a fast algorithm
2736 *
2737  iu = 1
2738  IF( lwork.GE.wrkbl+lda*m ) THEN
2739 *
2740 * WORK(IU) is LDA by N
2741 *
2742  ldwrku = lda
2743  ELSE
2744 *
2745 * WORK(IU) is LDA by M
2746 *
2747  ldwrku = m
2748  END IF
2749  itau = iu + ldwrku*m
2750  iwork = itau + m
2751 *
2752 * Compute A=L*Q
2753 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2754 *
2755  CALL sgelqf( m, n, a, lda, work( itau ),
2756  $ work( iwork ), lwork-iwork+1, ierr )
2757 *
2758 * Copy L to WORK(IU), zeroing out above it
2759 *
2760  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2761  $ ldwrku )
2762  CALL slaset( 'U', m-1, m-1, zero, zero,
2763  $ work( iu+ldwrku ), ldwrku )
2764 *
2765 * Generate Q in A
2766 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2767 *
2768  CALL sorglq( m, n, m, a, lda, work( itau ),
2769  $ work( iwork ), lwork-iwork+1, ierr )
2770  ie = itau
2771  itauq = ie + m
2772  itaup = itauq + m
2773  iwork = itaup + m
2774 *
2775 * Bidiagonalize L in WORK(IU), copying result to U
2776 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2777 *
2778  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2779  $ work( ie ), work( itauq ),
2780  $ work( itaup ), work( iwork ),
2781  $ lwork-iwork+1, ierr )
2782  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2783  $ ldu )
2784 *
2785 * Generate right bidiagonalizing vectors in WORK(IU)
2786 * (Workspace: need M*M+4*M-1,
2787 * prefer M*M+3*M+(M-1)*NB)
2788 *
2789  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2790  $ work( itaup ), work( iwork ),
2791  $ lwork-iwork+1, ierr )
2792 *
2793 * Generate left bidiagonalizing vectors in U
2794 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2795 *
2796  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2797  $ work( iwork ), lwork-iwork+1, ierr )
2798  iwork = ie + m
2799 *
2800 * Perform bidiagonal QR iteration, computing left
2801 * singular vectors of L in U and computing right
2802 * singular vectors of L in WORK(IU)
2803 * (Workspace: need M*M+BDSPAC)
2804 *
2805  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2806  $ work( iu ), ldwrku, u, ldu, dum, 1,
2807  $ work( iwork ), info )
2808 *
2809 * Multiply right singular vectors of L in WORK(IU) by
2810 * Q in A, storing result in VT
2811 * (Workspace: need M*M)
2812 *
2813  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2814  $ ldwrku, a, lda, zero, vt, ldvt )
2815 *
2816  ELSE
2817 *
2818 * Insufficient workspace for a fast algorithm
2819 *
2820  itau = 1
2821  iwork = itau + m
2822 *
2823 * Compute A=L*Q, copying result to VT
2824 * (Workspace: need 2*M, prefer M+M*NB)
2825 *
2826  CALL sgelqf( m, n, a, lda, work( itau ),
2827  $ work( iwork ), lwork-iwork+1, ierr )
2828  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2829 *
2830 * Generate Q in VT
2831 * (Workspace: need 2*M, prefer M+M*NB)
2832 *
2833  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2834  $ work( iwork ), lwork-iwork+1, ierr )
2835 *
2836 * Copy L to U, zeroing out above it
2837 *
2838  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2839  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2840  $ ldu )
2841  ie = itau
2842  itauq = ie + m
2843  itaup = itauq + m
2844  iwork = itaup + m
2845 *
2846 * Bidiagonalize L in U
2847 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2848 *
2849  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2850  $ work( itauq ), work( itaup ),
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Multiply right bidiagonalizing vectors in U by Q
2854 * in VT
2855 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2856 *
2857  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2858  $ work( itaup ), vt, ldvt,
2859  $ work( iwork ), lwork-iwork+1, ierr )
2860 *
2861 * Generate left bidiagonalizing vectors in U
2862 * (Workspace: need 4*M, prefer 3*M+M*NB)
2863 *
2864  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2865  $ work( iwork ), lwork-iwork+1, ierr )
2866  iwork = ie + m
2867 *
2868 * Perform bidiagonal QR iteration, computing left
2869 * singular vectors of A in U and computing right
2870 * singular vectors of A in VT
2871 * (Workspace: need BDSPAC)
2872 *
2873  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2874  $ ldvt, u, ldu, dum, 1, work( iwork ),
2875  $ info )
2876 *
2877  END IF
2878 *
2879  END IF
2880 *
2881  ELSE IF( wntva ) THEN
2882 *
2883  IF( wntun ) THEN
2884 *
2885 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2886 * N right singular vectors to be computed in VT and
2887 * no left singular vectors to be computed
2888 *
2889  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2890 *
2891 * Sufficient workspace for a fast algorithm
2892 *
2893  ir = 1
2894  IF( lwork.GE.wrkbl+lda*m ) THEN
2895 *
2896 * WORK(IR) is LDA by M
2897 *
2898  ldwrkr = lda
2899  ELSE
2900 *
2901 * WORK(IR) is M by M
2902 *
2903  ldwrkr = m
2904  END IF
2905  itau = ir + ldwrkr*m
2906  iwork = itau + m
2907 *
2908 * Compute A=L*Q, copying result to VT
2909 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2910 *
2911  CALL sgelqf( m, n, a, lda, work( itau ),
2912  $ work( iwork ), lwork-iwork+1, ierr )
2913  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2914 *
2915 * Copy L to WORK(IR), zeroing out above it
2916 *
2917  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2918  $ ldwrkr )
2919  CALL slaset( 'U', m-1, m-1, zero, zero,
2920  $ work( ir+ldwrkr ), ldwrkr )
2921 *
2922 * Generate Q in VT
2923 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2924 *
2925  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2926  $ work( iwork ), lwork-iwork+1, ierr )
2927  ie = itau
2928  itauq = ie + m
2929  itaup = itauq + m
2930  iwork = itaup + m
2931 *
2932 * Bidiagonalize L in WORK(IR)
2933 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2934 *
2935  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2936  $ work( ie ), work( itauq ),
2937  $ work( itaup ), work( iwork ),
2938  $ lwork-iwork+1, ierr )
2939 *
2940 * Generate right bidiagonalizing vectors in WORK(IR)
2941 * (Workspace: need M*M+4*M-1,
2942 * prefer M*M+3*M+(M-1)*NB)
2943 *
2944  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2945  $ work( itaup ), work( iwork ),
2946  $ lwork-iwork+1, ierr )
2947  iwork = ie + m
2948 *
2949 * Perform bidiagonal QR iteration, computing right
2950 * singular vectors of L in WORK(IR)
2951 * (Workspace: need M*M+BDSPAC)
2952 *
2953  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2954  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2955  $ work( iwork ), info )
2956 *
2957 * Multiply right singular vectors of L in WORK(IR) by
2958 * Q in VT, storing result in A
2959 * (Workspace: need M*M)
2960 *
2961  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2962  $ ldwrkr, vt, ldvt, zero, a, lda )
2963 *
2964 * Copy right singular vectors of A from A to VT
2965 *
2966  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2967 *
2968  ELSE
2969 *
2970 * Insufficient workspace for a fast algorithm
2971 *
2972  itau = 1
2973  iwork = itau + m
2974 *
2975 * Compute A=L*Q, copying result to VT
2976 * (Workspace: need 2*M, prefer M+M*NB)
2977 *
2978  CALL sgelqf( m, n, a, lda, work( itau ),
2979  $ work( iwork ), lwork-iwork+1, ierr )
2980  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2981 *
2982 * Generate Q in VT
2983 * (Workspace: need M+N, prefer M+N*NB)
2984 *
2985  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2986  $ work( iwork ), lwork-iwork+1, ierr )
2987  ie = itau
2988  itauq = ie + m
2989  itaup = itauq + m
2990  iwork = itaup + m
2991 *
2992 * Zero out above L in A
2993 *
2994  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2995  $ lda )
2996 *
2997 * Bidiagonalize L in A
2998 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2999 *
3000  CALL sgebrd( m, m, a, lda, s, work( ie ),
3001  $ work( itauq ), work( itaup ),
3002  $ work( iwork ), lwork-iwork+1, ierr )
3003 *
3004 * Multiply right bidiagonalizing vectors in A by Q
3005 * in VT
3006 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3007 *
3008  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3009  $ work( itaup ), vt, ldvt,
3010  $ work( iwork ), lwork-iwork+1, ierr )
3011  iwork = ie + m
3012 *
3013 * Perform bidiagonal QR iteration, computing right
3014 * singular vectors of A in VT
3015 * (Workspace: need BDSPAC)
3016 *
3017  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3018  $ ldvt, dum, 1, dum, 1, work( iwork ),
3019  $ info )
3020 *
3021  END IF
3022 *
3023  ELSE IF( wntuo ) THEN
3024 *
3025 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3026 * N right singular vectors to be computed in VT and
3027 * M left singular vectors to be overwritten on A
3028 *
3029  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3030 *
3031 * Sufficient workspace for a fast algorithm
3032 *
3033  iu = 1
3034  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3035 *
3036 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3037 *
3038  ldwrku = lda
3039  ir = iu + ldwrku*m
3040  ldwrkr = lda
3041  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3042 *
3043 * WORK(IU) is LDA by M and WORK(IR) is M by M
3044 *
3045  ldwrku = lda
3046  ir = iu + ldwrku*m
3047  ldwrkr = m
3048  ELSE
3049 *
3050 * WORK(IU) is M by M and WORK(IR) is M by M
3051 *
3052  ldwrku = m
3053  ir = iu + ldwrku*m
3054  ldwrkr = m
3055  END IF
3056  itau = ir + ldwrkr*m
3057  iwork = itau + m
3058 *
3059 * Compute A=L*Q, copying result to VT
3060 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3061 *
3062  CALL sgelqf( m, n, a, lda, work( itau ),
3063  $ work( iwork ), lwork-iwork+1, ierr )
3064  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3065 *
3066 * Generate Q in VT
3067 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3068 *
3069  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3070  $ work( iwork ), lwork-iwork+1, ierr )
3071 *
3072 * Copy L to WORK(IU), zeroing out above it
3073 *
3074  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3075  $ ldwrku )
3076  CALL slaset( 'U', m-1, m-1, zero, zero,
3077  $ work( iu+ldwrku ), ldwrku )
3078  ie = itau
3079  itauq = ie + m
3080  itaup = itauq + m
3081  iwork = itaup + m
3082 *
3083 * Bidiagonalize L in WORK(IU), copying result to
3084 * WORK(IR)
3085 * (Workspace: need 2*M*M+4*M,
3086 * prefer 2*M*M+3*M+2*M*NB)
3087 *
3088  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3089  $ work( ie ), work( itauq ),
3090  $ work( itaup ), work( iwork ),
3091  $ lwork-iwork+1, ierr )
3092  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3093  $ work( ir ), ldwrkr )
3094 *
3095 * Generate right bidiagonalizing vectors in WORK(IU)
3096 * (Workspace: need 2*M*M+4*M-1,
3097 * prefer 2*M*M+3*M+(M-1)*NB)
3098 *
3099  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3100  $ work( itaup ), work( iwork ),
3101  $ lwork-iwork+1, ierr )
3102 *
3103 * Generate left bidiagonalizing vectors in WORK(IR)
3104 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3105 *
3106  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3107  $ work( itauq ), work( iwork ),
3108  $ lwork-iwork+1, ierr )
3109  iwork = ie + m
3110 *
3111 * Perform bidiagonal QR iteration, computing left
3112 * singular vectors of L in WORK(IR) and computing
3113 * right singular vectors of L in WORK(IU)
3114 * (Workspace: need 2*M*M+BDSPAC)
3115 *
3116  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3117  $ work( iu ), ldwrku, work( ir ),
3118  $ ldwrkr, dum, 1, work( iwork ), info )
3119 *
3120 * Multiply right singular vectors of L in WORK(IU) by
3121 * Q in VT, storing result in A
3122 * (Workspace: need M*M)
3123 *
3124  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3125  $ ldwrku, vt, ldvt, zero, a, lda )
3126 *
3127 * Copy right singular vectors of A from A to VT
3128 *
3129  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3130 *
3131 * Copy left singular vectors of A from WORK(IR) to A
3132 *
3133  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3134  $ lda )
3135 *
3136  ELSE
3137 *
3138 * Insufficient workspace for a fast algorithm
3139 *
3140  itau = 1
3141  iwork = itau + m
3142 *
3143 * Compute A=L*Q, copying result to VT
3144 * (Workspace: need 2*M, prefer M+M*NB)
3145 *
3146  CALL sgelqf( m, n, a, lda, work( itau ),
3147  $ work( iwork ), lwork-iwork+1, ierr )
3148  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3149 *
3150 * Generate Q in VT
3151 * (Workspace: need M+N, prefer M+N*NB)
3152 *
3153  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3154  $ work( iwork ), lwork-iwork+1, ierr )
3155  ie = itau
3156  itauq = ie + m
3157  itaup = itauq + m
3158  iwork = itaup + m
3159 *
3160 * Zero out above L in A
3161 *
3162  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3163  $ lda )
3164 *
3165 * Bidiagonalize L in A
3166 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3167 *
3168  CALL sgebrd( m, m, a, lda, s, work( ie ),
3169  $ work( itauq ), work( itaup ),
3170  $ work( iwork ), lwork-iwork+1, ierr )
3171 *
3172 * Multiply right bidiagonalizing vectors in A by Q
3173 * in VT
3174 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3175 *
3176  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3177  $ work( itaup ), vt, ldvt,
3178  $ work( iwork ), lwork-iwork+1, ierr )
3179 *
3180 * Generate left bidiagonalizing vectors in A
3181 * (Workspace: need 4*M, prefer 3*M+M*NB)
3182 *
3183  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3184  $ work( iwork ), lwork-iwork+1, ierr )
3185  iwork = ie + m
3186 *
3187 * Perform bidiagonal QR iteration, computing left
3188 * singular vectors of A in A and computing right
3189 * singular vectors of A in VT
3190 * (Workspace: need BDSPAC)
3191 *
3192  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3193  $ ldvt, a, lda, dum, 1, work( iwork ),
3194  $ info )
3195 *
3196  END IF
3197 *
3198  ELSE IF( wntuas ) THEN
3199 *
3200 * Path 9t(N much larger than M, JOBU='S' or 'A',
3201 * JOBVT='A')
3202 * N right singular vectors to be computed in VT and
3203 * M left singular vectors to be computed in U
3204 *
3205  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3206 *
3207 * Sufficient workspace for a fast algorithm
3208 *
3209  iu = 1
3210  IF( lwork.GE.wrkbl+lda*m ) THEN
3211 *
3212 * WORK(IU) is LDA by M
3213 *
3214  ldwrku = lda
3215  ELSE
3216 *
3217 * WORK(IU) is M by M
3218 *
3219  ldwrku = m
3220  END IF
3221  itau = iu + ldwrku*m
3222  iwork = itau + m
3223 *
3224 * Compute A=L*Q, copying result to VT
3225 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3226 *
3227  CALL sgelqf( m, n, a, lda, work( itau ),
3228  $ work( iwork ), lwork-iwork+1, ierr )
3229  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3230 *
3231 * Generate Q in VT
3232 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3233 *
3234  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3235  $ work( iwork ), lwork-iwork+1, ierr )
3236 *
3237 * Copy L to WORK(IU), zeroing out above it
3238 *
3239  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3240  $ ldwrku )
3241  CALL slaset( 'U', m-1, m-1, zero, zero,
3242  $ work( iu+ldwrku ), ldwrku )
3243  ie = itau
3244  itauq = ie + m
3245  itaup = itauq + m
3246  iwork = itaup + m
3247 *
3248 * Bidiagonalize L in WORK(IU), copying result to U
3249 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3250 *
3251  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3252  $ work( ie ), work( itauq ),
3253  $ work( itaup ), work( iwork ),
3254  $ lwork-iwork+1, ierr )
3255  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3256  $ ldu )
3257 *
3258 * Generate right bidiagonalizing vectors in WORK(IU)
3259 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3260 *
3261  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3262  $ work( itaup ), work( iwork ),
3263  $ lwork-iwork+1, ierr )
3264 *
3265 * Generate left bidiagonalizing vectors in U
3266 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3267 *
3268  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3269  $ work( iwork ), lwork-iwork+1, ierr )
3270  iwork = ie + m
3271 *
3272 * Perform bidiagonal QR iteration, computing left
3273 * singular vectors of L in U and computing right
3274 * singular vectors of L in WORK(IU)
3275 * (Workspace: need M*M+BDSPAC)
3276 *
3277  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3278  $ work( iu ), ldwrku, u, ldu, dum, 1,
3279  $ work( iwork ), info )
3280 *
3281 * Multiply right singular vectors of L in WORK(IU) by
3282 * Q in VT, storing result in A
3283 * (Workspace: need M*M)
3284 *
3285  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3286  $ ldwrku, vt, ldvt, zero, a, lda )
3287 *
3288 * Copy right singular vectors of A from A to VT
3289 *
3290  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3291 *
3292  ELSE
3293 *
3294 * Insufficient workspace for a fast algorithm
3295 *
3296  itau = 1
3297  iwork = itau + m
3298 *
3299 * Compute A=L*Q, copying result to VT
3300 * (Workspace: need 2*M, prefer M+M*NB)
3301 *
3302  CALL sgelqf( m, n, a, lda, work( itau ),
3303  $ work( iwork ), lwork-iwork+1, ierr )
3304  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3305 *
3306 * Generate Q in VT
3307 * (Workspace: need M+N, prefer M+N*NB)
3308 *
3309  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3310  $ work( iwork ), lwork-iwork+1, ierr )
3311 *
3312 * Copy L to U, zeroing out above it
3313 *
3314  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3315  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3316  $ ldu )
3317  ie = itau
3318  itauq = ie + m
3319  itaup = itauq + m
3320  iwork = itaup + m
3321 *
3322 * Bidiagonalize L in U
3323 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3324 *
3325  CALL sgebrd( m, m, u, ldu, s, work( ie ),
3326  $ work( itauq ), work( itaup ),
3327  $ work( iwork ), lwork-iwork+1, ierr )
3328 *
3329 * Multiply right bidiagonalizing vectors in U by Q
3330 * in VT
3331 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3332 *
3333  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3334  $ work( itaup ), vt, ldvt,
3335  $ work( iwork ), lwork-iwork+1, ierr )
3336 *
3337 * Generate left bidiagonalizing vectors in U
3338 * (Workspace: need 4*M, prefer 3*M+M*NB)
3339 *
3340  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3341  $ work( iwork ), lwork-iwork+1, ierr )
3342  iwork = ie + m
3343 *
3344 * Perform bidiagonal QR iteration, computing left
3345 * singular vectors of A in U and computing right
3346 * singular vectors of A in VT
3347 * (Workspace: need BDSPAC)
3348 *
3349  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3350  $ ldvt, u, ldu, dum, 1, work( iwork ),
3351  $ info )
3352 *
3353  END IF
3354 *
3355  END IF
3356 *
3357  END IF
3358 *
3359  ELSE
3360 *
3361 * N .LT. MNTHR
3362 *
3363 * Path 10t(N greater than M, but not much larger)
3364 * Reduce to bidiagonal form without LQ decomposition
3365 *
3366  ie = 1
3367  itauq = ie + m
3368  itaup = itauq + m
3369  iwork = itaup + m
3370 *
3371 * Bidiagonalize A
3372 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3373 *
3374  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3375  $ work( itaup ), work( iwork ), lwork-iwork+1,
3376  $ ierr )
3377  IF( wntuas ) THEN
3378 *
3379 * If left singular vectors desired in U, copy result to U
3380 * and generate left bidiagonalizing vectors in U
3381 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3382 *
3383  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3384  CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3385  $ work( iwork ), lwork-iwork+1, ierr )
3386  END IF
3387  IF( wntvas ) THEN
3388 *
3389 * If right singular vectors desired in VT, copy result to
3390 * VT and generate right bidiagonalizing vectors in VT
3391 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3392 *
3393  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3394  IF( wntva )
3395  $ nrvt = n
3396  IF( wntvs )
3397  $ nrvt = m
3398  CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3399  $ work( iwork ), lwork-iwork+1, ierr )
3400  END IF
3401  IF( wntuo ) THEN
3402 *
3403 * If left singular vectors desired in A, generate left
3404 * bidiagonalizing vectors in A
3405 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3406 *
3407  CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3408  $ work( iwork ), lwork-iwork+1, ierr )
3409  END IF
3410  IF( wntvo ) THEN
3411 *
3412 * If right singular vectors desired in A, generate right
3413 * bidiagonalizing vectors in A
3414 * (Workspace: need 4*M, prefer 3*M+M*NB)
3415 *
3416  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3417  $ work( iwork ), lwork-iwork+1, ierr )
3418  END IF
3419  iwork = ie + m
3420  IF( wntuas .OR. wntuo )
3421  $ nru = m
3422  IF( wntun )
3423  $ nru = 0
3424  IF( wntvas .OR. wntvo )
3425  $ ncvt = n
3426  IF( wntvn )
3427  $ ncvt = 0
3428  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3429 *
3430 * Perform bidiagonal QR iteration, if desired, computing
3431 * left singular vectors in U and computing right singular
3432 * vectors in VT
3433 * (Workspace: need BDSPAC)
3434 *
3435  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3436  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3437  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3438 *
3439 * Perform bidiagonal QR iteration, if desired, computing
3440 * left singular vectors in U and computing right singular
3441 * vectors in A
3442 * (Workspace: need BDSPAC)
3443 *
3444  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3445  $ u, ldu, dum, 1, work( iwork ), info )
3446  ELSE
3447 *
3448 * Perform bidiagonal QR iteration, if desired, computing
3449 * left singular vectors in A and computing right singular
3450 * vectors in VT
3451 * (Workspace: need BDSPAC)
3452 *
3453  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3454  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3455  END IF
3456 *
3457  END IF
3458 *
3459  END IF
3460 *
3461 * If SBDSQR failed to converge, copy unconverged superdiagonals
3462 * to WORK( 2:MINMN )
3463 *
3464  IF( info.NE.0 ) THEN
3465  IF( ie.GT.2 ) THEN
3466  DO 50 i = 1, minmn - 1
3467  work( i+1 ) = work( i+ie-1 )
3468  50 CONTINUE
3469  END IF
3470  IF( ie.LT.2 ) THEN
3471  DO 60 i = minmn - 1, 1, -1
3472  work( i+1 ) = work( i+ie-1 )
3473  60 CONTINUE
3474  END IF
3475  END IF
3476 *
3477 * Undo scaling if necessary
3478 *
3479  IF( iscl.EQ.1 ) THEN
3480  IF( anrm.GT.bignum )
3481  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3482  $ ierr )
3483  IF( info.NE.0 .AND. anrm.GT.bignum )
3484  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3485  $ minmn, ierr )
3486  IF( anrm.LT.smlnum )
3487  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3488  $ ierr )
3489  IF( info.NE.0 .AND. anrm.LT.smlnum )
3490  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3491  $ minmn, ierr )
3492  END IF
3493 *
3494 * Return optimal workspace in WORK(1)
3495 *
3496  work( 1 ) = maxwrk
3497 *
3498  RETURN
3499 *
3500 * End of SGESVD
3501 *
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine sbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
SBDSQR
Definition: sbdsqr.f:240
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:157
real function slange(NORM, M, N, A, LDA, WORK)
SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition: slange.f:114
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:145
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:205
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:143
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:196
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:127
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:128
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:68
Here is the call graph for this function:
Here is the caller graph for this function: