LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
cgesvd.f
Go to the documentation of this file.
1 *> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL RWORK( * ), S( * )
30 * COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> CGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is REAL array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is REAL array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if CBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \ingroup complexGEsing
210 *
211 * =====================================================================
212  SUBROUTINE cgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
213  $ WORK, LWORK, RWORK, INFO )
214 *
215 * -- LAPACK driver routine --
216 * -- LAPACK is a software package provided by Univ. of Tennessee, --
217 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218 *
219 * .. Scalar Arguments ..
220  CHARACTER JOBU, JOBVT
221  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222 * ..
223 * .. Array Arguments ..
224  REAL RWORK( * ), S( * )
225  COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
226  $ work( * )
227 * ..
228 *
229 * =====================================================================
230 *
231 * .. Parameters ..
232  COMPLEX CZERO, CONE
233  parameter( czero = ( 0.0e0, 0.0e0 ),
234  $ cone = ( 1.0e0, 0.0e0 ) )
235  REAL ZERO, ONE
236  parameter( zero = 0.0e0, one = 1.0e0 )
237 * ..
238 * .. Local Scalars ..
239  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
240  $ wntva, wntvas, wntvn, wntvo, wntvs
241  INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
242  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
243  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
244  $ nrvt, wrkbl
245  INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
246  $ lwork_cgebrd, lwork_cungbr_p, lwork_cungbr_q,
247  $ lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m
248  REAL ANRM, BIGNUM, EPS, SMLNUM
249 * ..
250 * .. Local Arrays ..
251  REAL DUM( 1 )
252  COMPLEX CDUM( 1 )
253 * ..
254 * .. External Subroutines ..
255  EXTERNAL cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,
257  $ slascl, xerbla
258 * ..
259 * .. External Functions ..
260  LOGICAL LSAME
261  INTEGER ILAENV
262  REAL CLANGE, SLAMCH
263  EXTERNAL lsame, ilaenv, clange, slamch
264 * ..
265 * .. Intrinsic Functions ..
266  INTRINSIC max, min, sqrt
267 * ..
268 * .. Executable Statements ..
269 *
270 * Test the input arguments
271 *
272  info = 0
273  minmn = min( m, n )
274  wntua = lsame( jobu, 'A' )
275  wntus = lsame( jobu, 'S' )
276  wntuas = wntua .OR. wntus
277  wntuo = lsame( jobu, 'O' )
278  wntun = lsame( jobu, 'N' )
279  wntva = lsame( jobvt, 'A' )
280  wntvs = lsame( jobvt, 'S' )
281  wntvas = wntva .OR. wntvs
282  wntvo = lsame( jobvt, 'O' )
283  wntvn = lsame( jobvt, 'N' )
284  lquery = ( lwork.EQ.-1 )
285 *
286  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
287  info = -1
288  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
289  $ ( wntvo .AND. wntuo ) ) THEN
290  info = -2
291  ELSE IF( m.LT.0 ) THEN
292  info = -3
293  ELSE IF( n.LT.0 ) THEN
294  info = -4
295  ELSE IF( lda.LT.max( 1, m ) ) THEN
296  info = -6
297  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
298  info = -9
299  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
300  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
301  info = -11
302  END IF
303 *
304 * Compute workspace
305 * (Note: Comments in the code beginning "Workspace:" describe the
306 * minimal amount of workspace needed at that point in the code,
307 * as well as the preferred amount for good performance.
308 * CWorkspace refers to complex workspace, and RWorkspace to
309 * real workspace. NB refers to the optimal block size for the
310 * immediately following subroutine, as returned by ILAENV.)
311 *
312  IF( info.EQ.0 ) THEN
313  minwrk = 1
314  maxwrk = 1
315  IF( m.GE.n .AND. minmn.GT.0 ) THEN
316 *
317 * Space needed for ZBDSQR is BDSPAC = 5*N
318 *
319  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
320 * Compute space needed for CGEQRF
321  CALL cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
322  lwork_cgeqrf = int( cdum(1) )
323 * Compute space needed for CUNGQR
324  CALL cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_cungqr_n = int( cdum(1) )
326  CALL cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
327  lwork_cungqr_m = int( cdum(1) )
328 * Compute space needed for CGEBRD
329  CALL cgebrd( n, n, a, lda, s, dum(1), cdum(1),
330  $ cdum(1), cdum(1), -1, ierr )
331  lwork_cgebrd = int( cdum(1) )
332 * Compute space needed for CUNGBR
333  CALL cungbr( 'P', n, n, n, a, lda, cdum(1),
334  $ cdum(1), -1, ierr )
335  lwork_cungbr_p = int( cdum(1) )
336  CALL cungbr( 'Q', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_cungbr_q = int( cdum(1) )
339 *
340  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
341  IF( m.GE.mnthr ) THEN
342  IF( wntun ) THEN
343 *
344 * Path 1 (M much larger than N, JOBU='N')
345 *
346  maxwrk = n + lwork_cgeqrf
347  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
348  IF( wntvo .OR. wntvas )
349  $ maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
350  minwrk = 3*n
351  ELSE IF( wntuo .AND. wntvn ) THEN
352 *
353 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
354 *
355  wrkbl = n + lwork_cgeqrf
356  wrkbl = max( wrkbl, n+lwork_cungqr_n )
357  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
358  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
359  maxwrk = max( n*n+wrkbl, n*n+m*n )
360  minwrk = 2*n + m
361  ELSE IF( wntuo .AND. wntvas ) THEN
362 *
363 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
364 * 'A')
365 *
366  wrkbl = n + lwork_cgeqrf
367  wrkbl = max( wrkbl, n+lwork_cungqr_n )
368  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
369  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
370  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
371  maxwrk = max( n*n+wrkbl, n*n+m*n )
372  minwrk = 2*n + m
373  ELSE IF( wntus .AND. wntvn ) THEN
374 *
375 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
376 *
377  wrkbl = n + lwork_cgeqrf
378  wrkbl = max( wrkbl, n+lwork_cungqr_n )
379  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
380  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
381  maxwrk = n*n + wrkbl
382  minwrk = 2*n + m
383  ELSE IF( wntus .AND. wntvo ) THEN
384 *
385 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
386 *
387  wrkbl = n + lwork_cgeqrf
388  wrkbl = max( wrkbl, n+lwork_cungqr_n )
389  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
390  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
391  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = 2*n + m
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_cgeqrf
400  wrkbl = max( wrkbl, n+lwork_cungqr_n )
401  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
402  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
403  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
404  maxwrk = n*n + wrkbl
405  minwrk = 2*n + m
406  ELSE IF( wntua .AND. wntvn ) THEN
407 *
408 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
409 *
410  wrkbl = n + lwork_cgeqrf
411  wrkbl = max( wrkbl, n+lwork_cungqr_m )
412  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
413  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
414  maxwrk = n*n + wrkbl
415  minwrk = 2*n + m
416  ELSE IF( wntua .AND. wntvo ) THEN
417 *
418 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
419 *
420  wrkbl = n + lwork_cgeqrf
421  wrkbl = max( wrkbl, n+lwork_cungqr_m )
422  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
423  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
424  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
425  maxwrk = 2*n*n + wrkbl
426  minwrk = 2*n + m
427  ELSE IF( wntua .AND. wntvas ) THEN
428 *
429 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430 * 'A')
431 *
432  wrkbl = n + lwork_cgeqrf
433  wrkbl = max( wrkbl, n+lwork_cungqr_m )
434  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
435  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
436  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
437  maxwrk = n*n + wrkbl
438  minwrk = 2*n + m
439  END IF
440  ELSE
441 *
442 * Path 10 (M at least N, but not much larger)
443 *
444  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
445  $ cdum(1), cdum(1), -1, ierr )
446  lwork_cgebrd = int( cdum(1) )
447  maxwrk = 2*n + lwork_cgebrd
448  IF( wntus .OR. wntuo ) THEN
449  CALL cungbr( 'Q', m, n, n, a, lda, cdum(1),
450  $ cdum(1), -1, ierr )
451  lwork_cungbr_q = int( cdum(1) )
452  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
453  END IF
454  IF( wntua ) THEN
455  CALL cungbr( 'Q', m, m, n, a, lda, cdum(1),
456  $ cdum(1), -1, ierr )
457  lwork_cungbr_q = int( cdum(1) )
458  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
459  END IF
460  IF( .NOT.wntvn ) THEN
461  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
462  END IF
463  minwrk = 2*n + m
464  END IF
465  ELSE IF( minmn.GT.0 ) THEN
466 *
467 * Space needed for CBDSQR is BDSPAC = 5*M
468 *
469  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
470 * Compute space needed for CGELQF
471  CALL cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
472  lwork_cgelqf = int( cdum(1) )
473 * Compute space needed for CUNGLQ
474  CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
475  $ ierr )
476  lwork_cunglq_n = int( cdum(1) )
477  CALL cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
478  lwork_cunglq_m = int( cdum(1) )
479 * Compute space needed for CGEBRD
480  CALL cgebrd( m, m, a, lda, s, dum(1), cdum(1),
481  $ cdum(1), cdum(1), -1, ierr )
482  lwork_cgebrd = int( cdum(1) )
483 * Compute space needed for CUNGBR P
484  CALL cungbr( 'P', m, m, m, a, n, cdum(1),
485  $ cdum(1), -1, ierr )
486  lwork_cungbr_p = int( cdum(1) )
487 * Compute space needed for CUNGBR Q
488  CALL cungbr( 'Q', m, m, m, a, n, cdum(1),
489  $ cdum(1), -1, ierr )
490  lwork_cungbr_q = int( cdum(1) )
491  IF( n.GE.mnthr ) THEN
492  IF( wntvn ) THEN
493 *
494 * Path 1t(N much larger than M, JOBVT='N')
495 *
496  maxwrk = m + lwork_cgelqf
497  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
498  IF( wntuo .OR. wntuas )
499  $ maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
500  minwrk = 3*m
501  ELSE IF( wntvo .AND. wntun ) THEN
502 *
503 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
504 *
505  wrkbl = m + lwork_cgelqf
506  wrkbl = max( wrkbl, m+lwork_cunglq_m )
507  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
508  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
509  maxwrk = max( m*m+wrkbl, m*m+m*n )
510  minwrk = 2*m + n
511  ELSE IF( wntvo .AND. wntuas ) THEN
512 *
513 * Path 3t(N much larger than M, JOBU='S' or 'A',
514 * JOBVT='O')
515 *
516  wrkbl = m + lwork_cgelqf
517  wrkbl = max( wrkbl, m+lwork_cunglq_m )
518  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
519  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
520  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
521  maxwrk = max( m*m+wrkbl, m*m+m*n )
522  minwrk = 2*m + n
523  ELSE IF( wntvs .AND. wntun ) THEN
524 *
525 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
526 *
527  wrkbl = m + lwork_cgelqf
528  wrkbl = max( wrkbl, m+lwork_cunglq_m )
529  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
530  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
531  maxwrk = m*m + wrkbl
532  minwrk = 2*m + n
533  ELSE IF( wntvs .AND. wntuo ) THEN
534 *
535 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
536 *
537  wrkbl = m + lwork_cgelqf
538  wrkbl = max( wrkbl, m+lwork_cunglq_m )
539  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
540  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
541  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
542  maxwrk = 2*m*m + wrkbl
543  minwrk = 2*m + n
544  ELSE IF( wntvs .AND. wntuas ) THEN
545 *
546 * Path 6t(N much larger than M, JOBU='S' or 'A',
547 * JOBVT='S')
548 *
549  wrkbl = m + lwork_cgelqf
550  wrkbl = max( wrkbl, m+lwork_cunglq_m )
551  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
552  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
553  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
554  maxwrk = m*m + wrkbl
555  minwrk = 2*m + n
556  ELSE IF( wntva .AND. wntun ) THEN
557 *
558 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
559 *
560  wrkbl = m + lwork_cgelqf
561  wrkbl = max( wrkbl, m+lwork_cunglq_n )
562  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
563  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
564  maxwrk = m*m + wrkbl
565  minwrk = 2*m + n
566  ELSE IF( wntva .AND. wntuo ) THEN
567 *
568 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
569 *
570  wrkbl = m + lwork_cgelqf
571  wrkbl = max( wrkbl, m+lwork_cunglq_n )
572  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
573  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
574  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
575  maxwrk = 2*m*m + wrkbl
576  minwrk = 2*m + n
577  ELSE IF( wntva .AND. wntuas ) THEN
578 *
579 * Path 9t(N much larger than M, JOBU='S' or 'A',
580 * JOBVT='A')
581 *
582  wrkbl = m + lwork_cgelqf
583  wrkbl = max( wrkbl, m+lwork_cunglq_n )
584  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
585  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
586  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
587  maxwrk = m*m + wrkbl
588  minwrk = 2*m + n
589  END IF
590  ELSE
591 *
592 * Path 10t(N greater than M, but not much larger)
593 *
594  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
595  $ cdum(1), cdum(1), -1, ierr )
596  lwork_cgebrd = int( cdum(1) )
597  maxwrk = 2*m + lwork_cgebrd
598  IF( wntvs .OR. wntvo ) THEN
599 * Compute space needed for CUNGBR P
600  CALL cungbr( 'P', m, n, m, a, n, cdum(1),
601  $ cdum(1), -1, ierr )
602  lwork_cungbr_p = int( cdum(1) )
603  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
604  END IF
605  IF( wntva ) THEN
606  CALL cungbr( 'P', n, n, m, a, n, cdum(1),
607  $ cdum(1), -1, ierr )
608  lwork_cungbr_p = int( cdum(1) )
609  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
610  END IF
611  IF( .NOT.wntun ) THEN
612  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
613  END IF
614  minwrk = 2*m + n
615  END IF
616  END IF
617  maxwrk = max( minwrk, maxwrk )
618  work( 1 ) = maxwrk
619 *
620  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
621  info = -13
622  END IF
623  END IF
624 *
625  IF( info.NE.0 ) THEN
626  CALL xerbla( 'CGESVD', -info )
627  RETURN
628  ELSE IF( lquery ) THEN
629  RETURN
630  END IF
631 *
632 * Quick return if possible
633 *
634  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
635  RETURN
636  END IF
637 *
638 * Get machine constants
639 *
640  eps = slamch( 'P' )
641  smlnum = sqrt( slamch( 'S' ) ) / eps
642  bignum = one / smlnum
643 *
644 * Scale A if max element outside range [SMLNUM,BIGNUM]
645 *
646  anrm = clange( 'M', m, n, a, lda, dum )
647  iscl = 0
648  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
649  iscl = 1
650  CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
651  ELSE IF( anrm.GT.bignum ) THEN
652  iscl = 1
653  CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
654  END IF
655 *
656  IF( m.GE.n ) THEN
657 *
658 * A has at least as many rows as columns. If A has sufficiently
659 * more rows than columns, first reduce using the QR
660 * decomposition (if sufficient workspace available)
661 *
662  IF( m.GE.mnthr ) THEN
663 *
664  IF( wntun ) THEN
665 *
666 * Path 1 (M much larger than N, JOBU='N')
667 * No left singular vectors to be computed
668 *
669  itau = 1
670  iwork = itau + n
671 *
672 * Compute A=Q*R
673 * (CWorkspace: need 2*N, prefer N+N*NB)
674 * (RWorkspace: need 0)
675 *
676  CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),
677  $ lwork-iwork+1, ierr )
678 *
679 * Zero out below R
680 *
681  IF( n .GT. 1 ) THEN
682  CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
683  $ lda )
684  END IF
685  ie = 1
686  itauq = 1
687  itaup = itauq + n
688  iwork = itaup + n
689 *
690 * Bidiagonalize R in A
691 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
692 * (RWorkspace: need N)
693 *
694  CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
695  $ work( itaup ), work( iwork ), lwork-iwork+1,
696  $ ierr )
697  ncvt = 0
698  IF( wntvo .OR. wntvas ) THEN
699 *
700 * If right singular vectors desired, generate P'.
701 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
702 * (RWorkspace: 0)
703 *
704  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
705  $ work( iwork ), lwork-iwork+1, ierr )
706  ncvt = n
707  END IF
708  irwork = ie + n
709 *
710 * Perform bidiagonal QR iteration, computing right
711 * singular vectors of A in A if desired
712 * (CWorkspace: 0)
713 * (RWorkspace: need BDSPAC)
714 *
715  CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
716  $ cdum, 1, cdum, 1, rwork( irwork ), info )
717 *
718 * If right singular vectors desired in VT, copy them there
719 *
720  IF( wntvas )
721  $ CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
722 *
723  ELSE IF( wntuo .AND. wntvn ) THEN
724 *
725 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
726 * N left singular vectors to be overwritten on A and
727 * no right singular vectors to be computed
728 *
729  IF( lwork.GE.n*n+3*n ) THEN
730 *
731 * Sufficient workspace for a fast algorithm
732 *
733  ir = 1
734  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
735 *
736 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
737 *
738  ldwrku = lda
739  ldwrkr = lda
740  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
741 *
742 * WORK(IU) is LDA by N, WORK(IR) is N by N
743 *
744  ldwrku = lda
745  ldwrkr = n
746  ELSE
747 *
748 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
749 *
750  ldwrku = ( lwork-n*n ) / n
751  ldwrkr = n
752  END IF
753  itau = ir + ldwrkr*n
754  iwork = itau + n
755 *
756 * Compute A=Q*R
757 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758 * (RWorkspace: 0)
759 *
760  CALL cgeqrf( m, n, a, lda, work( itau ),
761  $ work( iwork ), lwork-iwork+1, ierr )
762 *
763 * Copy R to WORK(IR) and zero out below it
764 *
765  CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
766  CALL claset( 'L', n-1, n-1, czero, czero,
767  $ work( ir+1 ), ldwrkr )
768 *
769 * Generate Q in A
770 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
771 * (RWorkspace: 0)
772 *
773  CALL cungqr( m, n, n, a, lda, work( itau ),
774  $ work( iwork ), lwork-iwork+1, ierr )
775  ie = 1
776  itauq = itau
777  itaup = itauq + n
778  iwork = itaup + n
779 *
780 * Bidiagonalize R in WORK(IR)
781 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782 * (RWorkspace: need N)
783 *
784  CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
785  $ work( itauq ), work( itaup ),
786  $ work( iwork ), lwork-iwork+1, ierr )
787 *
788 * Generate left vectors bidiagonalizing R
789 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
790 * (RWorkspace: need 0)
791 *
792  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
793  $ work( itauq ), work( iwork ),
794  $ lwork-iwork+1, ierr )
795  irwork = ie + n
796 *
797 * Perform bidiagonal QR iteration, computing left
798 * singular vectors of R in WORK(IR)
799 * (CWorkspace: need N*N)
800 * (RWorkspace: need BDSPAC)
801 *
802  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
803  $ work( ir ), ldwrkr, cdum, 1,
804  $ rwork( irwork ), info )
805  iu = itauq
806 *
807 * Multiply Q in A by left singular vectors of R in
808 * WORK(IR), storing result in WORK(IU) and copying to A
809 * (CWorkspace: need N*N+N, prefer N*N+M*N)
810 * (RWorkspace: 0)
811 *
812  DO 10 i = 1, m, ldwrku
813  chunk = min( m-i+1, ldwrku )
814  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815  $ lda, work( ir ), ldwrkr, czero,
816  $ work( iu ), ldwrku )
817  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
818  $ a( i, 1 ), lda )
819  10 CONTINUE
820 *
821  ELSE
822 *
823 * Insufficient workspace for a fast algorithm
824 *
825  ie = 1
826  itauq = 1
827  itaup = itauq + n
828  iwork = itaup + n
829 *
830 * Bidiagonalize A
831 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
832 * (RWorkspace: N)
833 *
834  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
835  $ work( itauq ), work( itaup ),
836  $ work( iwork ), lwork-iwork+1, ierr )
837 *
838 * Generate left vectors bidiagonalizing A
839 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
840 * (RWorkspace: 0)
841 *
842  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
843  $ work( iwork ), lwork-iwork+1, ierr )
844  irwork = ie + n
845 *
846 * Perform bidiagonal QR iteration, computing left
847 * singular vectors of A in A
848 * (CWorkspace: need 0)
849 * (RWorkspace: need BDSPAC)
850 *
851  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
852  $ a, lda, cdum, 1, rwork( irwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+3*n ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
891 * (RWorkspace: 0)
892 *
893  CALL cgeqrf( m, n, a, lda, work( itau ),
894  $ work( iwork ), lwork-iwork+1, ierr )
895 *
896 * Copy R to VT, zeroing out below it
897 *
898  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
899  IF( n.GT.1 )
900  $ CALL claset( 'L', n-1, n-1, czero, czero,
901  $ vt( 2, 1 ), ldvt )
902 *
903 * Generate Q in A
904 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
905 * (RWorkspace: 0)
906 *
907  CALL cungqr( m, n, n, a, lda, work( itau ),
908  $ work( iwork ), lwork-iwork+1, ierr )
909  ie = 1
910  itauq = itau
911  itaup = itauq + n
912  iwork = itaup + n
913 *
914 * Bidiagonalize R in VT, copying result to WORK(IR)
915 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
916 * (RWorkspace: need N)
917 *
918  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
919  $ work( itauq ), work( itaup ),
920  $ work( iwork ), lwork-iwork+1, ierr )
921  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
922 *
923 * Generate left vectors bidiagonalizing R in WORK(IR)
924 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
925 * (RWorkspace: 0)
926 *
927  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
928  $ work( itauq ), work( iwork ),
929  $ lwork-iwork+1, ierr )
930 *
931 * Generate right vectors bidiagonalizing R in VT
932 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
933 * (RWorkspace: 0)
934 *
935  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
936  $ work( iwork ), lwork-iwork+1, ierr )
937  irwork = ie + n
938 *
939 * Perform bidiagonal QR iteration, computing left
940 * singular vectors of R in WORK(IR) and computing right
941 * singular vectors of R in VT
942 * (CWorkspace: need N*N)
943 * (RWorkspace: need BDSPAC)
944 *
945  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
946  $ ldvt, work( ir ), ldwrkr, cdum, 1,
947  $ rwork( irwork ), info )
948  iu = itauq
949 *
950 * Multiply Q in A by left singular vectors of R in
951 * WORK(IR), storing result in WORK(IU) and copying to A
952 * (CWorkspace: need N*N+N, prefer N*N+M*N)
953 * (RWorkspace: 0)
954 *
955  DO 20 i = 1, m, ldwrku
956  chunk = min( m-i+1, ldwrku )
957  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
958  $ lda, work( ir ), ldwrkr, czero,
959  $ work( iu ), ldwrku )
960  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
961  $ a( i, 1 ), lda )
962  20 CONTINUE
963 *
964  ELSE
965 *
966 * Insufficient workspace for a fast algorithm
967 *
968  itau = 1
969  iwork = itau + n
970 *
971 * Compute A=Q*R
972 * (CWorkspace: need 2*N, prefer N+N*NB)
973 * (RWorkspace: 0)
974 *
975  CALL cgeqrf( m, n, a, lda, work( itau ),
976  $ work( iwork ), lwork-iwork+1, ierr )
977 *
978 * Copy R to VT, zeroing out below it
979 *
980  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
981  IF( n.GT.1 )
982  $ CALL claset( 'L', n-1, n-1, czero, czero,
983  $ vt( 2, 1 ), ldvt )
984 *
985 * Generate Q in A
986 * (CWorkspace: need 2*N, prefer N+N*NB)
987 * (RWorkspace: 0)
988 *
989  CALL cungqr( m, n, n, a, lda, work( itau ),
990  $ work( iwork ), lwork-iwork+1, ierr )
991  ie = 1
992  itauq = itau
993  itaup = itauq + n
994  iwork = itaup + n
995 *
996 * Bidiagonalize R in VT
997 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
998 * (RWorkspace: N)
999 *
1000  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1001  $ work( itauq ), work( itaup ),
1002  $ work( iwork ), lwork-iwork+1, ierr )
1003 *
1004 * Multiply Q in A by left vectors bidiagonalizing R
1005 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1006 * (RWorkspace: 0)
1007 *
1008  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1009  $ work( itauq ), a, lda, work( iwork ),
1010  $ lwork-iwork+1, ierr )
1011 *
1012 * Generate right vectors bidiagonalizing R in VT
1013 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1014 * (RWorkspace: 0)
1015 *
1016  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1017  $ work( iwork ), lwork-iwork+1, ierr )
1018  irwork = ie + n
1019 *
1020 * Perform bidiagonal QR iteration, computing left
1021 * singular vectors of A in A and computing right
1022 * singular vectors of A in VT
1023 * (CWorkspace: 0)
1024 * (RWorkspace: need BDSPAC)
1025 *
1026  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1027  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1028  $ info )
1029 *
1030  END IF
1031 *
1032  ELSE IF( wntus ) THEN
1033 *
1034  IF( wntvn ) THEN
1035 *
1036 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1037 * N left singular vectors to be computed in U and
1038 * no right singular vectors to be computed
1039 *
1040  IF( lwork.GE.n*n+3*n ) THEN
1041 *
1042 * Sufficient workspace for a fast algorithm
1043 *
1044  ir = 1
1045  IF( lwork.GE.wrkbl+lda*n ) THEN
1046 *
1047 * WORK(IR) is LDA by N
1048 *
1049  ldwrkr = lda
1050  ELSE
1051 *
1052 * WORK(IR) is N by N
1053 *
1054  ldwrkr = n
1055  END IF
1056  itau = ir + ldwrkr*n
1057  iwork = itau + n
1058 *
1059 * Compute A=Q*R
1060 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1061 * (RWorkspace: 0)
1062 *
1063  CALL cgeqrf( m, n, a, lda, work( itau ),
1064  $ work( iwork ), lwork-iwork+1, ierr )
1065 *
1066 * Copy R to WORK(IR), zeroing out below it
1067 *
1068  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1069  $ ldwrkr )
1070  CALL claset( 'L', n-1, n-1, czero, czero,
1071  $ work( ir+1 ), ldwrkr )
1072 *
1073 * Generate Q in A
1074 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1075 * (RWorkspace: 0)
1076 *
1077  CALL cungqr( m, n, n, a, lda, work( itau ),
1078  $ work( iwork ), lwork-iwork+1, ierr )
1079  ie = 1
1080  itauq = itau
1081  itaup = itauq + n
1082  iwork = itaup + n
1083 *
1084 * Bidiagonalize R in WORK(IR)
1085 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1086 * (RWorkspace: need N)
1087 *
1088  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1089  $ rwork( ie ), work( itauq ),
1090  $ work( itaup ), work( iwork ),
1091  $ lwork-iwork+1, ierr )
1092 *
1093 * Generate left vectors bidiagonalizing R in WORK(IR)
1094 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1095 * (RWorkspace: 0)
1096 *
1097  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1098  $ work( itauq ), work( iwork ),
1099  $ lwork-iwork+1, ierr )
1100  irwork = ie + n
1101 *
1102 * Perform bidiagonal QR iteration, computing left
1103 * singular vectors of R in WORK(IR)
1104 * (CWorkspace: need N*N)
1105 * (RWorkspace: need BDSPAC)
1106 *
1107  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1108  $ 1, work( ir ), ldwrkr, cdum, 1,
1109  $ rwork( irwork ), info )
1110 *
1111 * Multiply Q in A by left singular vectors of R in
1112 * WORK(IR), storing result in U
1113 * (CWorkspace: need N*N)
1114 * (RWorkspace: 0)
1115 *
1116  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1117  $ work( ir ), ldwrkr, czero, u, ldu )
1118 *
1119  ELSE
1120 *
1121 * Insufficient workspace for a fast algorithm
1122 *
1123  itau = 1
1124  iwork = itau + n
1125 *
1126 * Compute A=Q*R, copying result to U
1127 * (CWorkspace: need 2*N, prefer N+N*NB)
1128 * (RWorkspace: 0)
1129 *
1130  CALL cgeqrf( m, n, a, lda, work( itau ),
1131  $ work( iwork ), lwork-iwork+1, ierr )
1132  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1133 *
1134 * Generate Q in U
1135 * (CWorkspace: need 2*N, prefer N+N*NB)
1136 * (RWorkspace: 0)
1137 *
1138  CALL cungqr( m, n, n, u, ldu, work( itau ),
1139  $ work( iwork ), lwork-iwork+1, ierr )
1140  ie = 1
1141  itauq = itau
1142  itaup = itauq + n
1143  iwork = itaup + n
1144 *
1145 * Zero out below R in A
1146 *
1147  IF( n .GT. 1 ) THEN
1148  CALL claset( 'L', n-1, n-1, czero, czero,
1149  $ a( 2, 1 ), lda )
1150  END IF
1151 *
1152 * Bidiagonalize R in A
1153 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1154 * (RWorkspace: need N)
1155 *
1156  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1157  $ work( itauq ), work( itaup ),
1158  $ work( iwork ), lwork-iwork+1, ierr )
1159 *
1160 * Multiply Q in U by left vectors bidiagonalizing R
1161 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1162 * (RWorkspace: 0)
1163 *
1164  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1165  $ work( itauq ), u, ldu, work( iwork ),
1166  $ lwork-iwork+1, ierr )
1167  irwork = ie + n
1168 *
1169 * Perform bidiagonal QR iteration, computing left
1170 * singular vectors of A in U
1171 * (CWorkspace: 0)
1172 * (RWorkspace: need BDSPAC)
1173 *
1174  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1175  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1176  $ info )
1177 *
1178  END IF
1179 *
1180  ELSE IF( wntvo ) THEN
1181 *
1182 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1183 * N left singular vectors to be computed in U and
1184 * N right singular vectors to be overwritten on A
1185 *
1186  IF( lwork.GE.2*n*n+3*n ) THEN
1187 *
1188 * Sufficient workspace for a fast algorithm
1189 *
1190  iu = 1
1191  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1192 *
1193 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1194 *
1195  ldwrku = lda
1196  ir = iu + ldwrku*n
1197  ldwrkr = lda
1198  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1199 *
1200 * WORK(IU) is LDA by N and WORK(IR) is N by N
1201 *
1202  ldwrku = lda
1203  ir = iu + ldwrku*n
1204  ldwrkr = n
1205  ELSE
1206 *
1207 * WORK(IU) is N by N and WORK(IR) is N by N
1208 *
1209  ldwrku = n
1210  ir = iu + ldwrku*n
1211  ldwrkr = n
1212  END IF
1213  itau = ir + ldwrkr*n
1214  iwork = itau + n
1215 *
1216 * Compute A=Q*R
1217 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1218 * (RWorkspace: 0)
1219 *
1220  CALL cgeqrf( m, n, a, lda, work( itau ),
1221  $ work( iwork ), lwork-iwork+1, ierr )
1222 *
1223 * Copy R to WORK(IU), zeroing out below it
1224 *
1225  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1226  $ ldwrku )
1227  CALL claset( 'L', n-1, n-1, czero, czero,
1228  $ work( iu+1 ), ldwrku )
1229 *
1230 * Generate Q in A
1231 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1232 * (RWorkspace: 0)
1233 *
1234  CALL cungqr( m, n, n, a, lda, work( itau ),
1235  $ work( iwork ), lwork-iwork+1, ierr )
1236  ie = 1
1237  itauq = itau
1238  itaup = itauq + n
1239  iwork = itaup + n
1240 *
1241 * Bidiagonalize R in WORK(IU), copying result to
1242 * WORK(IR)
1243 * (CWorkspace: need 2*N*N+3*N,
1244 * prefer 2*N*N+2*N+2*N*NB)
1245 * (RWorkspace: need N)
1246 *
1247  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1248  $ rwork( ie ), work( itauq ),
1249  $ work( itaup ), work( iwork ),
1250  $ lwork-iwork+1, ierr )
1251  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1252  $ work( ir ), ldwrkr )
1253 *
1254 * Generate left bidiagonalizing vectors in WORK(IU)
1255 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1256 * (RWorkspace: 0)
1257 *
1258  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1259  $ work( itauq ), work( iwork ),
1260  $ lwork-iwork+1, ierr )
1261 *
1262 * Generate right bidiagonalizing vectors in WORK(IR)
1263 * (CWorkspace: need 2*N*N+3*N-1,
1264 * prefer 2*N*N+2*N+(N-1)*NB)
1265 * (RWorkspace: 0)
1266 *
1267  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1268  $ work( itaup ), work( iwork ),
1269  $ lwork-iwork+1, ierr )
1270  irwork = ie + n
1271 *
1272 * Perform bidiagonal QR iteration, computing left
1273 * singular vectors of R in WORK(IU) and computing
1274 * right singular vectors of R in WORK(IR)
1275 * (CWorkspace: need 2*N*N)
1276 * (RWorkspace: need BDSPAC)
1277 *
1278  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1279  $ work( ir ), ldwrkr, work( iu ),
1280  $ ldwrku, cdum, 1, rwork( irwork ),
1281  $ info )
1282 *
1283 * Multiply Q in A by left singular vectors of R in
1284 * WORK(IU), storing result in U
1285 * (CWorkspace: need N*N)
1286 * (RWorkspace: 0)
1287 *
1288  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1289  $ work( iu ), ldwrku, czero, u, ldu )
1290 *
1291 * Copy right singular vectors of R to A
1292 * (CWorkspace: need N*N)
1293 * (RWorkspace: 0)
1294 *
1295  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1296  $ lda )
1297 *
1298  ELSE
1299 *
1300 * Insufficient workspace for a fast algorithm
1301 *
1302  itau = 1
1303  iwork = itau + n
1304 *
1305 * Compute A=Q*R, copying result to U
1306 * (CWorkspace: need 2*N, prefer N+N*NB)
1307 * (RWorkspace: 0)
1308 *
1309  CALL cgeqrf( m, n, a, lda, work( itau ),
1310  $ work( iwork ), lwork-iwork+1, ierr )
1311  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1312 *
1313 * Generate Q in U
1314 * (CWorkspace: need 2*N, prefer N+N*NB)
1315 * (RWorkspace: 0)
1316 *
1317  CALL cungqr( m, n, n, u, ldu, work( itau ),
1318  $ work( iwork ), lwork-iwork+1, ierr )
1319  ie = 1
1320  itauq = itau
1321  itaup = itauq + n
1322  iwork = itaup + n
1323 *
1324 * Zero out below R in A
1325 *
1326  IF( n .GT. 1 ) THEN
1327  CALL claset( 'L', n-1, n-1, czero, czero,
1328  $ a( 2, 1 ), lda )
1329  END IF
1330 *
1331 * Bidiagonalize R in A
1332 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1333 * (RWorkspace: need N)
1334 *
1335  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1336  $ work( itauq ), work( itaup ),
1337  $ work( iwork ), lwork-iwork+1, ierr )
1338 *
1339 * Multiply Q in U by left vectors bidiagonalizing R
1340 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1341 * (RWorkspace: 0)
1342 *
1343  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1344  $ work( itauq ), u, ldu, work( iwork ),
1345  $ lwork-iwork+1, ierr )
1346 *
1347 * Generate right vectors bidiagonalizing R in A
1348 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1349 * (RWorkspace: 0)
1350 *
1351  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1352  $ work( iwork ), lwork-iwork+1, ierr )
1353  irwork = ie + n
1354 *
1355 * Perform bidiagonal QR iteration, computing left
1356 * singular vectors of A in U and computing right
1357 * singular vectors of A in A
1358 * (CWorkspace: 0)
1359 * (RWorkspace: need BDSPAC)
1360 *
1361  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1362  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1363  $ info )
1364 *
1365  END IF
1366 *
1367  ELSE IF( wntvas ) THEN
1368 *
1369 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1370 * or 'A')
1371 * N left singular vectors to be computed in U and
1372 * N right singular vectors to be computed in VT
1373 *
1374  IF( lwork.GE.n*n+3*n ) THEN
1375 *
1376 * Sufficient workspace for a fast algorithm
1377 *
1378  iu = 1
1379  IF( lwork.GE.wrkbl+lda*n ) THEN
1380 *
1381 * WORK(IU) is LDA by N
1382 *
1383  ldwrku = lda
1384  ELSE
1385 *
1386 * WORK(IU) is N by N
1387 *
1388  ldwrku = n
1389  END IF
1390  itau = iu + ldwrku*n
1391  iwork = itau + n
1392 *
1393 * Compute A=Q*R
1394 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1395 * (RWorkspace: 0)
1396 *
1397  CALL cgeqrf( m, n, a, lda, work( itau ),
1398  $ work( iwork ), lwork-iwork+1, ierr )
1399 *
1400 * Copy R to WORK(IU), zeroing out below it
1401 *
1402  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1403  $ ldwrku )
1404  CALL claset( 'L', n-1, n-1, czero, czero,
1405  $ work( iu+1 ), ldwrku )
1406 *
1407 * Generate Q in A
1408 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1409 * (RWorkspace: 0)
1410 *
1411  CALL cungqr( m, n, n, a, lda, work( itau ),
1412  $ work( iwork ), lwork-iwork+1, ierr )
1413  ie = 1
1414  itauq = itau
1415  itaup = itauq + n
1416  iwork = itaup + n
1417 *
1418 * Bidiagonalize R in WORK(IU), copying result to VT
1419 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1420 * (RWorkspace: need N)
1421 *
1422  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1423  $ rwork( ie ), work( itauq ),
1424  $ work( itaup ), work( iwork ),
1425  $ lwork-iwork+1, ierr )
1426  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1427  $ ldvt )
1428 *
1429 * Generate left bidiagonalizing vectors in WORK(IU)
1430 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1431 * (RWorkspace: 0)
1432 *
1433  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1434  $ work( itauq ), work( iwork ),
1435  $ lwork-iwork+1, ierr )
1436 *
1437 * Generate right bidiagonalizing vectors in VT
1438 * (CWorkspace: need N*N+3*N-1,
1439 * prefer N*N+2*N+(N-1)*NB)
1440 * (RWorkspace: 0)
1441 *
1442  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1443  $ work( iwork ), lwork-iwork+1, ierr )
1444  irwork = ie + n
1445 *
1446 * Perform bidiagonal QR iteration, computing left
1447 * singular vectors of R in WORK(IU) and computing
1448 * right singular vectors of R in VT
1449 * (CWorkspace: need N*N)
1450 * (RWorkspace: need BDSPAC)
1451 *
1452  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1453  $ ldvt, work( iu ), ldwrku, cdum, 1,
1454  $ rwork( irwork ), info )
1455 *
1456 * Multiply Q in A by left singular vectors of R in
1457 * WORK(IU), storing result in U
1458 * (CWorkspace: need N*N)
1459 * (RWorkspace: 0)
1460 *
1461  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1462  $ work( iu ), ldwrku, czero, u, ldu )
1463 *
1464  ELSE
1465 *
1466 * Insufficient workspace for a fast algorithm
1467 *
1468  itau = 1
1469  iwork = itau + n
1470 *
1471 * Compute A=Q*R, copying result to U
1472 * (CWorkspace: need 2*N, prefer N+N*NB)
1473 * (RWorkspace: 0)
1474 *
1475  CALL cgeqrf( m, n, a, lda, work( itau ),
1476  $ work( iwork ), lwork-iwork+1, ierr )
1477  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1478 *
1479 * Generate Q in U
1480 * (CWorkspace: need 2*N, prefer N+N*NB)
1481 * (RWorkspace: 0)
1482 *
1483  CALL cungqr( m, n, n, u, ldu, work( itau ),
1484  $ work( iwork ), lwork-iwork+1, ierr )
1485 *
1486 * Copy R to VT, zeroing out below it
1487 *
1488  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1489  IF( n.GT.1 )
1490  $ CALL claset( 'L', n-1, n-1, czero, czero,
1491  $ vt( 2, 1 ), ldvt )
1492  ie = 1
1493  itauq = itau
1494  itaup = itauq + n
1495  iwork = itaup + n
1496 *
1497 * Bidiagonalize R in VT
1498 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1499 * (RWorkspace: need N)
1500 *
1501  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1502  $ work( itauq ), work( itaup ),
1503  $ work( iwork ), lwork-iwork+1, ierr )
1504 *
1505 * Multiply Q in U by left bidiagonalizing vectors
1506 * in VT
1507 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1508 * (RWorkspace: 0)
1509 *
1510  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1511  $ work( itauq ), u, ldu, work( iwork ),
1512  $ lwork-iwork+1, ierr )
1513 *
1514 * Generate right bidiagonalizing vectors in VT
1515 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1516 * (RWorkspace: 0)
1517 *
1518  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1519  $ work( iwork ), lwork-iwork+1, ierr )
1520  irwork = ie + n
1521 *
1522 * Perform bidiagonal QR iteration, computing left
1523 * singular vectors of A in U and computing right
1524 * singular vectors of A in VT
1525 * (CWorkspace: 0)
1526 * (RWorkspace: need BDSPAC)
1527 *
1528  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1529  $ ldvt, u, ldu, cdum, 1,
1530  $ rwork( irwork ), info )
1531 *
1532  END IF
1533 *
1534  END IF
1535 *
1536  ELSE IF( wntua ) THEN
1537 *
1538  IF( wntvn ) THEN
1539 *
1540 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1541 * M left singular vectors to be computed in U and
1542 * no right singular vectors to be computed
1543 *
1544  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1545 *
1546 * Sufficient workspace for a fast algorithm
1547 *
1548  ir = 1
1549  IF( lwork.GE.wrkbl+lda*n ) THEN
1550 *
1551 * WORK(IR) is LDA by N
1552 *
1553  ldwrkr = lda
1554  ELSE
1555 *
1556 * WORK(IR) is N by N
1557 *
1558  ldwrkr = n
1559  END IF
1560  itau = ir + ldwrkr*n
1561  iwork = itau + n
1562 *
1563 * Compute A=Q*R, copying result to U
1564 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1565 * (RWorkspace: 0)
1566 *
1567  CALL cgeqrf( m, n, a, lda, work( itau ),
1568  $ work( iwork ), lwork-iwork+1, ierr )
1569  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1570 *
1571 * Copy R to WORK(IR), zeroing out below it
1572 *
1573  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1574  $ ldwrkr )
1575  CALL claset( 'L', n-1, n-1, czero, czero,
1576  $ work( ir+1 ), ldwrkr )
1577 *
1578 * Generate Q in U
1579 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1580 * (RWorkspace: 0)
1581 *
1582  CALL cungqr( m, m, n, u, ldu, work( itau ),
1583  $ work( iwork ), lwork-iwork+1, ierr )
1584  ie = 1
1585  itauq = itau
1586  itaup = itauq + n
1587  iwork = itaup + n
1588 *
1589 * Bidiagonalize R in WORK(IR)
1590 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1591 * (RWorkspace: need N)
1592 *
1593  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1594  $ rwork( ie ), work( itauq ),
1595  $ work( itaup ), work( iwork ),
1596  $ lwork-iwork+1, ierr )
1597 *
1598 * Generate left bidiagonalizing vectors in WORK(IR)
1599 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1600 * (RWorkspace: 0)
1601 *
1602  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1603  $ work( itauq ), work( iwork ),
1604  $ lwork-iwork+1, ierr )
1605  irwork = ie + n
1606 *
1607 * Perform bidiagonal QR iteration, computing left
1608 * singular vectors of R in WORK(IR)
1609 * (CWorkspace: need N*N)
1610 * (RWorkspace: need BDSPAC)
1611 *
1612  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1613  $ 1, work( ir ), ldwrkr, cdum, 1,
1614  $ rwork( irwork ), info )
1615 *
1616 * Multiply Q in U by left singular vectors of R in
1617 * WORK(IR), storing result in A
1618 * (CWorkspace: need N*N)
1619 * (RWorkspace: 0)
1620 *
1621  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1622  $ work( ir ), ldwrkr, czero, a, lda )
1623 *
1624 * Copy left singular vectors of A from A to U
1625 *
1626  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1627 *
1628  ELSE
1629 *
1630 * Insufficient workspace for a fast algorithm
1631 *
1632  itau = 1
1633  iwork = itau + n
1634 *
1635 * Compute A=Q*R, copying result to U
1636 * (CWorkspace: need 2*N, prefer N+N*NB)
1637 * (RWorkspace: 0)
1638 *
1639  CALL cgeqrf( m, n, a, lda, work( itau ),
1640  $ work( iwork ), lwork-iwork+1, ierr )
1641  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1642 *
1643 * Generate Q in U
1644 * (CWorkspace: need N+M, prefer N+M*NB)
1645 * (RWorkspace: 0)
1646 *
1647  CALL cungqr( m, m, n, u, ldu, work( itau ),
1648  $ work( iwork ), lwork-iwork+1, ierr )
1649  ie = 1
1650  itauq = itau
1651  itaup = itauq + n
1652  iwork = itaup + n
1653 *
1654 * Zero out below R in A
1655 *
1656  IF( n .GT. 1 ) THEN
1657  CALL claset( 'L', n-1, n-1, czero, czero,
1658  $ a( 2, 1 ), lda )
1659  END IF
1660 *
1661 * Bidiagonalize R in A
1662 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1663 * (RWorkspace: need N)
1664 *
1665  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1666  $ work( itauq ), work( itaup ),
1667  $ work( iwork ), lwork-iwork+1, ierr )
1668 *
1669 * Multiply Q in U by left bidiagonalizing vectors
1670 * in A
1671 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1672 * (RWorkspace: 0)
1673 *
1674  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1675  $ work( itauq ), u, ldu, work( iwork ),
1676  $ lwork-iwork+1, ierr )
1677  irwork = ie + n
1678 *
1679 * Perform bidiagonal QR iteration, computing left
1680 * singular vectors of A in U
1681 * (CWorkspace: 0)
1682 * (RWorkspace: need BDSPAC)
1683 *
1684  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1685  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1686  $ info )
1687 *
1688  END IF
1689 *
1690  ELSE IF( wntvo ) THEN
1691 *
1692 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1693 * M left singular vectors to be computed in U and
1694 * N right singular vectors to be overwritten on A
1695 *
1696  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1697 *
1698 * Sufficient workspace for a fast algorithm
1699 *
1700  iu = 1
1701  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1702 *
1703 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1704 *
1705  ldwrku = lda
1706  ir = iu + ldwrku*n
1707  ldwrkr = lda
1708  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1709 *
1710 * WORK(IU) is LDA by N and WORK(IR) is N by N
1711 *
1712  ldwrku = lda
1713  ir = iu + ldwrku*n
1714  ldwrkr = n
1715  ELSE
1716 *
1717 * WORK(IU) is N by N and WORK(IR) is N by N
1718 *
1719  ldwrku = n
1720  ir = iu + ldwrku*n
1721  ldwrkr = n
1722  END IF
1723  itau = ir + ldwrkr*n
1724  iwork = itau + n
1725 *
1726 * Compute A=Q*R, copying result to U
1727 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1728 * (RWorkspace: 0)
1729 *
1730  CALL cgeqrf( m, n, a, lda, work( itau ),
1731  $ work( iwork ), lwork-iwork+1, ierr )
1732  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1733 *
1734 * Generate Q in U
1735 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1736 * (RWorkspace: 0)
1737 *
1738  CALL cungqr( m, m, n, u, ldu, work( itau ),
1739  $ work( iwork ), lwork-iwork+1, ierr )
1740 *
1741 * Copy R to WORK(IU), zeroing out below it
1742 *
1743  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1744  $ ldwrku )
1745  CALL claset( 'L', n-1, n-1, czero, czero,
1746  $ work( iu+1 ), ldwrku )
1747  ie = 1
1748  itauq = itau
1749  itaup = itauq + n
1750  iwork = itaup + n
1751 *
1752 * Bidiagonalize R in WORK(IU), copying result to
1753 * WORK(IR)
1754 * (CWorkspace: need 2*N*N+3*N,
1755 * prefer 2*N*N+2*N+2*N*NB)
1756 * (RWorkspace: need N)
1757 *
1758  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1759  $ rwork( ie ), work( itauq ),
1760  $ work( itaup ), work( iwork ),
1761  $ lwork-iwork+1, ierr )
1762  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1763  $ work( ir ), ldwrkr )
1764 *
1765 * Generate left bidiagonalizing vectors in WORK(IU)
1766 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1767 * (RWorkspace: 0)
1768 *
1769  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1770  $ work( itauq ), work( iwork ),
1771  $ lwork-iwork+1, ierr )
1772 *
1773 * Generate right bidiagonalizing vectors in WORK(IR)
1774 * (CWorkspace: need 2*N*N+3*N-1,
1775 * prefer 2*N*N+2*N+(N-1)*NB)
1776 * (RWorkspace: 0)
1777 *
1778  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1779  $ work( itaup ), work( iwork ),
1780  $ lwork-iwork+1, ierr )
1781  irwork = ie + n
1782 *
1783 * Perform bidiagonal QR iteration, computing left
1784 * singular vectors of R in WORK(IU) and computing
1785 * right singular vectors of R in WORK(IR)
1786 * (CWorkspace: need 2*N*N)
1787 * (RWorkspace: need BDSPAC)
1788 *
1789  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1790  $ work( ir ), ldwrkr, work( iu ),
1791  $ ldwrku, cdum, 1, rwork( irwork ),
1792  $ info )
1793 *
1794 * Multiply Q in U by left singular vectors of R in
1795 * WORK(IU), storing result in A
1796 * (CWorkspace: need N*N)
1797 * (RWorkspace: 0)
1798 *
1799  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1800  $ work( iu ), ldwrku, czero, a, lda )
1801 *
1802 * Copy left singular vectors of A from A to U
1803 *
1804  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1805 *
1806 * Copy right singular vectors of R from WORK(IR) to A
1807 *
1808  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1809  $ lda )
1810 *
1811  ELSE
1812 *
1813 * Insufficient workspace for a fast algorithm
1814 *
1815  itau = 1
1816  iwork = itau + n
1817 *
1818 * Compute A=Q*R, copying result to U
1819 * (CWorkspace: need 2*N, prefer N+N*NB)
1820 * (RWorkspace: 0)
1821 *
1822  CALL cgeqrf( m, n, a, lda, work( itau ),
1823  $ work( iwork ), lwork-iwork+1, ierr )
1824  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1825 *
1826 * Generate Q in U
1827 * (CWorkspace: need N+M, prefer N+M*NB)
1828 * (RWorkspace: 0)
1829 *
1830  CALL cungqr( m, m, n, u, ldu, work( itau ),
1831  $ work( iwork ), lwork-iwork+1, ierr )
1832  ie = 1
1833  itauq = itau
1834  itaup = itauq + n
1835  iwork = itaup + n
1836 *
1837 * Zero out below R in A
1838 *
1839  IF( n .GT. 1 ) THEN
1840  CALL claset( 'L', n-1, n-1, czero, czero,
1841  $ a( 2, 1 ), lda )
1842  END IF
1843 *
1844 * Bidiagonalize R in A
1845 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1846 * (RWorkspace: need N)
1847 *
1848  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1849  $ work( itauq ), work( itaup ),
1850  $ work( iwork ), lwork-iwork+1, ierr )
1851 *
1852 * Multiply Q in U by left bidiagonalizing vectors
1853 * in A
1854 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1855 * (RWorkspace: 0)
1856 *
1857  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1858  $ work( itauq ), u, ldu, work( iwork ),
1859  $ lwork-iwork+1, ierr )
1860 *
1861 * Generate right bidiagonalizing vectors in A
1862 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1863 * (RWorkspace: 0)
1864 *
1865  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1866  $ work( iwork ), lwork-iwork+1, ierr )
1867  irwork = ie + n
1868 *
1869 * Perform bidiagonal QR iteration, computing left
1870 * singular vectors of A in U and computing right
1871 * singular vectors of A in A
1872 * (CWorkspace: 0)
1873 * (RWorkspace: need BDSPAC)
1874 *
1875  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1876  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1877  $ info )
1878 *
1879  END IF
1880 *
1881  ELSE IF( wntvas ) THEN
1882 *
1883 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1884 * or 'A')
1885 * M left singular vectors to be computed in U and
1886 * N right singular vectors to be computed in VT
1887 *
1888  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1889 *
1890 * Sufficient workspace for a fast algorithm
1891 *
1892  iu = 1
1893  IF( lwork.GE.wrkbl+lda*n ) THEN
1894 *
1895 * WORK(IU) is LDA by N
1896 *
1897  ldwrku = lda
1898  ELSE
1899 *
1900 * WORK(IU) is N by N
1901 *
1902  ldwrku = n
1903  END IF
1904  itau = iu + ldwrku*n
1905  iwork = itau + n
1906 *
1907 * Compute A=Q*R, copying result to U
1908 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1909 * (RWorkspace: 0)
1910 *
1911  CALL cgeqrf( m, n, a, lda, work( itau ),
1912  $ work( iwork ), lwork-iwork+1, ierr )
1913  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1914 *
1915 * Generate Q in U
1916 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1917 * (RWorkspace: 0)
1918 *
1919  CALL cungqr( m, m, n, u, ldu, work( itau ),
1920  $ work( iwork ), lwork-iwork+1, ierr )
1921 *
1922 * Copy R to WORK(IU), zeroing out below it
1923 *
1924  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1925  $ ldwrku )
1926  CALL claset( 'L', n-1, n-1, czero, czero,
1927  $ work( iu+1 ), ldwrku )
1928  ie = 1
1929  itauq = itau
1930  itaup = itauq + n
1931  iwork = itaup + n
1932 *
1933 * Bidiagonalize R in WORK(IU), copying result to VT
1934 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1935 * (RWorkspace: need N)
1936 *
1937  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1938  $ rwork( ie ), work( itauq ),
1939  $ work( itaup ), work( iwork ),
1940  $ lwork-iwork+1, ierr )
1941  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1942  $ ldvt )
1943 *
1944 * Generate left bidiagonalizing vectors in WORK(IU)
1945 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1946 * (RWorkspace: 0)
1947 *
1948  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1949  $ work( itauq ), work( iwork ),
1950  $ lwork-iwork+1, ierr )
1951 *
1952 * Generate right bidiagonalizing vectors in VT
1953 * (CWorkspace: need N*N+3*N-1,
1954 * prefer N*N+2*N+(N-1)*NB)
1955 * (RWorkspace: need 0)
1956 *
1957  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1958  $ work( iwork ), lwork-iwork+1, ierr )
1959  irwork = ie + n
1960 *
1961 * Perform bidiagonal QR iteration, computing left
1962 * singular vectors of R in WORK(IU) and computing
1963 * right singular vectors of R in VT
1964 * (CWorkspace: need N*N)
1965 * (RWorkspace: need BDSPAC)
1966 *
1967  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1968  $ ldvt, work( iu ), ldwrku, cdum, 1,
1969  $ rwork( irwork ), info )
1970 *
1971 * Multiply Q in U by left singular vectors of R in
1972 * WORK(IU), storing result in A
1973 * (CWorkspace: need N*N)
1974 * (RWorkspace: 0)
1975 *
1976  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1977  $ work( iu ), ldwrku, czero, a, lda )
1978 *
1979 * Copy left singular vectors of A from A to U
1980 *
1981  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1982 *
1983  ELSE
1984 *
1985 * Insufficient workspace for a fast algorithm
1986 *
1987  itau = 1
1988  iwork = itau + n
1989 *
1990 * Compute A=Q*R, copying result to U
1991 * (CWorkspace: need 2*N, prefer N+N*NB)
1992 * (RWorkspace: 0)
1993 *
1994  CALL cgeqrf( m, n, a, lda, work( itau ),
1995  $ work( iwork ), lwork-iwork+1, ierr )
1996  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1997 *
1998 * Generate Q in U
1999 * (CWorkspace: need N+M, prefer N+M*NB)
2000 * (RWorkspace: 0)
2001 *
2002  CALL cungqr( m, m, n, u, ldu, work( itau ),
2003  $ work( iwork ), lwork-iwork+1, ierr )
2004 *
2005 * Copy R from A to VT, zeroing out below it
2006 *
2007  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2008  IF( n.GT.1 )
2009  $ CALL claset( 'L', n-1, n-1, czero, czero,
2010  $ vt( 2, 1 ), ldvt )
2011  ie = 1
2012  itauq = itau
2013  itaup = itauq + n
2014  iwork = itaup + n
2015 *
2016 * Bidiagonalize R in VT
2017 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2018 * (RWorkspace: need N)
2019 *
2020  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
2021  $ work( itauq ), work( itaup ),
2022  $ work( iwork ), lwork-iwork+1, ierr )
2023 *
2024 * Multiply Q in U by left bidiagonalizing vectors
2025 * in VT
2026 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2027 * (RWorkspace: 0)
2028 *
2029  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2030  $ work( itauq ), u, ldu, work( iwork ),
2031  $ lwork-iwork+1, ierr )
2032 *
2033 * Generate right bidiagonalizing vectors in VT
2034 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2035 * (RWorkspace: 0)
2036 *
2037  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2038  $ work( iwork ), lwork-iwork+1, ierr )
2039  irwork = ie + n
2040 *
2041 * Perform bidiagonal QR iteration, computing left
2042 * singular vectors of A in U and computing right
2043 * singular vectors of A in VT
2044 * (CWorkspace: 0)
2045 * (RWorkspace: need BDSPAC)
2046 *
2047  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2048  $ ldvt, u, ldu, cdum, 1,
2049  $ rwork( irwork ), info )
2050 *
2051  END IF
2052 *
2053  END IF
2054 *
2055  END IF
2056 *
2057  ELSE
2058 *
2059 * M .LT. MNTHR
2060 *
2061 * Path 10 (M at least N, but not much larger)
2062 * Reduce to bidiagonal form without QR decomposition
2063 *
2064  ie = 1
2065  itauq = 1
2066  itaup = itauq + n
2067  iwork = itaup + n
2068 *
2069 * Bidiagonalize A
2070 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2071 * (RWorkspace: need N)
2072 *
2073  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2074  $ work( itaup ), work( iwork ), lwork-iwork+1,
2075  $ ierr )
2076  IF( wntuas ) THEN
2077 *
2078 * If left singular vectors desired in U, copy result to U
2079 * and generate left bidiagonalizing vectors in U
2080 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2081 * (RWorkspace: 0)
2082 *
2083  CALL clacpy( 'L', m, n, a, lda, u, ldu )
2084  IF( wntus )
2085  $ ncu = n
2086  IF( wntua )
2087  $ ncu = m
2088  CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2089  $ work( iwork ), lwork-iwork+1, ierr )
2090  END IF
2091  IF( wntvas ) THEN
2092 *
2093 * If right singular vectors desired in VT, copy result to
2094 * VT and generate right bidiagonalizing vectors in VT
2095 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2096 * (RWorkspace: 0)
2097 *
2098  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2099  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2100  $ work( iwork ), lwork-iwork+1, ierr )
2101  END IF
2102  IF( wntuo ) THEN
2103 *
2104 * If left singular vectors desired in A, generate left
2105 * bidiagonalizing vectors in A
2106 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2107 * (RWorkspace: 0)
2108 *
2109  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
2110  $ work( iwork ), lwork-iwork+1, ierr )
2111  END IF
2112  IF( wntvo ) THEN
2113 *
2114 * If right singular vectors desired in A, generate right
2115 * bidiagonalizing vectors in A
2116 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2117 * (RWorkspace: 0)
2118 *
2119  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
2120  $ work( iwork ), lwork-iwork+1, ierr )
2121  END IF
2122  irwork = ie + n
2123  IF( wntuas .OR. wntuo )
2124  $ nru = m
2125  IF( wntun )
2126  $ nru = 0
2127  IF( wntvas .OR. wntvo )
2128  $ ncvt = n
2129  IF( wntvn )
2130  $ ncvt = 0
2131  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2132 *
2133 * Perform bidiagonal QR iteration, if desired, computing
2134 * left singular vectors in U and computing right singular
2135 * vectors in VT
2136 * (CWorkspace: 0)
2137 * (RWorkspace: need BDSPAC)
2138 *
2139  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2140  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2141  $ info )
2142  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2143 *
2144 * Perform bidiagonal QR iteration, if desired, computing
2145 * left singular vectors in U and computing right singular
2146 * vectors in A
2147 * (CWorkspace: 0)
2148 * (RWorkspace: need BDSPAC)
2149 *
2150  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2151  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2152  $ info )
2153  ELSE
2154 *
2155 * Perform bidiagonal QR iteration, if desired, computing
2156 * left singular vectors in A and computing right singular
2157 * vectors in VT
2158 * (CWorkspace: 0)
2159 * (RWorkspace: need BDSPAC)
2160 *
2161  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2162  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2163  $ info )
2164  END IF
2165 *
2166  END IF
2167 *
2168  ELSE
2169 *
2170 * A has more columns than rows. If A has sufficiently more
2171 * columns than rows, first reduce using the LQ decomposition (if
2172 * sufficient workspace available)
2173 *
2174  IF( n.GE.mnthr ) THEN
2175 *
2176  IF( wntvn ) THEN
2177 *
2178 * Path 1t(N much larger than M, JOBVT='N')
2179 * No right singular vectors to be computed
2180 *
2181  itau = 1
2182  iwork = itau + m
2183 *
2184 * Compute A=L*Q
2185 * (CWorkspace: need 2*M, prefer M+M*NB)
2186 * (RWorkspace: 0)
2187 *
2188  CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),
2189  $ lwork-iwork+1, ierr )
2190 *
2191 * Zero out above L
2192 *
2193  CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2194  $ lda )
2195  ie = 1
2196  itauq = 1
2197  itaup = itauq + m
2198  iwork = itaup + m
2199 *
2200 * Bidiagonalize L in A
2201 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2202 * (RWorkspace: need M)
2203 *
2204  CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2205  $ work( itaup ), work( iwork ), lwork-iwork+1,
2206  $ ierr )
2207  IF( wntuo .OR. wntuas ) THEN
2208 *
2209 * If left singular vectors desired, generate Q
2210 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2211 * (RWorkspace: 0)
2212 *
2213  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2214  $ work( iwork ), lwork-iwork+1, ierr )
2215  END IF
2216  irwork = ie + m
2217  nru = 0
2218  IF( wntuo .OR. wntuas )
2219  $ nru = m
2220 *
2221 * Perform bidiagonal QR iteration, computing left singular
2222 * vectors of A in A if desired
2223 * (CWorkspace: 0)
2224 * (RWorkspace: need BDSPAC)
2225 *
2226  CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2227  $ a, lda, cdum, 1, rwork( irwork ), info )
2228 *
2229 * If left singular vectors desired in U, copy them there
2230 *
2231  IF( wntuas )
2232  $ CALL clacpy( 'F', m, m, a, lda, u, ldu )
2233 *
2234  ELSE IF( wntvo .AND. wntun ) THEN
2235 *
2236 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2237 * M right singular vectors to be overwritten on A and
2238 * no left singular vectors to be computed
2239 *
2240  IF( lwork.GE.m*m+3*m ) THEN
2241 *
2242 * Sufficient workspace for a fast algorithm
2243 *
2244  ir = 1
2245  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2246 *
2247 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2248 *
2249  ldwrku = lda
2250  chunk = n
2251  ldwrkr = lda
2252  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2253 *
2254 * WORK(IU) is LDA by N and WORK(IR) is M by M
2255 *
2256  ldwrku = lda
2257  chunk = n
2258  ldwrkr = m
2259  ELSE
2260 *
2261 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2262 *
2263  ldwrku = m
2264  chunk = ( lwork-m*m ) / m
2265  ldwrkr = m
2266  END IF
2267  itau = ir + ldwrkr*m
2268  iwork = itau + m
2269 *
2270 * Compute A=L*Q
2271 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2272 * (RWorkspace: 0)
2273 *
2274  CALL cgelqf( m, n, a, lda, work( itau ),
2275  $ work( iwork ), lwork-iwork+1, ierr )
2276 *
2277 * Copy L to WORK(IR) and zero out above it
2278 *
2279  CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2280  CALL claset( 'U', m-1, m-1, czero, czero,
2281  $ work( ir+ldwrkr ), ldwrkr )
2282 *
2283 * Generate Q in A
2284 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2285 * (RWorkspace: 0)
2286 *
2287  CALL cunglq( m, n, m, a, lda, work( itau ),
2288  $ work( iwork ), lwork-iwork+1, ierr )
2289  ie = 1
2290  itauq = itau
2291  itaup = itauq + m
2292  iwork = itaup + m
2293 *
2294 * Bidiagonalize L in WORK(IR)
2295 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2296 * (RWorkspace: need M)
2297 *
2298  CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2299  $ work( itauq ), work( itaup ),
2300  $ work( iwork ), lwork-iwork+1, ierr )
2301 *
2302 * Generate right vectors bidiagonalizing L
2303 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2304 * (RWorkspace: 0)
2305 *
2306  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2307  $ work( itaup ), work( iwork ),
2308  $ lwork-iwork+1, ierr )
2309  irwork = ie + m
2310 *
2311 * Perform bidiagonal QR iteration, computing right
2312 * singular vectors of L in WORK(IR)
2313 * (CWorkspace: need M*M)
2314 * (RWorkspace: need BDSPAC)
2315 *
2316  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2317  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2318  $ rwork( irwork ), info )
2319  iu = itauq
2320 *
2321 * Multiply right singular vectors of L in WORK(IR) by Q
2322 * in A, storing result in WORK(IU) and copying to A
2323 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2324 * (RWorkspace: 0)
2325 *
2326  DO 30 i = 1, n, chunk
2327  blk = min( n-i+1, chunk )
2328  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2329  $ ldwrkr, a( 1, i ), lda, czero,
2330  $ work( iu ), ldwrku )
2331  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2332  $ a( 1, i ), lda )
2333  30 CONTINUE
2334 *
2335  ELSE
2336 *
2337 * Insufficient workspace for a fast algorithm
2338 *
2339  ie = 1
2340  itauq = 1
2341  itaup = itauq + m
2342  iwork = itaup + m
2343 *
2344 * Bidiagonalize A
2345 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2346 * (RWorkspace: need M)
2347 *
2348  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
2349  $ work( itauq ), work( itaup ),
2350  $ work( iwork ), lwork-iwork+1, ierr )
2351 *
2352 * Generate right vectors bidiagonalizing A
2353 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2354 * (RWorkspace: 0)
2355 *
2356  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2357  $ work( iwork ), lwork-iwork+1, ierr )
2358  irwork = ie + m
2359 *
2360 * Perform bidiagonal QR iteration, computing right
2361 * singular vectors of A in A
2362 * (CWorkspace: 0)
2363 * (RWorkspace: need BDSPAC)
2364 *
2365  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2366  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2367 *
2368  END IF
2369 *
2370  ELSE IF( wntvo .AND. wntuas ) THEN
2371 *
2372 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2373 * M right singular vectors to be overwritten on A and
2374 * M left singular vectors to be computed in U
2375 *
2376  IF( lwork.GE.m*m+3*m ) THEN
2377 *
2378 * Sufficient workspace for a fast algorithm
2379 *
2380  ir = 1
2381  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2382 *
2383 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2384 *
2385  ldwrku = lda
2386  chunk = n
2387  ldwrkr = lda
2388  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2389 *
2390 * WORK(IU) is LDA by N and WORK(IR) is M by M
2391 *
2392  ldwrku = lda
2393  chunk = n
2394  ldwrkr = m
2395  ELSE
2396 *
2397 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2398 *
2399  ldwrku = m
2400  chunk = ( lwork-m*m ) / m
2401  ldwrkr = m
2402  END IF
2403  itau = ir + ldwrkr*m
2404  iwork = itau + m
2405 *
2406 * Compute A=L*Q
2407 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2408 * (RWorkspace: 0)
2409 *
2410  CALL cgelqf( m, n, a, lda, work( itau ),
2411  $ work( iwork ), lwork-iwork+1, ierr )
2412 *
2413 * Copy L to U, zeroing about above it
2414 *
2415  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2416  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2417  $ ldu )
2418 *
2419 * Generate Q in A
2420 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2421 * (RWorkspace: 0)
2422 *
2423  CALL cunglq( m, n, m, a, lda, work( itau ),
2424  $ work( iwork ), lwork-iwork+1, ierr )
2425  ie = 1
2426  itauq = itau
2427  itaup = itauq + m
2428  iwork = itaup + m
2429 *
2430 * Bidiagonalize L in U, copying result to WORK(IR)
2431 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2432 * (RWorkspace: need M)
2433 *
2434  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2435  $ work( itauq ), work( itaup ),
2436  $ work( iwork ), lwork-iwork+1, ierr )
2437  CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2438 *
2439 * Generate right vectors bidiagonalizing L in WORK(IR)
2440 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2441 * (RWorkspace: 0)
2442 *
2443  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2444  $ work( itaup ), work( iwork ),
2445  $ lwork-iwork+1, ierr )
2446 *
2447 * Generate left vectors bidiagonalizing L in U
2448 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2449 * (RWorkspace: 0)
2450 *
2451  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2452  $ work( iwork ), lwork-iwork+1, ierr )
2453  irwork = ie + m
2454 *
2455 * Perform bidiagonal QR iteration, computing left
2456 * singular vectors of L in U, and computing right
2457 * singular vectors of L in WORK(IR)
2458 * (CWorkspace: need M*M)
2459 * (RWorkspace: need BDSPAC)
2460 *
2461  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2462  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2463  $ rwork( irwork ), info )
2464  iu = itauq
2465 *
2466 * Multiply right singular vectors of L in WORK(IR) by Q
2467 * in A, storing result in WORK(IU) and copying to A
2468 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2469 * (RWorkspace: 0)
2470 *
2471  DO 40 i = 1, n, chunk
2472  blk = min( n-i+1, chunk )
2473  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2474  $ ldwrkr, a( 1, i ), lda, czero,
2475  $ work( iu ), ldwrku )
2476  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2477  $ a( 1, i ), lda )
2478  40 CONTINUE
2479 *
2480  ELSE
2481 *
2482 * Insufficient workspace for a fast algorithm
2483 *
2484  itau = 1
2485  iwork = itau + m
2486 *
2487 * Compute A=L*Q
2488 * (CWorkspace: need 2*M, prefer M+M*NB)
2489 * (RWorkspace: 0)
2490 *
2491  CALL cgelqf( m, n, a, lda, work( itau ),
2492  $ work( iwork ), lwork-iwork+1, ierr )
2493 *
2494 * Copy L to U, zeroing out above it
2495 *
2496  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2497  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2498  $ ldu )
2499 *
2500 * Generate Q in A
2501 * (CWorkspace: need 2*M, prefer M+M*NB)
2502 * (RWorkspace: 0)
2503 *
2504  CALL cunglq( m, n, m, a, lda, work( itau ),
2505  $ work( iwork ), lwork-iwork+1, ierr )
2506  ie = 1
2507  itauq = itau
2508  itaup = itauq + m
2509  iwork = itaup + m
2510 *
2511 * Bidiagonalize L in U
2512 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2513 * (RWorkspace: need M)
2514 *
2515  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2516  $ work( itauq ), work( itaup ),
2517  $ work( iwork ), lwork-iwork+1, ierr )
2518 *
2519 * Multiply right vectors bidiagonalizing L by Q in A
2520 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2521 * (RWorkspace: 0)
2522 *
2523  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2524  $ work( itaup ), a, lda, work( iwork ),
2525  $ lwork-iwork+1, ierr )
2526 *
2527 * Generate left vectors bidiagonalizing L in U
2528 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2529 * (RWorkspace: 0)
2530 *
2531  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2532  $ work( iwork ), lwork-iwork+1, ierr )
2533  irwork = ie + m
2534 *
2535 * Perform bidiagonal QR iteration, computing left
2536 * singular vectors of A in U and computing right
2537 * singular vectors of A in A
2538 * (CWorkspace: 0)
2539 * (RWorkspace: need BDSPAC)
2540 *
2541  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2542  $ u, ldu, cdum, 1, rwork( irwork ), info )
2543 *
2544  END IF
2545 *
2546  ELSE IF( wntvs ) THEN
2547 *
2548  IF( wntun ) THEN
2549 *
2550 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2551 * M right singular vectors to be computed in VT and
2552 * no left singular vectors to be computed
2553 *
2554  IF( lwork.GE.m*m+3*m ) THEN
2555 *
2556 * Sufficient workspace for a fast algorithm
2557 *
2558  ir = 1
2559  IF( lwork.GE.wrkbl+lda*m ) THEN
2560 *
2561 * WORK(IR) is LDA by M
2562 *
2563  ldwrkr = lda
2564  ELSE
2565 *
2566 * WORK(IR) is M by M
2567 *
2568  ldwrkr = m
2569  END IF
2570  itau = ir + ldwrkr*m
2571  iwork = itau + m
2572 *
2573 * Compute A=L*Q
2574 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2575 * (RWorkspace: 0)
2576 *
2577  CALL cgelqf( m, n, a, lda, work( itau ),
2578  $ work( iwork ), lwork-iwork+1, ierr )
2579 *
2580 * Copy L to WORK(IR), zeroing out above it
2581 *
2582  CALL clacpy( 'L', m, m, a, lda, work( ir ),
2583  $ ldwrkr )
2584  CALL claset( 'U', m-1, m-1, czero, czero,
2585  $ work( ir+ldwrkr ), ldwrkr )
2586 *
2587 * Generate Q in A
2588 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2589 * (RWorkspace: 0)
2590 *
2591  CALL cunglq( m, n, m, a, lda, work( itau ),
2592  $ work( iwork ), lwork-iwork+1, ierr )
2593  ie = 1
2594  itauq = itau
2595  itaup = itauq + m
2596  iwork = itaup + m
2597 *
2598 * Bidiagonalize L in WORK(IR)
2599 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2600 * (RWorkspace: need M)
2601 *
2602  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
2603  $ rwork( ie ), work( itauq ),
2604  $ work( itaup ), work( iwork ),
2605  $ lwork-iwork+1, ierr )
2606 *
2607 * Generate right vectors bidiagonalizing L in
2608 * WORK(IR)
2609 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2610 * (RWorkspace: 0)
2611 *
2612  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2613  $ work( itaup ), work( iwork ),
2614  $ lwork-iwork+1, ierr )
2615  irwork = ie + m
2616 *
2617 * Perform bidiagonal QR iteration, computing right
2618 * singular vectors of L in WORK(IR)
2619 * (CWorkspace: need M*M)
2620 * (RWorkspace: need BDSPAC)
2621 *
2622  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2623  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2624  $ rwork( irwork ), info )
2625 *
2626 * Multiply right singular vectors of L in WORK(IR) by
2627 * Q in A, storing result in VT
2628 * (CWorkspace: need M*M)
2629 * (RWorkspace: 0)
2630 *
2631  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
2632  $ ldwrkr, a, lda, czero, vt, ldvt )
2633 *
2634  ELSE
2635 *
2636 * Insufficient workspace for a fast algorithm
2637 *
2638  itau = 1
2639  iwork = itau + m
2640 *
2641 * Compute A=L*Q
2642 * (CWorkspace: need 2*M, prefer M+M*NB)
2643 * (RWorkspace: 0)
2644 *
2645  CALL cgelqf( m, n, a, lda, work( itau ),
2646  $ work( iwork ), lwork-iwork+1, ierr )
2647 *
2648 * Copy result to VT
2649 *
2650  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2651 *
2652 * Generate Q in VT
2653 * (CWorkspace: need 2*M, prefer M+M*NB)
2654 * (RWorkspace: 0)
2655 *
2656  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2657  $ work( iwork ), lwork-iwork+1, ierr )
2658  ie = 1
2659  itauq = itau
2660  itaup = itauq + m
2661  iwork = itaup + m
2662 *
2663 * Zero out above L in A
2664 *
2665  CALL claset( 'U', m-1, m-1, czero, czero,
2666  $ a( 1, 2 ), lda )
2667 *
2668 * Bidiagonalize L in A
2669 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2670 * (RWorkspace: need M)
2671 *
2672  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2673  $ work( itauq ), work( itaup ),
2674  $ work( iwork ), lwork-iwork+1, ierr )
2675 *
2676 * Multiply right vectors bidiagonalizing L by Q in VT
2677 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2678 * (RWorkspace: 0)
2679 *
2680  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2681  $ work( itaup ), vt, ldvt,
2682  $ work( iwork ), lwork-iwork+1, ierr )
2683  irwork = ie + m
2684 *
2685 * Perform bidiagonal QR iteration, computing right
2686 * singular vectors of A in VT
2687 * (CWorkspace: 0)
2688 * (RWorkspace: need BDSPAC)
2689 *
2690  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2691  $ ldvt, cdum, 1, cdum, 1,
2692  $ rwork( irwork ), info )
2693 *
2694  END IF
2695 *
2696  ELSE IF( wntuo ) THEN
2697 *
2698 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2699 * M right singular vectors to be computed in VT and
2700 * M left singular vectors to be overwritten on A
2701 *
2702  IF( lwork.GE.2*m*m+3*m ) THEN
2703 *
2704 * Sufficient workspace for a fast algorithm
2705 *
2706  iu = 1
2707  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2708 *
2709 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2710 *
2711  ldwrku = lda
2712  ir = iu + ldwrku*m
2713  ldwrkr = lda
2714  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2715 *
2716 * WORK(IU) is LDA by M and WORK(IR) is M by M
2717 *
2718  ldwrku = lda
2719  ir = iu + ldwrku*m
2720  ldwrkr = m
2721  ELSE
2722 *
2723 * WORK(IU) is M by M and WORK(IR) is M by M
2724 *
2725  ldwrku = m
2726  ir = iu + ldwrku*m
2727  ldwrkr = m
2728  END IF
2729  itau = ir + ldwrkr*m
2730  iwork = itau + m
2731 *
2732 * Compute A=L*Q
2733 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2734 * (RWorkspace: 0)
2735 *
2736  CALL cgelqf( m, n, a, lda, work( itau ),
2737  $ work( iwork ), lwork-iwork+1, ierr )
2738 *
2739 * Copy L to WORK(IU), zeroing out below it
2740 *
2741  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2742  $ ldwrku )
2743  CALL claset( 'U', m-1, m-1, czero, czero,
2744  $ work( iu+ldwrku ), ldwrku )
2745 *
2746 * Generate Q in A
2747 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2748 * (RWorkspace: 0)
2749 *
2750  CALL cunglq( m, n, m, a, lda, work( itau ),
2751  $ work( iwork ), lwork-iwork+1, ierr )
2752  ie = 1
2753  itauq = itau
2754  itaup = itauq + m
2755  iwork = itaup + m
2756 *
2757 * Bidiagonalize L in WORK(IU), copying result to
2758 * WORK(IR)
2759 * (CWorkspace: need 2*M*M+3*M,
2760 * prefer 2*M*M+2*M+2*M*NB)
2761 * (RWorkspace: need M)
2762 *
2763  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2764  $ rwork( ie ), work( itauq ),
2765  $ work( itaup ), work( iwork ),
2766  $ lwork-iwork+1, ierr )
2767  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
2768  $ work( ir ), ldwrkr )
2769 *
2770 * Generate right bidiagonalizing vectors in WORK(IU)
2771 * (CWorkspace: need 2*M*M+3*M-1,
2772 * prefer 2*M*M+2*M+(M-1)*NB)
2773 * (RWorkspace: 0)
2774 *
2775  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2776  $ work( itaup ), work( iwork ),
2777  $ lwork-iwork+1, ierr )
2778 *
2779 * Generate left bidiagonalizing vectors in WORK(IR)
2780 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2781 * (RWorkspace: 0)
2782 *
2783  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2784  $ work( itauq ), work( iwork ),
2785  $ lwork-iwork+1, ierr )
2786  irwork = ie + m
2787 *
2788 * Perform bidiagonal QR iteration, computing left
2789 * singular vectors of L in WORK(IR) and computing
2790 * right singular vectors of L in WORK(IU)
2791 * (CWorkspace: need 2*M*M)
2792 * (RWorkspace: need BDSPAC)
2793 *
2794  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2795  $ work( iu ), ldwrku, work( ir ),
2796  $ ldwrkr, cdum, 1, rwork( irwork ),
2797  $ info )
2798 *
2799 * Multiply right singular vectors of L in WORK(IU) by
2800 * Q in A, storing result in VT
2801 * (CWorkspace: need M*M)
2802 * (RWorkspace: 0)
2803 *
2804  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2805  $ ldwrku, a, lda, czero, vt, ldvt )
2806 *
2807 * Copy left singular vectors of L to A
2808 * (CWorkspace: need M*M)
2809 * (RWorkspace: 0)
2810 *
2811  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
2812  $ lda )
2813 *
2814  ELSE
2815 *
2816 * Insufficient workspace for a fast algorithm
2817 *
2818  itau = 1
2819  iwork = itau + m
2820 *
2821 * Compute A=L*Q, copying result to VT
2822 * (CWorkspace: need 2*M, prefer M+M*NB)
2823 * (RWorkspace: 0)
2824 *
2825  CALL cgelqf( m, n, a, lda, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2828 *
2829 * Generate Q in VT
2830 * (CWorkspace: need 2*M, prefer M+M*NB)
2831 * (RWorkspace: 0)
2832 *
2833  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2834  $ work( iwork ), lwork-iwork+1, ierr )
2835  ie = 1
2836  itauq = itau
2837  itaup = itauq + m
2838  iwork = itaup + m
2839 *
2840 * Zero out above L in A
2841 *
2842  CALL claset( 'U', m-1, m-1, czero, czero,
2843  $ a( 1, 2 ), lda )
2844 *
2845 * Bidiagonalize L in A
2846 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2847 * (RWorkspace: need M)
2848 *
2849  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2850  $ work( itauq ), work( itaup ),
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Multiply right vectors bidiagonalizing L by Q in VT
2854 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2855 * (RWorkspace: 0)
2856 *
2857  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2858  $ work( itaup ), vt, ldvt,
2859  $ work( iwork ), lwork-iwork+1, ierr )
2860 *
2861 * Generate left bidiagonalizing vectors of L in A
2862 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2863 * (RWorkspace: 0)
2864 *
2865  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2866  $ work( iwork ), lwork-iwork+1, ierr )
2867  irwork = ie + m
2868 *
2869 * Perform bidiagonal QR iteration, computing left
2870 * singular vectors of A in A and computing right
2871 * singular vectors of A in VT
2872 * (CWorkspace: 0)
2873 * (RWorkspace: need BDSPAC)
2874 *
2875  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2876  $ ldvt, a, lda, cdum, 1,
2877  $ rwork( irwork ), info )
2878 *
2879  END IF
2880 *
2881  ELSE IF( wntuas ) THEN
2882 *
2883 * Path 6t(N much larger than M, JOBU='S' or 'A',
2884 * JOBVT='S')
2885 * M right singular vectors to be computed in VT and
2886 * M left singular vectors to be computed in U
2887 *
2888  IF( lwork.GE.m*m+3*m ) THEN
2889 *
2890 * Sufficient workspace for a fast algorithm
2891 *
2892  iu = 1
2893  IF( lwork.GE.wrkbl+lda*m ) THEN
2894 *
2895 * WORK(IU) is LDA by N
2896 *
2897  ldwrku = lda
2898  ELSE
2899 *
2900 * WORK(IU) is LDA by M
2901 *
2902  ldwrku = m
2903  END IF
2904  itau = iu + ldwrku*m
2905  iwork = itau + m
2906 *
2907 * Compute A=L*Q
2908 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2909 * (RWorkspace: 0)
2910 *
2911  CALL cgelqf( m, n, a, lda, work( itau ),
2912  $ work( iwork ), lwork-iwork+1, ierr )
2913 *
2914 * Copy L to WORK(IU), zeroing out above it
2915 *
2916  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2917  $ ldwrku )
2918  CALL claset( 'U', m-1, m-1, czero, czero,
2919  $ work( iu+ldwrku ), ldwrku )
2920 *
2921 * Generate Q in A
2922 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2923 * (RWorkspace: 0)
2924 *
2925  CALL cunglq( m, n, m, a, lda, work( itau ),
2926  $ work( iwork ), lwork-iwork+1, ierr )
2927  ie = 1
2928  itauq = itau
2929  itaup = itauq + m
2930  iwork = itaup + m
2931 *
2932 * Bidiagonalize L in WORK(IU), copying result to U
2933 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2934 * (RWorkspace: need M)
2935 *
2936  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2937  $ rwork( ie ), work( itauq ),
2938  $ work( itaup ), work( iwork ),
2939  $ lwork-iwork+1, ierr )
2940  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
2941  $ ldu )
2942 *
2943 * Generate right bidiagonalizing vectors in WORK(IU)
2944 * (CWorkspace: need M*M+3*M-1,
2945 * prefer M*M+2*M+(M-1)*NB)
2946 * (RWorkspace: 0)
2947 *
2948  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2949  $ work( itaup ), work( iwork ),
2950  $ lwork-iwork+1, ierr )
2951 *
2952 * Generate left bidiagonalizing vectors in U
2953 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2954 * (RWorkspace: 0)
2955 *
2956  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2957  $ work( iwork ), lwork-iwork+1, ierr )
2958  irwork = ie + m
2959 *
2960 * Perform bidiagonal QR iteration, computing left
2961 * singular vectors of L in U and computing right
2962 * singular vectors of L in WORK(IU)
2963 * (CWorkspace: need M*M)
2964 * (RWorkspace: need BDSPAC)
2965 *
2966  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2967  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2968  $ rwork( irwork ), info )
2969 *
2970 * Multiply right singular vectors of L in WORK(IU) by
2971 * Q in A, storing result in VT
2972 * (CWorkspace: need M*M)
2973 * (RWorkspace: 0)
2974 *
2975  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2976  $ ldwrku, a, lda, czero, vt, ldvt )
2977 *
2978  ELSE
2979 *
2980 * Insufficient workspace for a fast algorithm
2981 *
2982  itau = 1
2983  iwork = itau + m
2984 *
2985 * Compute A=L*Q, copying result to VT
2986 * (CWorkspace: need 2*M, prefer M+M*NB)
2987 * (RWorkspace: 0)
2988 *
2989  CALL cgelqf( m, n, a, lda, work( itau ),
2990  $ work( iwork ), lwork-iwork+1, ierr )
2991  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2992 *
2993 * Generate Q in VT
2994 * (CWorkspace: need 2*M, prefer M+M*NB)
2995 * (RWorkspace: 0)
2996 *
2997  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2998  $ work( iwork ), lwork-iwork+1, ierr )
2999 *
3000 * Copy L to U, zeroing out above it
3001 *
3002  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3003  CALL claset( 'U', m-1, m-1, czero, czero,
3004  $ u( 1, 2 ), ldu )
3005  ie = 1
3006  itauq = itau
3007  itaup = itauq + m
3008  iwork = itaup + m
3009 *
3010 * Bidiagonalize L in U
3011 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3012 * (RWorkspace: need M)
3013 *
3014  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3015  $ work( itauq ), work( itaup ),
3016  $ work( iwork ), lwork-iwork+1, ierr )
3017 *
3018 * Multiply right bidiagonalizing vectors in U by Q
3019 * in VT
3020 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3021 * (RWorkspace: 0)
3022 *
3023  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3024  $ work( itaup ), vt, ldvt,
3025  $ work( iwork ), lwork-iwork+1, ierr )
3026 *
3027 * Generate left bidiagonalizing vectors in U
3028 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3029 * (RWorkspace: 0)
3030 *
3031  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3032  $ work( iwork ), lwork-iwork+1, ierr )
3033  irwork = ie + m
3034 *
3035 * Perform bidiagonal QR iteration, computing left
3036 * singular vectors of A in U and computing right
3037 * singular vectors of A in VT
3038 * (CWorkspace: 0)
3039 * (RWorkspace: need BDSPAC)
3040 *
3041  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3042  $ ldvt, u, ldu, cdum, 1,
3043  $ rwork( irwork ), info )
3044 *
3045  END IF
3046 *
3047  END IF
3048 *
3049  ELSE IF( wntva ) THEN
3050 *
3051  IF( wntun ) THEN
3052 *
3053 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3054 * N right singular vectors to be computed in VT and
3055 * no left singular vectors to be computed
3056 *
3057  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3058 *
3059 * Sufficient workspace for a fast algorithm
3060 *
3061  ir = 1
3062  IF( lwork.GE.wrkbl+lda*m ) THEN
3063 *
3064 * WORK(IR) is LDA by M
3065 *
3066  ldwrkr = lda
3067  ELSE
3068 *
3069 * WORK(IR) is M by M
3070 *
3071  ldwrkr = m
3072  END IF
3073  itau = ir + ldwrkr*m
3074  iwork = itau + m
3075 *
3076 * Compute A=L*Q, copying result to VT
3077 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3078 * (RWorkspace: 0)
3079 *
3080  CALL cgelqf( m, n, a, lda, work( itau ),
3081  $ work( iwork ), lwork-iwork+1, ierr )
3082  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3083 *
3084 * Copy L to WORK(IR), zeroing out above it
3085 *
3086  CALL clacpy( 'L', m, m, a, lda, work( ir ),
3087  $ ldwrkr )
3088  CALL claset( 'U', m-1, m-1, czero, czero,
3089  $ work( ir+ldwrkr ), ldwrkr )
3090 *
3091 * Generate Q in VT
3092 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3093 * (RWorkspace: 0)
3094 *
3095  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3096  $ work( iwork ), lwork-iwork+1, ierr )
3097  ie = 1
3098  itauq = itau
3099  itaup = itauq + m
3100  iwork = itaup + m
3101 *
3102 * Bidiagonalize L in WORK(IR)
3103 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3104 * (RWorkspace: need M)
3105 *
3106  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
3107  $ rwork( ie ), work( itauq ),
3108  $ work( itaup ), work( iwork ),
3109  $ lwork-iwork+1, ierr )
3110 *
3111 * Generate right bidiagonalizing vectors in WORK(IR)
3112 * (CWorkspace: need M*M+3*M-1,
3113 * prefer M*M+2*M+(M-1)*NB)
3114 * (RWorkspace: 0)
3115 *
3116  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
3117  $ work( itaup ), work( iwork ),
3118  $ lwork-iwork+1, ierr )
3119  irwork = ie + m
3120 *
3121 * Perform bidiagonal QR iteration, computing right
3122 * singular vectors of L in WORK(IR)
3123 * (CWorkspace: need M*M)
3124 * (RWorkspace: need BDSPAC)
3125 *
3126  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3127  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3128  $ rwork( irwork ), info )
3129 *
3130 * Multiply right singular vectors of L in WORK(IR) by
3131 * Q in VT, storing result in A
3132 * (CWorkspace: need M*M)
3133 * (RWorkspace: 0)
3134 *
3135  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
3136  $ ldwrkr, vt, ldvt, czero, a, lda )
3137 *
3138 * Copy right singular vectors of A from A to VT
3139 *
3140  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3141 *
3142  ELSE
3143 *
3144 * Insufficient workspace for a fast algorithm
3145 *
3146  itau = 1
3147  iwork = itau + m
3148 *
3149 * Compute A=L*Q, copying result to VT
3150 * (CWorkspace: need 2*M, prefer M+M*NB)
3151 * (RWorkspace: 0)
3152 *
3153  CALL cgelqf( m, n, a, lda, work( itau ),
3154  $ work( iwork ), lwork-iwork+1, ierr )
3155  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3156 *
3157 * Generate Q in VT
3158 * (CWorkspace: need M+N, prefer M+N*NB)
3159 * (RWorkspace: 0)
3160 *
3161  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3162  $ work( iwork ), lwork-iwork+1, ierr )
3163  ie = 1
3164  itauq = itau
3165  itaup = itauq + m
3166  iwork = itaup + m
3167 *
3168 * Zero out above L in A
3169 *
3170  CALL claset( 'U', m-1, m-1, czero, czero,
3171  $ a( 1, 2 ), lda )
3172 *
3173 * Bidiagonalize L in A
3174 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3175 * (RWorkspace: need M)
3176 *
3177  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3178  $ work( itauq ), work( itaup ),
3179  $ work( iwork ), lwork-iwork+1, ierr )
3180 *
3181 * Multiply right bidiagonalizing vectors in A by Q
3182 * in VT
3183 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3184 * (RWorkspace: 0)
3185 *
3186  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3187  $ work( itaup ), vt, ldvt,
3188  $ work( iwork ), lwork-iwork+1, ierr )
3189  irwork = ie + m
3190 *
3191 * Perform bidiagonal QR iteration, computing right
3192 * singular vectors of A in VT
3193 * (CWorkspace: 0)
3194 * (RWorkspace: need BDSPAC)
3195 *
3196  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3197  $ ldvt, cdum, 1, cdum, 1,
3198  $ rwork( irwork ), info )
3199 *
3200  END IF
3201 *
3202  ELSE IF( wntuo ) THEN
3203 *
3204 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3205 * N right singular vectors to be computed in VT and
3206 * M left singular vectors to be overwritten on A
3207 *
3208  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3209 *
3210 * Sufficient workspace for a fast algorithm
3211 *
3212  iu = 1
3213  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3214 *
3215 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3216 *
3217  ldwrku = lda
3218  ir = iu + ldwrku*m
3219  ldwrkr = lda
3220  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3221 *
3222 * WORK(IU) is LDA by M and WORK(IR) is M by M
3223 *
3224  ldwrku = lda
3225  ir = iu + ldwrku*m
3226  ldwrkr = m
3227  ELSE
3228 *
3229 * WORK(IU) is M by M and WORK(IR) is M by M
3230 *
3231  ldwrku = m
3232  ir = iu + ldwrku*m
3233  ldwrkr = m
3234  END IF
3235  itau = ir + ldwrkr*m
3236  iwork = itau + m
3237 *
3238 * Compute A=L*Q, copying result to VT
3239 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3240 * (RWorkspace: 0)
3241 *
3242  CALL cgelqf( m, n, a, lda, work( itau ),
3243  $ work( iwork ), lwork-iwork+1, ierr )
3244  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3245 *
3246 * Generate Q in VT
3247 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3248 * (RWorkspace: 0)
3249 *
3250  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3251  $ work( iwork ), lwork-iwork+1, ierr )
3252 *
3253 * Copy L to WORK(IU), zeroing out above it
3254 *
3255  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3256  $ ldwrku )
3257  CALL claset( 'U', m-1, m-1, czero, czero,
3258  $ work( iu+ldwrku ), ldwrku )
3259  ie = 1
3260  itauq = itau
3261  itaup = itauq + m
3262  iwork = itaup + m
3263 *
3264 * Bidiagonalize L in WORK(IU), copying result to
3265 * WORK(IR)
3266 * (CWorkspace: need 2*M*M+3*M,
3267 * prefer 2*M*M+2*M+2*M*NB)
3268 * (RWorkspace: need M)
3269 *
3270  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3271  $ rwork( ie ), work( itauq ),
3272  $ work( itaup ), work( iwork ),
3273  $ lwork-iwork+1, ierr )
3274  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
3275  $ work( ir ), ldwrkr )
3276 *
3277 * Generate right bidiagonalizing vectors in WORK(IU)
3278 * (CWorkspace: need 2*M*M+3*M-1,
3279 * prefer 2*M*M+2*M+(M-1)*NB)
3280 * (RWorkspace: 0)
3281 *
3282  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3283  $ work( itaup ), work( iwork ),
3284  $ lwork-iwork+1, ierr )
3285 *
3286 * Generate left bidiagonalizing vectors in WORK(IR)
3287 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3288 * (RWorkspace: 0)
3289 *
3290  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3291  $ work( itauq ), work( iwork ),
3292  $ lwork-iwork+1, ierr )
3293  irwork = ie + m
3294 *
3295 * Perform bidiagonal QR iteration, computing left
3296 * singular vectors of L in WORK(IR) and computing
3297 * right singular vectors of L in WORK(IU)
3298 * (CWorkspace: need 2*M*M)
3299 * (RWorkspace: need BDSPAC)
3300 *
3301  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3302  $ work( iu ), ldwrku, work( ir ),
3303  $ ldwrkr, cdum, 1, rwork( irwork ),
3304  $ info )
3305 *
3306 * Multiply right singular vectors of L in WORK(IU) by
3307 * Q in VT, storing result in A
3308 * (CWorkspace: need M*M)
3309 * (RWorkspace: 0)
3310 *
3311  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3312  $ ldwrku, vt, ldvt, czero, a, lda )
3313 *
3314 * Copy right singular vectors of A from A to VT
3315 *
3316  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3317 *
3318 * Copy left singular vectors of A from WORK(IR) to A
3319 *
3320  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
3321  $ lda )
3322 *
3323  ELSE
3324 *
3325 * Insufficient workspace for a fast algorithm
3326 *
3327  itau = 1
3328  iwork = itau + m
3329 *
3330 * Compute A=L*Q, copying result to VT
3331 * (CWorkspace: need 2*M, prefer M+M*NB)
3332 * (RWorkspace: 0)
3333 *
3334  CALL cgelqf( m, n, a, lda, work( itau ),
3335  $ work( iwork ), lwork-iwork+1, ierr )
3336  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3337 *
3338 * Generate Q in VT
3339 * (CWorkspace: need M+N, prefer M+N*NB)
3340 * (RWorkspace: 0)
3341 *
3342  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3343  $ work( iwork ), lwork-iwork+1, ierr )
3344  ie = 1
3345  itauq = itau
3346  itaup = itauq + m
3347  iwork = itaup + m
3348 *
3349 * Zero out above L in A
3350 *
3351  CALL claset( 'U', m-1, m-1, czero, czero,
3352  $ a( 1, 2 ), lda )
3353 *
3354 * Bidiagonalize L in A
3355 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3356 * (RWorkspace: need M)
3357 *
3358  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3359  $ work( itauq ), work( itaup ),
3360  $ work( iwork ), lwork-iwork+1, ierr )
3361 *
3362 * Multiply right bidiagonalizing vectors in A by Q
3363 * in VT
3364 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3365 * (RWorkspace: 0)
3366 *
3367  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3368  $ work( itaup ), vt, ldvt,
3369  $ work( iwork ), lwork-iwork+1, ierr )
3370 *
3371 * Generate left bidiagonalizing vectors in A
3372 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3373 * (RWorkspace: 0)
3374 *
3375  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
3376  $ work( iwork ), lwork-iwork+1, ierr )
3377  irwork = ie + m
3378 *
3379 * Perform bidiagonal QR iteration, computing left
3380 * singular vectors of A in A and computing right
3381 * singular vectors of A in VT
3382 * (CWorkspace: 0)
3383 * (RWorkspace: need BDSPAC)
3384 *
3385  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3386  $ ldvt, a, lda, cdum, 1,
3387  $ rwork( irwork ), info )
3388 *
3389  END IF
3390 *
3391  ELSE IF( wntuas ) THEN
3392 *
3393 * Path 9t(N much larger than M, JOBU='S' or 'A',
3394 * JOBVT='A')
3395 * N right singular vectors to be computed in VT and
3396 * M left singular vectors to be computed in U
3397 *
3398  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3399 *
3400 * Sufficient workspace for a fast algorithm
3401 *
3402  iu = 1
3403  IF( lwork.GE.wrkbl+lda*m ) THEN
3404 *
3405 * WORK(IU) is LDA by M
3406 *
3407  ldwrku = lda
3408  ELSE
3409 *
3410 * WORK(IU) is M by M
3411 *
3412  ldwrku = m
3413  END IF
3414  itau = iu + ldwrku*m
3415  iwork = itau + m
3416 *
3417 * Compute A=L*Q, copying result to VT
3418 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3419 * (RWorkspace: 0)
3420 *
3421  CALL cgelqf( m, n, a, lda, work( itau ),
3422  $ work( iwork ), lwork-iwork+1, ierr )
3423  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3424 *
3425 * Generate Q in VT
3426 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3427 * (RWorkspace: 0)
3428 *
3429  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3430  $ work( iwork ), lwork-iwork+1, ierr )
3431 *
3432 * Copy L to WORK(IU), zeroing out above it
3433 *
3434  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3435  $ ldwrku )
3436  CALL claset( 'U', m-1, m-1, czero, czero,
3437  $ work( iu+ldwrku ), ldwrku )
3438  ie = 1
3439  itauq = itau
3440  itaup = itauq + m
3441  iwork = itaup + m
3442 *
3443 * Bidiagonalize L in WORK(IU), copying result to U
3444 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3445 * (RWorkspace: need M)
3446 *
3447  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3448  $ rwork( ie ), work( itauq ),
3449  $ work( itaup ), work( iwork ),
3450  $ lwork-iwork+1, ierr )
3451  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
3452  $ ldu )
3453 *
3454 * Generate right bidiagonalizing vectors in WORK(IU)
3455 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3456 * (RWorkspace: 0)
3457 *
3458  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3459  $ work( itaup ), work( iwork ),
3460  $ lwork-iwork+1, ierr )
3461 *
3462 * Generate left bidiagonalizing vectors in U
3463 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3464 * (RWorkspace: 0)
3465 *
3466  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3467  $ work( iwork ), lwork-iwork+1, ierr )
3468  irwork = ie + m
3469 *
3470 * Perform bidiagonal QR iteration, computing left
3471 * singular vectors of L in U and computing right
3472 * singular vectors of L in WORK(IU)
3473 * (CWorkspace: need M*M)
3474 * (RWorkspace: need BDSPAC)
3475 *
3476  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3477  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3478  $ rwork( irwork ), info )
3479 *
3480 * Multiply right singular vectors of L in WORK(IU) by
3481 * Q in VT, storing result in A
3482 * (CWorkspace: need M*M)
3483 * (RWorkspace: 0)
3484 *
3485  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3486  $ ldwrku, vt, ldvt, czero, a, lda )
3487 *
3488 * Copy right singular vectors of A from A to VT
3489 *
3490  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3491 *
3492  ELSE
3493 *
3494 * Insufficient workspace for a fast algorithm
3495 *
3496  itau = 1
3497  iwork = itau + m
3498 *
3499 * Compute A=L*Q, copying result to VT
3500 * (CWorkspace: need 2*M, prefer M+M*NB)
3501 * (RWorkspace: 0)
3502 *
3503  CALL cgelqf( m, n, a, lda, work( itau ),
3504  $ work( iwork ), lwork-iwork+1, ierr )
3505  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3506 *
3507 * Generate Q in VT
3508 * (CWorkspace: need M+N, prefer M+N*NB)
3509 * (RWorkspace: 0)
3510 *
3511  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3512  $ work( iwork ), lwork-iwork+1, ierr )
3513 *
3514 * Copy L to U, zeroing out above it
3515 *
3516  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3517  CALL claset( 'U', m-1, m-1, czero, czero,
3518  $ u( 1, 2 ), ldu )
3519  ie = 1
3520  itauq = itau
3521  itaup = itauq + m
3522  iwork = itaup + m
3523 *
3524 * Bidiagonalize L in U
3525 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3526 * (RWorkspace: need M)
3527 *
3528  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3529  $ work( itauq ), work( itaup ),
3530  $ work( iwork ), lwork-iwork+1, ierr )
3531 *
3532 * Multiply right bidiagonalizing vectors in U by Q
3533 * in VT
3534 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3535 * (RWorkspace: 0)
3536 *
3537  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3538  $ work( itaup ), vt, ldvt,
3539  $ work( iwork ), lwork-iwork+1, ierr )
3540 *
3541 * Generate left bidiagonalizing vectors in U
3542 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3543 * (RWorkspace: 0)
3544 *
3545  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3546  $ work( iwork ), lwork-iwork+1, ierr )
3547  irwork = ie + m
3548 *
3549 * Perform bidiagonal QR iteration, computing left
3550 * singular vectors of A in U and computing right
3551 * singular vectors of A in VT
3552 * (CWorkspace: 0)
3553 * (RWorkspace: need BDSPAC)
3554 *
3555  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3556  $ ldvt, u, ldu, cdum, 1,
3557  $ rwork( irwork ), info )
3558 *
3559  END IF
3560 *
3561  END IF
3562 *
3563  END IF
3564 *
3565  ELSE
3566 *
3567 * N .LT. MNTHR
3568 *
3569 * Path 10t(N greater than M, but not much larger)
3570 * Reduce to bidiagonal form without LQ decomposition
3571 *
3572  ie = 1
3573  itauq = 1
3574  itaup = itauq + m
3575  iwork = itaup + m
3576 *
3577 * Bidiagonalize A
3578 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3579 * (RWorkspace: M)
3580 *
3581  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3582  $ work( itaup ), work( iwork ), lwork-iwork+1,
3583  $ ierr )
3584  IF( wntuas ) THEN
3585 *
3586 * If left singular vectors desired in U, copy result to U
3587 * and generate left bidiagonalizing vectors in U
3588 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3589 * (RWorkspace: 0)
3590 *
3591  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3592  CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3593  $ work( iwork ), lwork-iwork+1, ierr )
3594  END IF
3595  IF( wntvas ) THEN
3596 *
3597 * If right singular vectors desired in VT, copy result to
3598 * VT and generate right bidiagonalizing vectors in VT
3599 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3600 * (RWorkspace: 0)
3601 *
3602  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3603  IF( wntva )
3604  $ nrvt = n
3605  IF( wntvs )
3606  $ nrvt = m
3607  CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3608  $ work( iwork ), lwork-iwork+1, ierr )
3609  END IF
3610  IF( wntuo ) THEN
3611 *
3612 * If left singular vectors desired in A, generate left
3613 * bidiagonalizing vectors in A
3614 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3615 * (RWorkspace: 0)
3616 *
3617  CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
3618  $ work( iwork ), lwork-iwork+1, ierr )
3619  END IF
3620  IF( wntvo ) THEN
3621 *
3622 * If right singular vectors desired in A, generate right
3623 * bidiagonalizing vectors in A
3624 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3625 * (RWorkspace: 0)
3626 *
3627  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
3628  $ work( iwork ), lwork-iwork+1, ierr )
3629  END IF
3630  irwork = ie + m
3631  IF( wntuas .OR. wntuo )
3632  $ nru = m
3633  IF( wntun )
3634  $ nru = 0
3635  IF( wntvas .OR. wntvo )
3636  $ ncvt = n
3637  IF( wntvn )
3638  $ ncvt = 0
3639  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3640 *
3641 * Perform bidiagonal QR iteration, if desired, computing
3642 * left singular vectors in U and computing right singular
3643 * vectors in VT
3644 * (CWorkspace: 0)
3645 * (RWorkspace: need BDSPAC)
3646 *
3647  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3648  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3649  $ info )
3650  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3651 *
3652 * Perform bidiagonal QR iteration, if desired, computing
3653 * left singular vectors in U and computing right singular
3654 * vectors in A
3655 * (CWorkspace: 0)
3656 * (RWorkspace: need BDSPAC)
3657 *
3658  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3659  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3660  $ info )
3661  ELSE
3662 *
3663 * Perform bidiagonal QR iteration, if desired, computing
3664 * left singular vectors in A and computing right singular
3665 * vectors in VT
3666 * (CWorkspace: 0)
3667 * (RWorkspace: need BDSPAC)
3668 *
3669  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3670  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3671  $ info )
3672  END IF
3673 *
3674  END IF
3675 *
3676  END IF
3677 *
3678 * Undo scaling if necessary
3679 *
3680  IF( iscl.EQ.1 ) THEN
3681  IF( anrm.GT.bignum )
3682  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3683  $ ierr )
3684  IF( info.NE.0 .AND. anrm.GT.bignum )
3685  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3686  $ rwork( ie ), minmn, ierr )
3687  IF( anrm.LT.smlnum )
3688  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3689  $ ierr )
3690  IF( info.NE.0 .AND. anrm.LT.smlnum )
3691  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3692  $ rwork( ie ), minmn, ierr )
3693  END IF
3694 *
3695 * Return optimal workspace in WORK(1)
3696 *
3697  work( 1 ) = maxwrk
3698 *
3699  RETURN
3700 *
3701 * End of CGESVD
3702 *
3703  END
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:143
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine cungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGBR
Definition: cungbr.f:157
subroutine cgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGEQRF
Definition: cgeqrf.f:145
subroutine cgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
CGEBRD
Definition: cgebrd.f:206
subroutine cgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGELQF
Definition: cgelqf.f:143
subroutine cgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
CGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: cgesvd.f:214
subroutine claset(UPLO, M, N, ALPHA, BETA, A, LDA)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: claset.f:106
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:143
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:103
subroutine cunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGLQ
Definition: cunglq.f:127
subroutine cunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMBR
Definition: cunmbr.f:197
subroutine cbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, RWORK, INFO)
CBDSQR
Definition: cbdsqr.f:222
subroutine cungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGQR
Definition: cungqr.f:128