LAPACK  3.7.1
LAPACK: Linear Algebra PACKage
cgesvd.f
Go to the documentation of this file.
1 *> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL RWORK( * ), S( * )
30 * COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> CGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is REAL array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is REAL array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if CBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \date April 2012
210 *
211 *> \ingroup complexGEsing
212 *
213 * =====================================================================
214  SUBROUTINE cgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
215  $ WORK, LWORK, RWORK, INFO )
216 *
217 * -- LAPACK driver routine (version 3.7.0) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER JOBU, JOBVT
224  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
225 * ..
226 * .. Array Arguments ..
227  REAL RWORK( * ), S( * )
228  COMPLEX A( lda, * ), U( ldu, * ), VT( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX CZERO, CONE
236  parameter( czero = ( 0.0e0, 0.0e0 ),
237  $ cone = ( 1.0e0, 0.0e0 ) )
238  REAL ZERO, ONE
239  parameter( zero = 0.0e0, one = 1.0e0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
249  $ lwork_cgebrd, lwork_cungbr_p, lwork_cungbr_q,
250  $ lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m
251  REAL ANRM, BIGNUM, EPS, SMLNUM
252 * ..
253 * .. Local Arrays ..
254  REAL DUM( 1 )
255  COMPLEX CDUM( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,
260  $ slascl, xerbla
261 * ..
262 * .. External Functions ..
263  LOGICAL LSAME
264  INTEGER ILAENV
265  REAL CLANGE, SLAMCH
266  EXTERNAL lsame, ilaenv, clange, slamch
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for CGEQRF
324  CALL cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_cgeqrf = int( cdum(1) )
326 * Compute space needed for CUNGQR
327  CALL cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
328  lwork_cungqr_n = int( cdum(1) )
329  CALL cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
330  lwork_cungqr_m = int( cdum(1) )
331 * Compute space needed for CGEBRD
332  CALL cgebrd( n, n, a, lda, s, dum(1), cdum(1),
333  $ cdum(1), cdum(1), -1, ierr )
334  lwork_cgebrd = int( cdum(1) )
335 * Compute space needed for CUNGBR
336  CALL cungbr( 'P', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_cungbr_p = int( cdum(1) )
339  CALL cungbr( 'Q', n, n, n, a, lda, cdum(1),
340  $ cdum(1), -1, ierr )
341  lwork_cungbr_q = int( cdum(1) )
342 *
343  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
344  IF( m.GE.mnthr ) THEN
345  IF( wntun ) THEN
346 *
347 * Path 1 (M much larger than N, JOBU='N')
348 *
349  maxwrk = n + lwork_cgeqrf
350  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
351  IF( wntvo .OR. wntvas )
352  $ maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
353  minwrk = 3*n
354  ELSE IF( wntuo .AND. wntvn ) THEN
355 *
356 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
357 *
358  wrkbl = n + lwork_cgeqrf
359  wrkbl = max( wrkbl, n+lwork_cungqr_n )
360  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
361  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
362  maxwrk = max( n*n+wrkbl, n*n+m*n )
363  minwrk = 2*n + m
364  ELSE IF( wntuo .AND. wntvas ) THEN
365 *
366 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
367 * 'A')
368 *
369  wrkbl = n + lwork_cgeqrf
370  wrkbl = max( wrkbl, n+lwork_cungqr_n )
371  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
372  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
373  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
374  maxwrk = max( n*n+wrkbl, n*n+m*n )
375  minwrk = 2*n + m
376  ELSE IF( wntus .AND. wntvn ) THEN
377 *
378 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
379 *
380  wrkbl = n + lwork_cgeqrf
381  wrkbl = max( wrkbl, n+lwork_cungqr_n )
382  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
383  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
384  maxwrk = n*n + wrkbl
385  minwrk = 2*n + m
386  ELSE IF( wntus .AND. wntvo ) THEN
387 *
388 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
389 *
390  wrkbl = n + lwork_cgeqrf
391  wrkbl = max( wrkbl, n+lwork_cungqr_n )
392  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
393  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
394  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
395  maxwrk = 2*n*n + wrkbl
396  minwrk = 2*n + m
397  ELSE IF( wntus .AND. wntvas ) THEN
398 *
399 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
400 * 'A')
401 *
402  wrkbl = n + lwork_cgeqrf
403  wrkbl = max( wrkbl, n+lwork_cungqr_n )
404  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
405  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
406  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
407  maxwrk = n*n + wrkbl
408  minwrk = 2*n + m
409  ELSE IF( wntua .AND. wntvn ) THEN
410 *
411 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
412 *
413  wrkbl = n + lwork_cgeqrf
414  wrkbl = max( wrkbl, n+lwork_cungqr_m )
415  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
416  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
417  maxwrk = n*n + wrkbl
418  minwrk = 2*n + m
419  ELSE IF( wntua .AND. wntvo ) THEN
420 *
421 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
422 *
423  wrkbl = n + lwork_cgeqrf
424  wrkbl = max( wrkbl, n+lwork_cungqr_m )
425  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
426  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
427  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = 2*n + m
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_cgeqrf
436  wrkbl = max( wrkbl, n+lwork_cungqr_m )
437  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
438  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
439  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
440  maxwrk = n*n + wrkbl
441  minwrk = 2*n + m
442  END IF
443  ELSE
444 *
445 * Path 10 (M at least N, but not much larger)
446 *
447  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
448  $ cdum(1), cdum(1), -1, ierr )
449  lwork_cgebrd = int( cdum(1) )
450  maxwrk = 2*n + lwork_cgebrd
451  IF( wntus .OR. wntuo ) THEN
452  CALL cungbr( 'Q', m, n, n, a, lda, cdum(1),
453  $ cdum(1), -1, ierr )
454  lwork_cungbr_q = int( cdum(1) )
455  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
456  END IF
457  IF( wntua ) THEN
458  CALL cungbr( 'Q', m, m, n, a, lda, cdum(1),
459  $ cdum(1), -1, ierr )
460  lwork_cungbr_q = int( cdum(1) )
461  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
462  END IF
463  IF( .NOT.wntvn ) THEN
464  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
465  END IF
466  minwrk = 2*n + m
467  END IF
468  ELSE IF( minmn.GT.0 ) THEN
469 *
470 * Space needed for CBDSQR is BDSPAC = 5*M
471 *
472  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
473 * Compute space needed for CGELQF
474  CALL cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
475  lwork_cgelqf = int( cdum(1) )
476 * Compute space needed for CUNGLQ
477  CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
478  $ ierr )
479  lwork_cunglq_n = int( cdum(1) )
480  CALL cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
481  lwork_cunglq_m = int( cdum(1) )
482 * Compute space needed for CGEBRD
483  CALL cgebrd( m, m, a, lda, s, dum(1), cdum(1),
484  $ cdum(1), cdum(1), -1, ierr )
485  lwork_cgebrd = int( cdum(1) )
486 * Compute space needed for CUNGBR P
487  CALL cungbr( 'P', m, m, m, a, n, cdum(1),
488  $ cdum(1), -1, ierr )
489  lwork_cungbr_p = int( cdum(1) )
490 * Compute space needed for CUNGBR Q
491  CALL cungbr( 'Q', m, m, m, a, n, cdum(1),
492  $ cdum(1), -1, ierr )
493  lwork_cungbr_q = int( cdum(1) )
494  IF( n.GE.mnthr ) THEN
495  IF( wntvn ) THEN
496 *
497 * Path 1t(N much larger than M, JOBVT='N')
498 *
499  maxwrk = m + lwork_cgelqf
500  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
501  IF( wntuo .OR. wntuas )
502  $ maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
503  minwrk = 3*m
504  ELSE IF( wntvo .AND. wntun ) THEN
505 *
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 *
508  wrkbl = m + lwork_cgelqf
509  wrkbl = max( wrkbl, m+lwork_cunglq_m )
510  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
511  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
512  maxwrk = max( m*m+wrkbl, m*m+m*n )
513  minwrk = 2*m + n
514  ELSE IF( wntvo .AND. wntuas ) THEN
515 *
516 * Path 3t(N much larger than M, JOBU='S' or 'A',
517 * JOBVT='O')
518 *
519  wrkbl = m + lwork_cgelqf
520  wrkbl = max( wrkbl, m+lwork_cunglq_m )
521  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
522  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
523  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
524  maxwrk = max( m*m+wrkbl, m*m+m*n )
525  minwrk = 2*m + n
526  ELSE IF( wntvs .AND. wntun ) THEN
527 *
528 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
529 *
530  wrkbl = m + lwork_cgelqf
531  wrkbl = max( wrkbl, m+lwork_cunglq_m )
532  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
533  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
534  maxwrk = m*m + wrkbl
535  minwrk = 2*m + n
536  ELSE IF( wntvs .AND. wntuo ) THEN
537 *
538 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
539 *
540  wrkbl = m + lwork_cgelqf
541  wrkbl = max( wrkbl, m+lwork_cunglq_m )
542  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
543  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
544  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
545  maxwrk = 2*m*m + wrkbl
546  minwrk = 2*m + n
547  ELSE IF( wntvs .AND. wntuas ) THEN
548 *
549 * Path 6t(N much larger than M, JOBU='S' or 'A',
550 * JOBVT='S')
551 *
552  wrkbl = m + lwork_cgelqf
553  wrkbl = max( wrkbl, m+lwork_cunglq_m )
554  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
555  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
556  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
557  maxwrk = m*m + wrkbl
558  minwrk = 2*m + n
559  ELSE IF( wntva .AND. wntun ) THEN
560 *
561 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
562 *
563  wrkbl = m + lwork_cgelqf
564  wrkbl = max( wrkbl, m+lwork_cunglq_n )
565  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
566  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
567  maxwrk = m*m + wrkbl
568  minwrk = 2*m + n
569  ELSE IF( wntva .AND. wntuo ) THEN
570 *
571 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
572 *
573  wrkbl = m + lwork_cgelqf
574  wrkbl = max( wrkbl, m+lwork_cunglq_n )
575  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
576  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
577  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
578  maxwrk = 2*m*m + wrkbl
579  minwrk = 2*m + n
580  ELSE IF( wntva .AND. wntuas ) THEN
581 *
582 * Path 9t(N much larger than M, JOBU='S' or 'A',
583 * JOBVT='A')
584 *
585  wrkbl = m + lwork_cgelqf
586  wrkbl = max( wrkbl, m+lwork_cunglq_n )
587  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
588  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
589  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
590  maxwrk = m*m + wrkbl
591  minwrk = 2*m + n
592  END IF
593  ELSE
594 *
595 * Path 10t(N greater than M, but not much larger)
596 *
597  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
598  $ cdum(1), cdum(1), -1, ierr )
599  lwork_cgebrd = int( cdum(1) )
600  maxwrk = 2*m + lwork_cgebrd
601  IF( wntvs .OR. wntvo ) THEN
602 * Compute space needed for CUNGBR P
603  CALL cungbr( 'P', m, n, m, a, n, cdum(1),
604  $ cdum(1), -1, ierr )
605  lwork_cungbr_p = int( cdum(1) )
606  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
607  END IF
608  IF( wntva ) THEN
609  CALL cungbr( 'P', n, n, m, a, n, cdum(1),
610  $ cdum(1), -1, ierr )
611  lwork_cungbr_p = int( cdum(1) )
612  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
613  END IF
614  IF( .NOT.wntun ) THEN
615  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
616  END IF
617  minwrk = 2*m + n
618  END IF
619  END IF
620  maxwrk = max( minwrk, maxwrk )
621  work( 1 ) = maxwrk
622 *
623  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
624  info = -13
625  END IF
626  END IF
627 *
628  IF( info.NE.0 ) THEN
629  CALL xerbla( 'CGESVD', -info )
630  RETURN
631  ELSE IF( lquery ) THEN
632  RETURN
633  END IF
634 *
635 * Quick return if possible
636 *
637  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
638  RETURN
639  END IF
640 *
641 * Get machine constants
642 *
643  eps = slamch( 'P' )
644  smlnum = sqrt( slamch( 'S' ) ) / eps
645  bignum = one / smlnum
646 *
647 * Scale A if max element outside range [SMLNUM,BIGNUM]
648 *
649  anrm = clange( 'M', m, n, a, lda, dum )
650  iscl = 0
651  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
652  iscl = 1
653  CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
654  ELSE IF( anrm.GT.bignum ) THEN
655  iscl = 1
656  CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
657  END IF
658 *
659  IF( m.GE.n ) THEN
660 *
661 * A has at least as many rows as columns. If A has sufficiently
662 * more rows than columns, first reduce using the QR
663 * decomposition (if sufficient workspace available)
664 *
665  IF( m.GE.mnthr ) THEN
666 *
667  IF( wntun ) THEN
668 *
669 * Path 1 (M much larger than N, JOBU='N')
670 * No left singular vectors to be computed
671 *
672  itau = 1
673  iwork = itau + n
674 *
675 * Compute A=Q*R
676 * (CWorkspace: need 2*N, prefer N+N*NB)
677 * (RWorkspace: need 0)
678 *
679  CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),
680  $ lwork-iwork+1, ierr )
681 *
682 * Zero out below R
683 *
684  IF( n .GT. 1 ) THEN
685  CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
686  $ lda )
687  END IF
688  ie = 1
689  itauq = 1
690  itaup = itauq + n
691  iwork = itaup + n
692 *
693 * Bidiagonalize R in A
694 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
695 * (RWorkspace: need N)
696 *
697  CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
698  $ work( itaup ), work( iwork ), lwork-iwork+1,
699  $ ierr )
700  ncvt = 0
701  IF( wntvo .OR. wntvas ) THEN
702 *
703 * If right singular vectors desired, generate P'.
704 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
705 * (RWorkspace: 0)
706 *
707  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
708  $ work( iwork ), lwork-iwork+1, ierr )
709  ncvt = n
710  END IF
711  irwork = ie + n
712 *
713 * Perform bidiagonal QR iteration, computing right
714 * singular vectors of A in A if desired
715 * (CWorkspace: 0)
716 * (RWorkspace: need BDSPAC)
717 *
718  CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
719  $ cdum, 1, cdum, 1, rwork( irwork ), info )
720 *
721 * If right singular vectors desired in VT, copy them there
722 *
723  IF( wntvas )
724  $ CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
725 *
726  ELSE IF( wntuo .AND. wntvn ) THEN
727 *
728 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
729 * N left singular vectors to be overwritten on A and
730 * no right singular vectors to be computed
731 *
732  IF( lwork.GE.n*n+3*n ) THEN
733 *
734 * Sufficient workspace for a fast algorithm
735 *
736  ir = 1
737  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
738 *
739 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
740 *
741  ldwrku = lda
742  ldwrkr = lda
743  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
744 *
745 * WORK(IU) is LDA by N, WORK(IR) is N by N
746 *
747  ldwrku = lda
748  ldwrkr = n
749  ELSE
750 *
751 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
752 *
753  ldwrku = ( lwork-n*n ) / n
754  ldwrkr = n
755  END IF
756  itau = ir + ldwrkr*n
757  iwork = itau + n
758 *
759 * Compute A=Q*R
760 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
761 * (RWorkspace: 0)
762 *
763  CALL cgeqrf( m, n, a, lda, work( itau ),
764  $ work( iwork ), lwork-iwork+1, ierr )
765 *
766 * Copy R to WORK(IR) and zero out below it
767 *
768  CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
769  CALL claset( 'L', n-1, n-1, czero, czero,
770  $ work( ir+1 ), ldwrkr )
771 *
772 * Generate Q in A
773 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
774 * (RWorkspace: 0)
775 *
776  CALL cungqr( m, n, n, a, lda, work( itau ),
777  $ work( iwork ), lwork-iwork+1, ierr )
778  ie = 1
779  itauq = itau
780  itaup = itauq + n
781  iwork = itaup + n
782 *
783 * Bidiagonalize R in WORK(IR)
784 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
785 * (RWorkspace: need N)
786 *
787  CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
788  $ work( itauq ), work( itaup ),
789  $ work( iwork ), lwork-iwork+1, ierr )
790 *
791 * Generate left vectors bidiagonalizing R
792 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
793 * (RWorkspace: need 0)
794 *
795  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
796  $ work( itauq ), work( iwork ),
797  $ lwork-iwork+1, ierr )
798  irwork = ie + n
799 *
800 * Perform bidiagonal QR iteration, computing left
801 * singular vectors of R in WORK(IR)
802 * (CWorkspace: need N*N)
803 * (RWorkspace: need BDSPAC)
804 *
805  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
806  $ work( ir ), ldwrkr, cdum, 1,
807  $ rwork( irwork ), info )
808  iu = itauq
809 *
810 * Multiply Q in A by left singular vectors of R in
811 * WORK(IR), storing result in WORK(IU) and copying to A
812 * (CWorkspace: need N*N+N, prefer N*N+M*N)
813 * (RWorkspace: 0)
814 *
815  DO 10 i = 1, m, ldwrku
816  chunk = min( m-i+1, ldwrku )
817  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
818  $ lda, work( ir ), ldwrkr, czero,
819  $ work( iu ), ldwrku )
820  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
821  $ a( i, 1 ), lda )
822  10 CONTINUE
823 *
824  ELSE
825 *
826 * Insufficient workspace for a fast algorithm
827 *
828  ie = 1
829  itauq = 1
830  itaup = itauq + n
831  iwork = itaup + n
832 *
833 * Bidiagonalize A
834 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
835 * (RWorkspace: N)
836 *
837  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
838  $ work( itauq ), work( itaup ),
839  $ work( iwork ), lwork-iwork+1, ierr )
840 *
841 * Generate left vectors bidiagonalizing A
842 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
843 * (RWorkspace: 0)
844 *
845  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
846  $ work( iwork ), lwork-iwork+1, ierr )
847  irwork = ie + n
848 *
849 * Perform bidiagonal QR iteration, computing left
850 * singular vectors of A in A
851 * (CWorkspace: need 0)
852 * (RWorkspace: need BDSPAC)
853 *
854  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
855  $ a, lda, cdum, 1, rwork( irwork ), info )
856 *
857  END IF
858 *
859  ELSE IF( wntuo .AND. wntvas ) THEN
860 *
861 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
862 * N left singular vectors to be overwritten on A and
863 * N right singular vectors to be computed in VT
864 *
865  IF( lwork.GE.n*n+3*n ) THEN
866 *
867 * Sufficient workspace for a fast algorithm
868 *
869  ir = 1
870  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
871 *
872 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
873 *
874  ldwrku = lda
875  ldwrkr = lda
876  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
877 *
878 * WORK(IU) is LDA by N and WORK(IR) is N by N
879 *
880  ldwrku = lda
881  ldwrkr = n
882  ELSE
883 *
884 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
885 *
886  ldwrku = ( lwork-n*n ) / n
887  ldwrkr = n
888  END IF
889  itau = ir + ldwrkr*n
890  iwork = itau + n
891 *
892 * Compute A=Q*R
893 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
894 * (RWorkspace: 0)
895 *
896  CALL cgeqrf( m, n, a, lda, work( itau ),
897  $ work( iwork ), lwork-iwork+1, ierr )
898 *
899 * Copy R to VT, zeroing out below it
900 *
901  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
902  IF( n.GT.1 )
903  $ CALL claset( 'L', n-1, n-1, czero, czero,
904  $ vt( 2, 1 ), ldvt )
905 *
906 * Generate Q in A
907 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
908 * (RWorkspace: 0)
909 *
910  CALL cungqr( m, n, n, a, lda, work( itau ),
911  $ work( iwork ), lwork-iwork+1, ierr )
912  ie = 1
913  itauq = itau
914  itaup = itauq + n
915  iwork = itaup + n
916 *
917 * Bidiagonalize R in VT, copying result to WORK(IR)
918 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
919 * (RWorkspace: need N)
920 *
921  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
922  $ work( itauq ), work( itaup ),
923  $ work( iwork ), lwork-iwork+1, ierr )
924  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
925 *
926 * Generate left vectors bidiagonalizing R in WORK(IR)
927 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
928 * (RWorkspace: 0)
929 *
930  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
931  $ work( itauq ), work( iwork ),
932  $ lwork-iwork+1, ierr )
933 *
934 * Generate right vectors bidiagonalizing R in VT
935 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
936 * (RWorkspace: 0)
937 *
938  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
939  $ work( iwork ), lwork-iwork+1, ierr )
940  irwork = ie + n
941 *
942 * Perform bidiagonal QR iteration, computing left
943 * singular vectors of R in WORK(IR) and computing right
944 * singular vectors of R in VT
945 * (CWorkspace: need N*N)
946 * (RWorkspace: need BDSPAC)
947 *
948  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
949  $ ldvt, work( ir ), ldwrkr, cdum, 1,
950  $ rwork( irwork ), info )
951  iu = itauq
952 *
953 * Multiply Q in A by left singular vectors of R in
954 * WORK(IR), storing result in WORK(IU) and copying to A
955 * (CWorkspace: need N*N+N, prefer N*N+M*N)
956 * (RWorkspace: 0)
957 *
958  DO 20 i = 1, m, ldwrku
959  chunk = min( m-i+1, ldwrku )
960  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
961  $ lda, work( ir ), ldwrkr, czero,
962  $ work( iu ), ldwrku )
963  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
964  $ a( i, 1 ), lda )
965  20 CONTINUE
966 *
967  ELSE
968 *
969 * Insufficient workspace for a fast algorithm
970 *
971  itau = 1
972  iwork = itau + n
973 *
974 * Compute A=Q*R
975 * (CWorkspace: need 2*N, prefer N+N*NB)
976 * (RWorkspace: 0)
977 *
978  CALL cgeqrf( m, n, a, lda, work( itau ),
979  $ work( iwork ), lwork-iwork+1, ierr )
980 *
981 * Copy R to VT, zeroing out below it
982 *
983  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
984  IF( n.GT.1 )
985  $ CALL claset( 'L', n-1, n-1, czero, czero,
986  $ vt( 2, 1 ), ldvt )
987 *
988 * Generate Q in A
989 * (CWorkspace: need 2*N, prefer N+N*NB)
990 * (RWorkspace: 0)
991 *
992  CALL cungqr( m, n, n, a, lda, work( itau ),
993  $ work( iwork ), lwork-iwork+1, ierr )
994  ie = 1
995  itauq = itau
996  itaup = itauq + n
997  iwork = itaup + n
998 *
999 * Bidiagonalize R in VT
1000 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1001 * (RWorkspace: N)
1002 *
1003  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1004  $ work( itauq ), work( itaup ),
1005  $ work( iwork ), lwork-iwork+1, ierr )
1006 *
1007 * Multiply Q in A by left vectors bidiagonalizing R
1008 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1009 * (RWorkspace: 0)
1010 *
1011  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1012  $ work( itauq ), a, lda, work( iwork ),
1013  $ lwork-iwork+1, ierr )
1014 *
1015 * Generate right vectors bidiagonalizing R in VT
1016 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1017 * (RWorkspace: 0)
1018 *
1019  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1020  $ work( iwork ), lwork-iwork+1, ierr )
1021  irwork = ie + n
1022 *
1023 * Perform bidiagonal QR iteration, computing left
1024 * singular vectors of A in A and computing right
1025 * singular vectors of A in VT
1026 * (CWorkspace: 0)
1027 * (RWorkspace: need BDSPAC)
1028 *
1029  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1030  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1031  $ info )
1032 *
1033  END IF
1034 *
1035  ELSE IF( wntus ) THEN
1036 *
1037  IF( wntvn ) THEN
1038 *
1039 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1040 * N left singular vectors to be computed in U and
1041 * no right singular vectors to be computed
1042 *
1043  IF( lwork.GE.n*n+3*n ) THEN
1044 *
1045 * Sufficient workspace for a fast algorithm
1046 *
1047  ir = 1
1048  IF( lwork.GE.wrkbl+lda*n ) THEN
1049 *
1050 * WORK(IR) is LDA by N
1051 *
1052  ldwrkr = lda
1053  ELSE
1054 *
1055 * WORK(IR) is N by N
1056 *
1057  ldwrkr = n
1058  END IF
1059  itau = ir + ldwrkr*n
1060  iwork = itau + n
1061 *
1062 * Compute A=Q*R
1063 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1064 * (RWorkspace: 0)
1065 *
1066  CALL cgeqrf( m, n, a, lda, work( itau ),
1067  $ work( iwork ), lwork-iwork+1, ierr )
1068 *
1069 * Copy R to WORK(IR), zeroing out below it
1070 *
1071  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1072  $ ldwrkr )
1073  CALL claset( 'L', n-1, n-1, czero, czero,
1074  $ work( ir+1 ), ldwrkr )
1075 *
1076 * Generate Q in A
1077 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1078 * (RWorkspace: 0)
1079 *
1080  CALL cungqr( m, n, n, a, lda, work( itau ),
1081  $ work( iwork ), lwork-iwork+1, ierr )
1082  ie = 1
1083  itauq = itau
1084  itaup = itauq + n
1085  iwork = itaup + n
1086 *
1087 * Bidiagonalize R in WORK(IR)
1088 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1089 * (RWorkspace: need N)
1090 *
1091  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1092  $ rwork( ie ), work( itauq ),
1093  $ work( itaup ), work( iwork ),
1094  $ lwork-iwork+1, ierr )
1095 *
1096 * Generate left vectors bidiagonalizing R in WORK(IR)
1097 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1098 * (RWorkspace: 0)
1099 *
1100  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1101  $ work( itauq ), work( iwork ),
1102  $ lwork-iwork+1, ierr )
1103  irwork = ie + n
1104 *
1105 * Perform bidiagonal QR iteration, computing left
1106 * singular vectors of R in WORK(IR)
1107 * (CWorkspace: need N*N)
1108 * (RWorkspace: need BDSPAC)
1109 *
1110  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1111  $ 1, work( ir ), ldwrkr, cdum, 1,
1112  $ rwork( irwork ), info )
1113 *
1114 * Multiply Q in A by left singular vectors of R in
1115 * WORK(IR), storing result in U
1116 * (CWorkspace: need N*N)
1117 * (RWorkspace: 0)
1118 *
1119  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1120  $ work( ir ), ldwrkr, czero, u, ldu )
1121 *
1122  ELSE
1123 *
1124 * Insufficient workspace for a fast algorithm
1125 *
1126  itau = 1
1127  iwork = itau + n
1128 *
1129 * Compute A=Q*R, copying result to U
1130 * (CWorkspace: need 2*N, prefer N+N*NB)
1131 * (RWorkspace: 0)
1132 *
1133  CALL cgeqrf( m, n, a, lda, work( itau ),
1134  $ work( iwork ), lwork-iwork+1, ierr )
1135  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1136 *
1137 * Generate Q in U
1138 * (CWorkspace: need 2*N, prefer N+N*NB)
1139 * (RWorkspace: 0)
1140 *
1141  CALL cungqr( m, n, n, u, ldu, work( itau ),
1142  $ work( iwork ), lwork-iwork+1, ierr )
1143  ie = 1
1144  itauq = itau
1145  itaup = itauq + n
1146  iwork = itaup + n
1147 *
1148 * Zero out below R in A
1149 *
1150  IF( n .GT. 1 ) THEN
1151  CALL claset( 'L', n-1, n-1, czero, czero,
1152  $ a( 2, 1 ), lda )
1153  END IF
1154 *
1155 * Bidiagonalize R in A
1156 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1157 * (RWorkspace: need N)
1158 *
1159  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1160  $ work( itauq ), work( itaup ),
1161  $ work( iwork ), lwork-iwork+1, ierr )
1162 *
1163 * Multiply Q in U by left vectors bidiagonalizing R
1164 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1165 * (RWorkspace: 0)
1166 *
1167  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1168  $ work( itauq ), u, ldu, work( iwork ),
1169  $ lwork-iwork+1, ierr )
1170  irwork = ie + n
1171 *
1172 * Perform bidiagonal QR iteration, computing left
1173 * singular vectors of A in U
1174 * (CWorkspace: 0)
1175 * (RWorkspace: need BDSPAC)
1176 *
1177  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1178  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1179  $ info )
1180 *
1181  END IF
1182 *
1183  ELSE IF( wntvo ) THEN
1184 *
1185 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1186 * N left singular vectors to be computed in U and
1187 * N right singular vectors to be overwritten on A
1188 *
1189  IF( lwork.GE.2*n*n+3*n ) THEN
1190 *
1191 * Sufficient workspace for a fast algorithm
1192 *
1193  iu = 1
1194  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1195 *
1196 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1197 *
1198  ldwrku = lda
1199  ir = iu + ldwrku*n
1200  ldwrkr = lda
1201  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1202 *
1203 * WORK(IU) is LDA by N and WORK(IR) is N by N
1204 *
1205  ldwrku = lda
1206  ir = iu + ldwrku*n
1207  ldwrkr = n
1208  ELSE
1209 *
1210 * WORK(IU) is N by N and WORK(IR) is N by N
1211 *
1212  ldwrku = n
1213  ir = iu + ldwrku*n
1214  ldwrkr = n
1215  END IF
1216  itau = ir + ldwrkr*n
1217  iwork = itau + n
1218 *
1219 * Compute A=Q*R
1220 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1221 * (RWorkspace: 0)
1222 *
1223  CALL cgeqrf( m, n, a, lda, work( itau ),
1224  $ work( iwork ), lwork-iwork+1, ierr )
1225 *
1226 * Copy R to WORK(IU), zeroing out below it
1227 *
1228  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1229  $ ldwrku )
1230  CALL claset( 'L', n-1, n-1, czero, czero,
1231  $ work( iu+1 ), ldwrku )
1232 *
1233 * Generate Q in A
1234 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1235 * (RWorkspace: 0)
1236 *
1237  CALL cungqr( m, n, n, a, lda, work( itau ),
1238  $ work( iwork ), lwork-iwork+1, ierr )
1239  ie = 1
1240  itauq = itau
1241  itaup = itauq + n
1242  iwork = itaup + n
1243 *
1244 * Bidiagonalize R in WORK(IU), copying result to
1245 * WORK(IR)
1246 * (CWorkspace: need 2*N*N+3*N,
1247 * prefer 2*N*N+2*N+2*N*NB)
1248 * (RWorkspace: need N)
1249 *
1250  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1251  $ rwork( ie ), work( itauq ),
1252  $ work( itaup ), work( iwork ),
1253  $ lwork-iwork+1, ierr )
1254  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1255  $ work( ir ), ldwrkr )
1256 *
1257 * Generate left bidiagonalizing vectors in WORK(IU)
1258 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1259 * (RWorkspace: 0)
1260 *
1261  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1262  $ work( itauq ), work( iwork ),
1263  $ lwork-iwork+1, ierr )
1264 *
1265 * Generate right bidiagonalizing vectors in WORK(IR)
1266 * (CWorkspace: need 2*N*N+3*N-1,
1267 * prefer 2*N*N+2*N+(N-1)*NB)
1268 * (RWorkspace: 0)
1269 *
1270  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1271  $ work( itaup ), work( iwork ),
1272  $ lwork-iwork+1, ierr )
1273  irwork = ie + n
1274 *
1275 * Perform bidiagonal QR iteration, computing left
1276 * singular vectors of R in WORK(IU) and computing
1277 * right singular vectors of R in WORK(IR)
1278 * (CWorkspace: need 2*N*N)
1279 * (RWorkspace: need BDSPAC)
1280 *
1281  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1282  $ work( ir ), ldwrkr, work( iu ),
1283  $ ldwrku, cdum, 1, rwork( irwork ),
1284  $ info )
1285 *
1286 * Multiply Q in A by left singular vectors of R in
1287 * WORK(IU), storing result in U
1288 * (CWorkspace: need N*N)
1289 * (RWorkspace: 0)
1290 *
1291  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1292  $ work( iu ), ldwrku, czero, u, ldu )
1293 *
1294 * Copy right singular vectors of R to A
1295 * (CWorkspace: need N*N)
1296 * (RWorkspace: 0)
1297 *
1298  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1299  $ lda )
1300 *
1301  ELSE
1302 *
1303 * Insufficient workspace for a fast algorithm
1304 *
1305  itau = 1
1306  iwork = itau + n
1307 *
1308 * Compute A=Q*R, copying result to U
1309 * (CWorkspace: need 2*N, prefer N+N*NB)
1310 * (RWorkspace: 0)
1311 *
1312  CALL cgeqrf( m, n, a, lda, work( itau ),
1313  $ work( iwork ), lwork-iwork+1, ierr )
1314  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1315 *
1316 * Generate Q in U
1317 * (CWorkspace: need 2*N, prefer N+N*NB)
1318 * (RWorkspace: 0)
1319 *
1320  CALL cungqr( m, n, n, u, ldu, work( itau ),
1321  $ work( iwork ), lwork-iwork+1, ierr )
1322  ie = 1
1323  itauq = itau
1324  itaup = itauq + n
1325  iwork = itaup + n
1326 *
1327 * Zero out below R in A
1328 *
1329  IF( n .GT. 1 ) THEN
1330  CALL claset( 'L', n-1, n-1, czero, czero,
1331  $ a( 2, 1 ), lda )
1332  END IF
1333 *
1334 * Bidiagonalize R in A
1335 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1336 * (RWorkspace: need N)
1337 *
1338  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1339  $ work( itauq ), work( itaup ),
1340  $ work( iwork ), lwork-iwork+1, ierr )
1341 *
1342 * Multiply Q in U by left vectors bidiagonalizing R
1343 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1344 * (RWorkspace: 0)
1345 *
1346  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1347  $ work( itauq ), u, ldu, work( iwork ),
1348  $ lwork-iwork+1, ierr )
1349 *
1350 * Generate right vectors bidiagonalizing R in A
1351 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1352 * (RWorkspace: 0)
1353 *
1354  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1355  $ work( iwork ), lwork-iwork+1, ierr )
1356  irwork = ie + n
1357 *
1358 * Perform bidiagonal QR iteration, computing left
1359 * singular vectors of A in U and computing right
1360 * singular vectors of A in A
1361 * (CWorkspace: 0)
1362 * (RWorkspace: need BDSPAC)
1363 *
1364  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1365  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1366  $ info )
1367 *
1368  END IF
1369 *
1370  ELSE IF( wntvas ) THEN
1371 *
1372 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1373 * or 'A')
1374 * N left singular vectors to be computed in U and
1375 * N right singular vectors to be computed in VT
1376 *
1377  IF( lwork.GE.n*n+3*n ) THEN
1378 *
1379 * Sufficient workspace for a fast algorithm
1380 *
1381  iu = 1
1382  IF( lwork.GE.wrkbl+lda*n ) THEN
1383 *
1384 * WORK(IU) is LDA by N
1385 *
1386  ldwrku = lda
1387  ELSE
1388 *
1389 * WORK(IU) is N by N
1390 *
1391  ldwrku = n
1392  END IF
1393  itau = iu + ldwrku*n
1394  iwork = itau + n
1395 *
1396 * Compute A=Q*R
1397 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1398 * (RWorkspace: 0)
1399 *
1400  CALL cgeqrf( m, n, a, lda, work( itau ),
1401  $ work( iwork ), lwork-iwork+1, ierr )
1402 *
1403 * Copy R to WORK(IU), zeroing out below it
1404 *
1405  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1406  $ ldwrku )
1407  CALL claset( 'L', n-1, n-1, czero, czero,
1408  $ work( iu+1 ), ldwrku )
1409 *
1410 * Generate Q in A
1411 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1412 * (RWorkspace: 0)
1413 *
1414  CALL cungqr( m, n, n, a, lda, work( itau ),
1415  $ work( iwork ), lwork-iwork+1, ierr )
1416  ie = 1
1417  itauq = itau
1418  itaup = itauq + n
1419  iwork = itaup + n
1420 *
1421 * Bidiagonalize R in WORK(IU), copying result to VT
1422 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1423 * (RWorkspace: need N)
1424 *
1425  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1426  $ rwork( ie ), work( itauq ),
1427  $ work( itaup ), work( iwork ),
1428  $ lwork-iwork+1, ierr )
1429  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1430  $ ldvt )
1431 *
1432 * Generate left bidiagonalizing vectors in WORK(IU)
1433 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1434 * (RWorkspace: 0)
1435 *
1436  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1437  $ work( itauq ), work( iwork ),
1438  $ lwork-iwork+1, ierr )
1439 *
1440 * Generate right bidiagonalizing vectors in VT
1441 * (CWorkspace: need N*N+3*N-1,
1442 * prefer N*N+2*N+(N-1)*NB)
1443 * (RWorkspace: 0)
1444 *
1445  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1446  $ work( iwork ), lwork-iwork+1, ierr )
1447  irwork = ie + n
1448 *
1449 * Perform bidiagonal QR iteration, computing left
1450 * singular vectors of R in WORK(IU) and computing
1451 * right singular vectors of R in VT
1452 * (CWorkspace: need N*N)
1453 * (RWorkspace: need BDSPAC)
1454 *
1455  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1456  $ ldvt, work( iu ), ldwrku, cdum, 1,
1457  $ rwork( irwork ), info )
1458 *
1459 * Multiply Q in A by left singular vectors of R in
1460 * WORK(IU), storing result in U
1461 * (CWorkspace: need N*N)
1462 * (RWorkspace: 0)
1463 *
1464  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1465  $ work( iu ), ldwrku, czero, u, ldu )
1466 *
1467  ELSE
1468 *
1469 * Insufficient workspace for a fast algorithm
1470 *
1471  itau = 1
1472  iwork = itau + n
1473 *
1474 * Compute A=Q*R, copying result to U
1475 * (CWorkspace: need 2*N, prefer N+N*NB)
1476 * (RWorkspace: 0)
1477 *
1478  CALL cgeqrf( m, n, a, lda, work( itau ),
1479  $ work( iwork ), lwork-iwork+1, ierr )
1480  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1481 *
1482 * Generate Q in U
1483 * (CWorkspace: need 2*N, prefer N+N*NB)
1484 * (RWorkspace: 0)
1485 *
1486  CALL cungqr( m, n, n, u, ldu, work( itau ),
1487  $ work( iwork ), lwork-iwork+1, ierr )
1488 *
1489 * Copy R to VT, zeroing out below it
1490 *
1491  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1492  IF( n.GT.1 )
1493  $ CALL claset( 'L', n-1, n-1, czero, czero,
1494  $ vt( 2, 1 ), ldvt )
1495  ie = 1
1496  itauq = itau
1497  itaup = itauq + n
1498  iwork = itaup + n
1499 *
1500 * Bidiagonalize R in VT
1501 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1502 * (RWorkspace: need N)
1503 *
1504  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1505  $ work( itauq ), work( itaup ),
1506  $ work( iwork ), lwork-iwork+1, ierr )
1507 *
1508 * Multiply Q in U by left bidiagonalizing vectors
1509 * in VT
1510 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1511 * (RWorkspace: 0)
1512 *
1513  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1514  $ work( itauq ), u, ldu, work( iwork ),
1515  $ lwork-iwork+1, ierr )
1516 *
1517 * Generate right bidiagonalizing vectors in VT
1518 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1519 * (RWorkspace: 0)
1520 *
1521  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1522  $ work( iwork ), lwork-iwork+1, ierr )
1523  irwork = ie + n
1524 *
1525 * Perform bidiagonal QR iteration, computing left
1526 * singular vectors of A in U and computing right
1527 * singular vectors of A in VT
1528 * (CWorkspace: 0)
1529 * (RWorkspace: need BDSPAC)
1530 *
1531  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1532  $ ldvt, u, ldu, cdum, 1,
1533  $ rwork( irwork ), info )
1534 *
1535  END IF
1536 *
1537  END IF
1538 *
1539  ELSE IF( wntua ) THEN
1540 *
1541  IF( wntvn ) THEN
1542 *
1543 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1544 * M left singular vectors to be computed in U and
1545 * no right singular vectors to be computed
1546 *
1547  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1548 *
1549 * Sufficient workspace for a fast algorithm
1550 *
1551  ir = 1
1552  IF( lwork.GE.wrkbl+lda*n ) THEN
1553 *
1554 * WORK(IR) is LDA by N
1555 *
1556  ldwrkr = lda
1557  ELSE
1558 *
1559 * WORK(IR) is N by N
1560 *
1561  ldwrkr = n
1562  END IF
1563  itau = ir + ldwrkr*n
1564  iwork = itau + n
1565 *
1566 * Compute A=Q*R, copying result to U
1567 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1568 * (RWorkspace: 0)
1569 *
1570  CALL cgeqrf( m, n, a, lda, work( itau ),
1571  $ work( iwork ), lwork-iwork+1, ierr )
1572  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1573 *
1574 * Copy R to WORK(IR), zeroing out below it
1575 *
1576  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1577  $ ldwrkr )
1578  CALL claset( 'L', n-1, n-1, czero, czero,
1579  $ work( ir+1 ), ldwrkr )
1580 *
1581 * Generate Q in U
1582 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1583 * (RWorkspace: 0)
1584 *
1585  CALL cungqr( m, m, n, u, ldu, work( itau ),
1586  $ work( iwork ), lwork-iwork+1, ierr )
1587  ie = 1
1588  itauq = itau
1589  itaup = itauq + n
1590  iwork = itaup + n
1591 *
1592 * Bidiagonalize R in WORK(IR)
1593 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1594 * (RWorkspace: need N)
1595 *
1596  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1597  $ rwork( ie ), work( itauq ),
1598  $ work( itaup ), work( iwork ),
1599  $ lwork-iwork+1, ierr )
1600 *
1601 * Generate left bidiagonalizing vectors in WORK(IR)
1602 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1603 * (RWorkspace: 0)
1604 *
1605  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1606  $ work( itauq ), work( iwork ),
1607  $ lwork-iwork+1, ierr )
1608  irwork = ie + n
1609 *
1610 * Perform bidiagonal QR iteration, computing left
1611 * singular vectors of R in WORK(IR)
1612 * (CWorkspace: need N*N)
1613 * (RWorkspace: need BDSPAC)
1614 *
1615  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1616  $ 1, work( ir ), ldwrkr, cdum, 1,
1617  $ rwork( irwork ), info )
1618 *
1619 * Multiply Q in U by left singular vectors of R in
1620 * WORK(IR), storing result in A
1621 * (CWorkspace: need N*N)
1622 * (RWorkspace: 0)
1623 *
1624  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1625  $ work( ir ), ldwrkr, czero, a, lda )
1626 *
1627 * Copy left singular vectors of A from A to U
1628 *
1629  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1630 *
1631  ELSE
1632 *
1633 * Insufficient workspace for a fast algorithm
1634 *
1635  itau = 1
1636  iwork = itau + n
1637 *
1638 * Compute A=Q*R, copying result to U
1639 * (CWorkspace: need 2*N, prefer N+N*NB)
1640 * (RWorkspace: 0)
1641 *
1642  CALL cgeqrf( m, n, a, lda, work( itau ),
1643  $ work( iwork ), lwork-iwork+1, ierr )
1644  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1645 *
1646 * Generate Q in U
1647 * (CWorkspace: need N+M, prefer N+M*NB)
1648 * (RWorkspace: 0)
1649 *
1650  CALL cungqr( m, m, n, u, ldu, work( itau ),
1651  $ work( iwork ), lwork-iwork+1, ierr )
1652  ie = 1
1653  itauq = itau
1654  itaup = itauq + n
1655  iwork = itaup + n
1656 *
1657 * Zero out below R in A
1658 *
1659  IF( n .GT. 1 ) THEN
1660  CALL claset( 'L', n-1, n-1, czero, czero,
1661  $ a( 2, 1 ), lda )
1662  END IF
1663 *
1664 * Bidiagonalize R in A
1665 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1666 * (RWorkspace: need N)
1667 *
1668  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1669  $ work( itauq ), work( itaup ),
1670  $ work( iwork ), lwork-iwork+1, ierr )
1671 *
1672 * Multiply Q in U by left bidiagonalizing vectors
1673 * in A
1674 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1675 * (RWorkspace: 0)
1676 *
1677  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1678  $ work( itauq ), u, ldu, work( iwork ),
1679  $ lwork-iwork+1, ierr )
1680  irwork = ie + n
1681 *
1682 * Perform bidiagonal QR iteration, computing left
1683 * singular vectors of A in U
1684 * (CWorkspace: 0)
1685 * (RWorkspace: need BDSPAC)
1686 *
1687  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1688  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1689  $ info )
1690 *
1691  END IF
1692 *
1693  ELSE IF( wntvo ) THEN
1694 *
1695 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1696 * M left singular vectors to be computed in U and
1697 * N right singular vectors to be overwritten on A
1698 *
1699  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1700 *
1701 * Sufficient workspace for a fast algorithm
1702 *
1703  iu = 1
1704  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1705 *
1706 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1707 *
1708  ldwrku = lda
1709  ir = iu + ldwrku*n
1710  ldwrkr = lda
1711  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1712 *
1713 * WORK(IU) is LDA by N and WORK(IR) is N by N
1714 *
1715  ldwrku = lda
1716  ir = iu + ldwrku*n
1717  ldwrkr = n
1718  ELSE
1719 *
1720 * WORK(IU) is N by N and WORK(IR) is N by N
1721 *
1722  ldwrku = n
1723  ir = iu + ldwrku*n
1724  ldwrkr = n
1725  END IF
1726  itau = ir + ldwrkr*n
1727  iwork = itau + n
1728 *
1729 * Compute A=Q*R, copying result to U
1730 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1731 * (RWorkspace: 0)
1732 *
1733  CALL cgeqrf( m, n, a, lda, work( itau ),
1734  $ work( iwork ), lwork-iwork+1, ierr )
1735  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1736 *
1737 * Generate Q in U
1738 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1739 * (RWorkspace: 0)
1740 *
1741  CALL cungqr( m, m, n, u, ldu, work( itau ),
1742  $ work( iwork ), lwork-iwork+1, ierr )
1743 *
1744 * Copy R to WORK(IU), zeroing out below it
1745 *
1746  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1747  $ ldwrku )
1748  CALL claset( 'L', n-1, n-1, czero, czero,
1749  $ work( iu+1 ), ldwrku )
1750  ie = 1
1751  itauq = itau
1752  itaup = itauq + n
1753  iwork = itaup + n
1754 *
1755 * Bidiagonalize R in WORK(IU), copying result to
1756 * WORK(IR)
1757 * (CWorkspace: need 2*N*N+3*N,
1758 * prefer 2*N*N+2*N+2*N*NB)
1759 * (RWorkspace: need N)
1760 *
1761  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1762  $ rwork( ie ), work( itauq ),
1763  $ work( itaup ), work( iwork ),
1764  $ lwork-iwork+1, ierr )
1765  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1766  $ work( ir ), ldwrkr )
1767 *
1768 * Generate left bidiagonalizing vectors in WORK(IU)
1769 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1770 * (RWorkspace: 0)
1771 *
1772  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1773  $ work( itauq ), work( iwork ),
1774  $ lwork-iwork+1, ierr )
1775 *
1776 * Generate right bidiagonalizing vectors in WORK(IR)
1777 * (CWorkspace: need 2*N*N+3*N-1,
1778 * prefer 2*N*N+2*N+(N-1)*NB)
1779 * (RWorkspace: 0)
1780 *
1781  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1782  $ work( itaup ), work( iwork ),
1783  $ lwork-iwork+1, ierr )
1784  irwork = ie + n
1785 *
1786 * Perform bidiagonal QR iteration, computing left
1787 * singular vectors of R in WORK(IU) and computing
1788 * right singular vectors of R in WORK(IR)
1789 * (CWorkspace: need 2*N*N)
1790 * (RWorkspace: need BDSPAC)
1791 *
1792  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1793  $ work( ir ), ldwrkr, work( iu ),
1794  $ ldwrku, cdum, 1, rwork( irwork ),
1795  $ info )
1796 *
1797 * Multiply Q in U by left singular vectors of R in
1798 * WORK(IU), storing result in A
1799 * (CWorkspace: need N*N)
1800 * (RWorkspace: 0)
1801 *
1802  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1803  $ work( iu ), ldwrku, czero, a, lda )
1804 *
1805 * Copy left singular vectors of A from A to U
1806 *
1807  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1808 *
1809 * Copy right singular vectors of R from WORK(IR) to A
1810 *
1811  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1812  $ lda )
1813 *
1814  ELSE
1815 *
1816 * Insufficient workspace for a fast algorithm
1817 *
1818  itau = 1
1819  iwork = itau + n
1820 *
1821 * Compute A=Q*R, copying result to U
1822 * (CWorkspace: need 2*N, prefer N+N*NB)
1823 * (RWorkspace: 0)
1824 *
1825  CALL cgeqrf( m, n, a, lda, work( itau ),
1826  $ work( iwork ), lwork-iwork+1, ierr )
1827  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1828 *
1829 * Generate Q in U
1830 * (CWorkspace: need N+M, prefer N+M*NB)
1831 * (RWorkspace: 0)
1832 *
1833  CALL cungqr( m, m, n, u, ldu, work( itau ),
1834  $ work( iwork ), lwork-iwork+1, ierr )
1835  ie = 1
1836  itauq = itau
1837  itaup = itauq + n
1838  iwork = itaup + n
1839 *
1840 * Zero out below R in A
1841 *
1842  IF( n .GT. 1 ) THEN
1843  CALL claset( 'L', n-1, n-1, czero, czero,
1844  $ a( 2, 1 ), lda )
1845  END IF
1846 *
1847 * Bidiagonalize R in A
1848 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1849 * (RWorkspace: need N)
1850 *
1851  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1852  $ work( itauq ), work( itaup ),
1853  $ work( iwork ), lwork-iwork+1, ierr )
1854 *
1855 * Multiply Q in U by left bidiagonalizing vectors
1856 * in A
1857 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1858 * (RWorkspace: 0)
1859 *
1860  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1861  $ work( itauq ), u, ldu, work( iwork ),
1862  $ lwork-iwork+1, ierr )
1863 *
1864 * Generate right bidiagonalizing vectors in A
1865 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1866 * (RWorkspace: 0)
1867 *
1868  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1869  $ work( iwork ), lwork-iwork+1, ierr )
1870  irwork = ie + n
1871 *
1872 * Perform bidiagonal QR iteration, computing left
1873 * singular vectors of A in U and computing right
1874 * singular vectors of A in A
1875 * (CWorkspace: 0)
1876 * (RWorkspace: need BDSPAC)
1877 *
1878  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1879  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1880  $ info )
1881 *
1882  END IF
1883 *
1884  ELSE IF( wntvas ) THEN
1885 *
1886 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1887 * or 'A')
1888 * M left singular vectors to be computed in U and
1889 * N right singular vectors to be computed in VT
1890 *
1891  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1892 *
1893 * Sufficient workspace for a fast algorithm
1894 *
1895  iu = 1
1896  IF( lwork.GE.wrkbl+lda*n ) THEN
1897 *
1898 * WORK(IU) is LDA by N
1899 *
1900  ldwrku = lda
1901  ELSE
1902 *
1903 * WORK(IU) is N by N
1904 *
1905  ldwrku = n
1906  END IF
1907  itau = iu + ldwrku*n
1908  iwork = itau + n
1909 *
1910 * Compute A=Q*R, copying result to U
1911 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1912 * (RWorkspace: 0)
1913 *
1914  CALL cgeqrf( m, n, a, lda, work( itau ),
1915  $ work( iwork ), lwork-iwork+1, ierr )
1916  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1917 *
1918 * Generate Q in U
1919 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1920 * (RWorkspace: 0)
1921 *
1922  CALL cungqr( m, m, n, u, ldu, work( itau ),
1923  $ work( iwork ), lwork-iwork+1, ierr )
1924 *
1925 * Copy R to WORK(IU), zeroing out below it
1926 *
1927  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1928  $ ldwrku )
1929  CALL claset( 'L', n-1, n-1, czero, czero,
1930  $ work( iu+1 ), ldwrku )
1931  ie = 1
1932  itauq = itau
1933  itaup = itauq + n
1934  iwork = itaup + n
1935 *
1936 * Bidiagonalize R in WORK(IU), copying result to VT
1937 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1938 * (RWorkspace: need N)
1939 *
1940  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1941  $ rwork( ie ), work( itauq ),
1942  $ work( itaup ), work( iwork ),
1943  $ lwork-iwork+1, ierr )
1944  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1945  $ ldvt )
1946 *
1947 * Generate left bidiagonalizing vectors in WORK(IU)
1948 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1949 * (RWorkspace: 0)
1950 *
1951  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1952  $ work( itauq ), work( iwork ),
1953  $ lwork-iwork+1, ierr )
1954 *
1955 * Generate right bidiagonalizing vectors in VT
1956 * (CWorkspace: need N*N+3*N-1,
1957 * prefer N*N+2*N+(N-1)*NB)
1958 * (RWorkspace: need 0)
1959 *
1960  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1961  $ work( iwork ), lwork-iwork+1, ierr )
1962  irwork = ie + n
1963 *
1964 * Perform bidiagonal QR iteration, computing left
1965 * singular vectors of R in WORK(IU) and computing
1966 * right singular vectors of R in VT
1967 * (CWorkspace: need N*N)
1968 * (RWorkspace: need BDSPAC)
1969 *
1970  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1971  $ ldvt, work( iu ), ldwrku, cdum, 1,
1972  $ rwork( irwork ), info )
1973 *
1974 * Multiply Q in U by left singular vectors of R in
1975 * WORK(IU), storing result in A
1976 * (CWorkspace: need N*N)
1977 * (RWorkspace: 0)
1978 *
1979  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1980  $ work( iu ), ldwrku, czero, a, lda )
1981 *
1982 * Copy left singular vectors of A from A to U
1983 *
1984  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1985 *
1986  ELSE
1987 *
1988 * Insufficient workspace for a fast algorithm
1989 *
1990  itau = 1
1991  iwork = itau + n
1992 *
1993 * Compute A=Q*R, copying result to U
1994 * (CWorkspace: need 2*N, prefer N+N*NB)
1995 * (RWorkspace: 0)
1996 *
1997  CALL cgeqrf( m, n, a, lda, work( itau ),
1998  $ work( iwork ), lwork-iwork+1, ierr )
1999  CALL clacpy( 'L', m, n, a, lda, u, ldu )
2000 *
2001 * Generate Q in U
2002 * (CWorkspace: need N+M, prefer N+M*NB)
2003 * (RWorkspace: 0)
2004 *
2005  CALL cungqr( m, m, n, u, ldu, work( itau ),
2006  $ work( iwork ), lwork-iwork+1, ierr )
2007 *
2008 * Copy R from A to VT, zeroing out below it
2009 *
2010  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2011  IF( n.GT.1 )
2012  $ CALL claset( 'L', n-1, n-1, czero, czero,
2013  $ vt( 2, 1 ), ldvt )
2014  ie = 1
2015  itauq = itau
2016  itaup = itauq + n
2017  iwork = itaup + n
2018 *
2019 * Bidiagonalize R in VT
2020 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2021 * (RWorkspace: need N)
2022 *
2023  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
2024  $ work( itauq ), work( itaup ),
2025  $ work( iwork ), lwork-iwork+1, ierr )
2026 *
2027 * Multiply Q in U by left bidiagonalizing vectors
2028 * in VT
2029 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2030 * (RWorkspace: 0)
2031 *
2032  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2033  $ work( itauq ), u, ldu, work( iwork ),
2034  $ lwork-iwork+1, ierr )
2035 *
2036 * Generate right bidiagonalizing vectors in VT
2037 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2038 * (RWorkspace: 0)
2039 *
2040  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2041  $ work( iwork ), lwork-iwork+1, ierr )
2042  irwork = ie + n
2043 *
2044 * Perform bidiagonal QR iteration, computing left
2045 * singular vectors of A in U and computing right
2046 * singular vectors of A in VT
2047 * (CWorkspace: 0)
2048 * (RWorkspace: need BDSPAC)
2049 *
2050  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2051  $ ldvt, u, ldu, cdum, 1,
2052  $ rwork( irwork ), info )
2053 *
2054  END IF
2055 *
2056  END IF
2057 *
2058  END IF
2059 *
2060  ELSE
2061 *
2062 * M .LT. MNTHR
2063 *
2064 * Path 10 (M at least N, but not much larger)
2065 * Reduce to bidiagonal form without QR decomposition
2066 *
2067  ie = 1
2068  itauq = 1
2069  itaup = itauq + n
2070  iwork = itaup + n
2071 *
2072 * Bidiagonalize A
2073 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2074 * (RWorkspace: need N)
2075 *
2076  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2077  $ work( itaup ), work( iwork ), lwork-iwork+1,
2078  $ ierr )
2079  IF( wntuas ) THEN
2080 *
2081 * If left singular vectors desired in U, copy result to U
2082 * and generate left bidiagonalizing vectors in U
2083 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2084 * (RWorkspace: 0)
2085 *
2086  CALL clacpy( 'L', m, n, a, lda, u, ldu )
2087  IF( wntus )
2088  $ ncu = n
2089  IF( wntua )
2090  $ ncu = m
2091  CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2092  $ work( iwork ), lwork-iwork+1, ierr )
2093  END IF
2094  IF( wntvas ) THEN
2095 *
2096 * If right singular vectors desired in VT, copy result to
2097 * VT and generate right bidiagonalizing vectors in VT
2098 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2099 * (RWorkspace: 0)
2100 *
2101  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2102  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2103  $ work( iwork ), lwork-iwork+1, ierr )
2104  END IF
2105  IF( wntuo ) THEN
2106 *
2107 * If left singular vectors desired in A, generate left
2108 * bidiagonalizing vectors in A
2109 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2110 * (RWorkspace: 0)
2111 *
2112  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
2113  $ work( iwork ), lwork-iwork+1, ierr )
2114  END IF
2115  IF( wntvo ) THEN
2116 *
2117 * If right singular vectors desired in A, generate right
2118 * bidiagonalizing vectors in A
2119 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2120 * (RWorkspace: 0)
2121 *
2122  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
2123  $ work( iwork ), lwork-iwork+1, ierr )
2124  END IF
2125  irwork = ie + n
2126  IF( wntuas .OR. wntuo )
2127  $ nru = m
2128  IF( wntun )
2129  $ nru = 0
2130  IF( wntvas .OR. wntvo )
2131  $ ncvt = n
2132  IF( wntvn )
2133  $ ncvt = 0
2134  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2135 *
2136 * Perform bidiagonal QR iteration, if desired, computing
2137 * left singular vectors in U and computing right singular
2138 * vectors in VT
2139 * (CWorkspace: 0)
2140 * (RWorkspace: need BDSPAC)
2141 *
2142  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2143  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2144  $ info )
2145  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2146 *
2147 * Perform bidiagonal QR iteration, if desired, computing
2148 * left singular vectors in U and computing right singular
2149 * vectors in A
2150 * (CWorkspace: 0)
2151 * (RWorkspace: need BDSPAC)
2152 *
2153  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2154  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2155  $ info )
2156  ELSE
2157 *
2158 * Perform bidiagonal QR iteration, if desired, computing
2159 * left singular vectors in A and computing right singular
2160 * vectors in VT
2161 * (CWorkspace: 0)
2162 * (RWorkspace: need BDSPAC)
2163 *
2164  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2165  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2166  $ info )
2167  END IF
2168 *
2169  END IF
2170 *
2171  ELSE
2172 *
2173 * A has more columns than rows. If A has sufficiently more
2174 * columns than rows, first reduce using the LQ decomposition (if
2175 * sufficient workspace available)
2176 *
2177  IF( n.GE.mnthr ) THEN
2178 *
2179  IF( wntvn ) THEN
2180 *
2181 * Path 1t(N much larger than M, JOBVT='N')
2182 * No right singular vectors to be computed
2183 *
2184  itau = 1
2185  iwork = itau + m
2186 *
2187 * Compute A=L*Q
2188 * (CWorkspace: need 2*M, prefer M+M*NB)
2189 * (RWorkspace: 0)
2190 *
2191  CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),
2192  $ lwork-iwork+1, ierr )
2193 *
2194 * Zero out above L
2195 *
2196  CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2197  $ lda )
2198  ie = 1
2199  itauq = 1
2200  itaup = itauq + m
2201  iwork = itaup + m
2202 *
2203 * Bidiagonalize L in A
2204 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2205 * (RWorkspace: need M)
2206 *
2207  CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2208  $ work( itaup ), work( iwork ), lwork-iwork+1,
2209  $ ierr )
2210  IF( wntuo .OR. wntuas ) THEN
2211 *
2212 * If left singular vectors desired, generate Q
2213 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2214 * (RWorkspace: 0)
2215 *
2216  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2217  $ work( iwork ), lwork-iwork+1, ierr )
2218  END IF
2219  irwork = ie + m
2220  nru = 0
2221  IF( wntuo .OR. wntuas )
2222  $ nru = m
2223 *
2224 * Perform bidiagonal QR iteration, computing left singular
2225 * vectors of A in A if desired
2226 * (CWorkspace: 0)
2227 * (RWorkspace: need BDSPAC)
2228 *
2229  CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2230  $ a, lda, cdum, 1, rwork( irwork ), info )
2231 *
2232 * If left singular vectors desired in U, copy them there
2233 *
2234  IF( wntuas )
2235  $ CALL clacpy( 'F', m, m, a, lda, u, ldu )
2236 *
2237  ELSE IF( wntvo .AND. wntun ) THEN
2238 *
2239 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2240 * M right singular vectors to be overwritten on A and
2241 * no left singular vectors to be computed
2242 *
2243  IF( lwork.GE.m*m+3*m ) THEN
2244 *
2245 * Sufficient workspace for a fast algorithm
2246 *
2247  ir = 1
2248  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2249 *
2250 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2251 *
2252  ldwrku = lda
2253  chunk = n
2254  ldwrkr = lda
2255  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2256 *
2257 * WORK(IU) is LDA by N and WORK(IR) is M by M
2258 *
2259  ldwrku = lda
2260  chunk = n
2261  ldwrkr = m
2262  ELSE
2263 *
2264 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2265 *
2266  ldwrku = m
2267  chunk = ( lwork-m*m ) / m
2268  ldwrkr = m
2269  END IF
2270  itau = ir + ldwrkr*m
2271  iwork = itau + m
2272 *
2273 * Compute A=L*Q
2274 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2275 * (RWorkspace: 0)
2276 *
2277  CALL cgelqf( m, n, a, lda, work( itau ),
2278  $ work( iwork ), lwork-iwork+1, ierr )
2279 *
2280 * Copy L to WORK(IR) and zero out above it
2281 *
2282  CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2283  CALL claset( 'U', m-1, m-1, czero, czero,
2284  $ work( ir+ldwrkr ), ldwrkr )
2285 *
2286 * Generate Q in A
2287 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2288 * (RWorkspace: 0)
2289 *
2290  CALL cunglq( m, n, m, a, lda, work( itau ),
2291  $ work( iwork ), lwork-iwork+1, ierr )
2292  ie = 1
2293  itauq = itau
2294  itaup = itauq + m
2295  iwork = itaup + m
2296 *
2297 * Bidiagonalize L in WORK(IR)
2298 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2299 * (RWorkspace: need M)
2300 *
2301  CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2302  $ work( itauq ), work( itaup ),
2303  $ work( iwork ), lwork-iwork+1, ierr )
2304 *
2305 * Generate right vectors bidiagonalizing L
2306 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2307 * (RWorkspace: 0)
2308 *
2309  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2310  $ work( itaup ), work( iwork ),
2311  $ lwork-iwork+1, ierr )
2312  irwork = ie + m
2313 *
2314 * Perform bidiagonal QR iteration, computing right
2315 * singular vectors of L in WORK(IR)
2316 * (CWorkspace: need M*M)
2317 * (RWorkspace: need BDSPAC)
2318 *
2319  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2320  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2321  $ rwork( irwork ), info )
2322  iu = itauq
2323 *
2324 * Multiply right singular vectors of L in WORK(IR) by Q
2325 * in A, storing result in WORK(IU) and copying to A
2326 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2327 * (RWorkspace: 0)
2328 *
2329  DO 30 i = 1, n, chunk
2330  blk = min( n-i+1, chunk )
2331  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2332  $ ldwrkr, a( 1, i ), lda, czero,
2333  $ work( iu ), ldwrku )
2334  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2335  $ a( 1, i ), lda )
2336  30 CONTINUE
2337 *
2338  ELSE
2339 *
2340 * Insufficient workspace for a fast algorithm
2341 *
2342  ie = 1
2343  itauq = 1
2344  itaup = itauq + m
2345  iwork = itaup + m
2346 *
2347 * Bidiagonalize A
2348 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2349 * (RWorkspace: need M)
2350 *
2351  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
2352  $ work( itauq ), work( itaup ),
2353  $ work( iwork ), lwork-iwork+1, ierr )
2354 *
2355 * Generate right vectors bidiagonalizing A
2356 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2357 * (RWorkspace: 0)
2358 *
2359  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2360  $ work( iwork ), lwork-iwork+1, ierr )
2361  irwork = ie + m
2362 *
2363 * Perform bidiagonal QR iteration, computing right
2364 * singular vectors of A in A
2365 * (CWorkspace: 0)
2366 * (RWorkspace: need BDSPAC)
2367 *
2368  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2369  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2370 *
2371  END IF
2372 *
2373  ELSE IF( wntvo .AND. wntuas ) THEN
2374 *
2375 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2376 * M right singular vectors to be overwritten on A and
2377 * M left singular vectors to be computed in U
2378 *
2379  IF( lwork.GE.m*m+3*m ) THEN
2380 *
2381 * Sufficient workspace for a fast algorithm
2382 *
2383  ir = 1
2384  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2385 *
2386 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2387 *
2388  ldwrku = lda
2389  chunk = n
2390  ldwrkr = lda
2391  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2392 *
2393 * WORK(IU) is LDA by N and WORK(IR) is M by M
2394 *
2395  ldwrku = lda
2396  chunk = n
2397  ldwrkr = m
2398  ELSE
2399 *
2400 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2401 *
2402  ldwrku = m
2403  chunk = ( lwork-m*m ) / m
2404  ldwrkr = m
2405  END IF
2406  itau = ir + ldwrkr*m
2407  iwork = itau + m
2408 *
2409 * Compute A=L*Q
2410 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2411 * (RWorkspace: 0)
2412 *
2413  CALL cgelqf( m, n, a, lda, work( itau ),
2414  $ work( iwork ), lwork-iwork+1, ierr )
2415 *
2416 * Copy L to U, zeroing about above it
2417 *
2418  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2419  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2420  $ ldu )
2421 *
2422 * Generate Q in A
2423 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2424 * (RWorkspace: 0)
2425 *
2426  CALL cunglq( m, n, m, a, lda, work( itau ),
2427  $ work( iwork ), lwork-iwork+1, ierr )
2428  ie = 1
2429  itauq = itau
2430  itaup = itauq + m
2431  iwork = itaup + m
2432 *
2433 * Bidiagonalize L in U, copying result to WORK(IR)
2434 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2435 * (RWorkspace: need M)
2436 *
2437  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2438  $ work( itauq ), work( itaup ),
2439  $ work( iwork ), lwork-iwork+1, ierr )
2440  CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2441 *
2442 * Generate right vectors bidiagonalizing L in WORK(IR)
2443 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2444 * (RWorkspace: 0)
2445 *
2446  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2447  $ work( itaup ), work( iwork ),
2448  $ lwork-iwork+1, ierr )
2449 *
2450 * Generate left vectors bidiagonalizing L in U
2451 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2452 * (RWorkspace: 0)
2453 *
2454  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2455  $ work( iwork ), lwork-iwork+1, ierr )
2456  irwork = ie + m
2457 *
2458 * Perform bidiagonal QR iteration, computing left
2459 * singular vectors of L in U, and computing right
2460 * singular vectors of L in WORK(IR)
2461 * (CWorkspace: need M*M)
2462 * (RWorkspace: need BDSPAC)
2463 *
2464  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2465  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2466  $ rwork( irwork ), info )
2467  iu = itauq
2468 *
2469 * Multiply right singular vectors of L in WORK(IR) by Q
2470 * in A, storing result in WORK(IU) and copying to A
2471 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2472 * (RWorkspace: 0)
2473 *
2474  DO 40 i = 1, n, chunk
2475  blk = min( n-i+1, chunk )
2476  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2477  $ ldwrkr, a( 1, i ), lda, czero,
2478  $ work( iu ), ldwrku )
2479  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2480  $ a( 1, i ), lda )
2481  40 CONTINUE
2482 *
2483  ELSE
2484 *
2485 * Insufficient workspace for a fast algorithm
2486 *
2487  itau = 1
2488  iwork = itau + m
2489 *
2490 * Compute A=L*Q
2491 * (CWorkspace: need 2*M, prefer M+M*NB)
2492 * (RWorkspace: 0)
2493 *
2494  CALL cgelqf( m, n, a, lda, work( itau ),
2495  $ work( iwork ), lwork-iwork+1, ierr )
2496 *
2497 * Copy L to U, zeroing out above it
2498 *
2499  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2500  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2501  $ ldu )
2502 *
2503 * Generate Q in A
2504 * (CWorkspace: need 2*M, prefer M+M*NB)
2505 * (RWorkspace: 0)
2506 *
2507  CALL cunglq( m, n, m, a, lda, work( itau ),
2508  $ work( iwork ), lwork-iwork+1, ierr )
2509  ie = 1
2510  itauq = itau
2511  itaup = itauq + m
2512  iwork = itaup + m
2513 *
2514 * Bidiagonalize L in U
2515 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2516 * (RWorkspace: need M)
2517 *
2518  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2519  $ work( itauq ), work( itaup ),
2520  $ work( iwork ), lwork-iwork+1, ierr )
2521 *
2522 * Multiply right vectors bidiagonalizing L by Q in A
2523 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2524 * (RWorkspace: 0)
2525 *
2526  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2527  $ work( itaup ), a, lda, work( iwork ),
2528  $ lwork-iwork+1, ierr )
2529 *
2530 * Generate left vectors bidiagonalizing L in U
2531 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2532 * (RWorkspace: 0)
2533 *
2534  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2535  $ work( iwork ), lwork-iwork+1, ierr )
2536  irwork = ie + m
2537 *
2538 * Perform bidiagonal QR iteration, computing left
2539 * singular vectors of A in U and computing right
2540 * singular vectors of A in A
2541 * (CWorkspace: 0)
2542 * (RWorkspace: need BDSPAC)
2543 *
2544  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2545  $ u, ldu, cdum, 1, rwork( irwork ), info )
2546 *
2547  END IF
2548 *
2549  ELSE IF( wntvs ) THEN
2550 *
2551  IF( wntun ) THEN
2552 *
2553 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2554 * M right singular vectors to be computed in VT and
2555 * no left singular vectors to be computed
2556 *
2557  IF( lwork.GE.m*m+3*m ) THEN
2558 *
2559 * Sufficient workspace for a fast algorithm
2560 *
2561  ir = 1
2562  IF( lwork.GE.wrkbl+lda*m ) THEN
2563 *
2564 * WORK(IR) is LDA by M
2565 *
2566  ldwrkr = lda
2567  ELSE
2568 *
2569 * WORK(IR) is M by M
2570 *
2571  ldwrkr = m
2572  END IF
2573  itau = ir + ldwrkr*m
2574  iwork = itau + m
2575 *
2576 * Compute A=L*Q
2577 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2578 * (RWorkspace: 0)
2579 *
2580  CALL cgelqf( m, n, a, lda, work( itau ),
2581  $ work( iwork ), lwork-iwork+1, ierr )
2582 *
2583 * Copy L to WORK(IR), zeroing out above it
2584 *
2585  CALL clacpy( 'L', m, m, a, lda, work( ir ),
2586  $ ldwrkr )
2587  CALL claset( 'U', m-1, m-1, czero, czero,
2588  $ work( ir+ldwrkr ), ldwrkr )
2589 *
2590 * Generate Q in A
2591 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2592 * (RWorkspace: 0)
2593 *
2594  CALL cunglq( m, n, m, a, lda, work( itau ),
2595  $ work( iwork ), lwork-iwork+1, ierr )
2596  ie = 1
2597  itauq = itau
2598  itaup = itauq + m
2599  iwork = itaup + m
2600 *
2601 * Bidiagonalize L in WORK(IR)
2602 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2603 * (RWorkspace: need M)
2604 *
2605  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
2606  $ rwork( ie ), work( itauq ),
2607  $ work( itaup ), work( iwork ),
2608  $ lwork-iwork+1, ierr )
2609 *
2610 * Generate right vectors bidiagonalizing L in
2611 * WORK(IR)
2612 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2613 * (RWorkspace: 0)
2614 *
2615  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2616  $ work( itaup ), work( iwork ),
2617  $ lwork-iwork+1, ierr )
2618  irwork = ie + m
2619 *
2620 * Perform bidiagonal QR iteration, computing right
2621 * singular vectors of L in WORK(IR)
2622 * (CWorkspace: need M*M)
2623 * (RWorkspace: need BDSPAC)
2624 *
2625  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2626  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2627  $ rwork( irwork ), info )
2628 *
2629 * Multiply right singular vectors of L in WORK(IR) by
2630 * Q in A, storing result in VT
2631 * (CWorkspace: need M*M)
2632 * (RWorkspace: 0)
2633 *
2634  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
2635  $ ldwrkr, a, lda, czero, vt, ldvt )
2636 *
2637  ELSE
2638 *
2639 * Insufficient workspace for a fast algorithm
2640 *
2641  itau = 1
2642  iwork = itau + m
2643 *
2644 * Compute A=L*Q
2645 * (CWorkspace: need 2*M, prefer M+M*NB)
2646 * (RWorkspace: 0)
2647 *
2648  CALL cgelqf( m, n, a, lda, work( itau ),
2649  $ work( iwork ), lwork-iwork+1, ierr )
2650 *
2651 * Copy result to VT
2652 *
2653  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2654 *
2655 * Generate Q in VT
2656 * (CWorkspace: need 2*M, prefer M+M*NB)
2657 * (RWorkspace: 0)
2658 *
2659  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2660  $ work( iwork ), lwork-iwork+1, ierr )
2661  ie = 1
2662  itauq = itau
2663  itaup = itauq + m
2664  iwork = itaup + m
2665 *
2666 * Zero out above L in A
2667 *
2668  CALL claset( 'U', m-1, m-1, czero, czero,
2669  $ a( 1, 2 ), lda )
2670 *
2671 * Bidiagonalize L in A
2672 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2673 * (RWorkspace: need M)
2674 *
2675  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2676  $ work( itauq ), work( itaup ),
2677  $ work( iwork ), lwork-iwork+1, ierr )
2678 *
2679 * Multiply right vectors bidiagonalizing L by Q in VT
2680 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2681 * (RWorkspace: 0)
2682 *
2683  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2684  $ work( itaup ), vt, ldvt,
2685  $ work( iwork ), lwork-iwork+1, ierr )
2686  irwork = ie + m
2687 *
2688 * Perform bidiagonal QR iteration, computing right
2689 * singular vectors of A in VT
2690 * (CWorkspace: 0)
2691 * (RWorkspace: need BDSPAC)
2692 *
2693  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2694  $ ldvt, cdum, 1, cdum, 1,
2695  $ rwork( irwork ), info )
2696 *
2697  END IF
2698 *
2699  ELSE IF( wntuo ) THEN
2700 *
2701 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2702 * M right singular vectors to be computed in VT and
2703 * M left singular vectors to be overwritten on A
2704 *
2705  IF( lwork.GE.2*m*m+3*m ) THEN
2706 *
2707 * Sufficient workspace for a fast algorithm
2708 *
2709  iu = 1
2710  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2711 *
2712 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2713 *
2714  ldwrku = lda
2715  ir = iu + ldwrku*m
2716  ldwrkr = lda
2717  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2718 *
2719 * WORK(IU) is LDA by M and WORK(IR) is M by M
2720 *
2721  ldwrku = lda
2722  ir = iu + ldwrku*m
2723  ldwrkr = m
2724  ELSE
2725 *
2726 * WORK(IU) is M by M and WORK(IR) is M by M
2727 *
2728  ldwrku = m
2729  ir = iu + ldwrku*m
2730  ldwrkr = m
2731  END IF
2732  itau = ir + ldwrkr*m
2733  iwork = itau + m
2734 *
2735 * Compute A=L*Q
2736 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2737 * (RWorkspace: 0)
2738 *
2739  CALL cgelqf( m, n, a, lda, work( itau ),
2740  $ work( iwork ), lwork-iwork+1, ierr )
2741 *
2742 * Copy L to WORK(IU), zeroing out below it
2743 *
2744  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2745  $ ldwrku )
2746  CALL claset( 'U', m-1, m-1, czero, czero,
2747  $ work( iu+ldwrku ), ldwrku )
2748 *
2749 * Generate Q in A
2750 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2751 * (RWorkspace: 0)
2752 *
2753  CALL cunglq( m, n, m, a, lda, work( itau ),
2754  $ work( iwork ), lwork-iwork+1, ierr )
2755  ie = 1
2756  itauq = itau
2757  itaup = itauq + m
2758  iwork = itaup + m
2759 *
2760 * Bidiagonalize L in WORK(IU), copying result to
2761 * WORK(IR)
2762 * (CWorkspace: need 2*M*M+3*M,
2763 * prefer 2*M*M+2*M+2*M*NB)
2764 * (RWorkspace: need M)
2765 *
2766  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2767  $ rwork( ie ), work( itauq ),
2768  $ work( itaup ), work( iwork ),
2769  $ lwork-iwork+1, ierr )
2770  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
2771  $ work( ir ), ldwrkr )
2772 *
2773 * Generate right bidiagonalizing vectors in WORK(IU)
2774 * (CWorkspace: need 2*M*M+3*M-1,
2775 * prefer 2*M*M+2*M+(M-1)*NB)
2776 * (RWorkspace: 0)
2777 *
2778  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2779  $ work( itaup ), work( iwork ),
2780  $ lwork-iwork+1, ierr )
2781 *
2782 * Generate left bidiagonalizing vectors in WORK(IR)
2783 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2784 * (RWorkspace: 0)
2785 *
2786  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2787  $ work( itauq ), work( iwork ),
2788  $ lwork-iwork+1, ierr )
2789  irwork = ie + m
2790 *
2791 * Perform bidiagonal QR iteration, computing left
2792 * singular vectors of L in WORK(IR) and computing
2793 * right singular vectors of L in WORK(IU)
2794 * (CWorkspace: need 2*M*M)
2795 * (RWorkspace: need BDSPAC)
2796 *
2797  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2798  $ work( iu ), ldwrku, work( ir ),
2799  $ ldwrkr, cdum, 1, rwork( irwork ),
2800  $ info )
2801 *
2802 * Multiply right singular vectors of L in WORK(IU) by
2803 * Q in A, storing result in VT
2804 * (CWorkspace: need M*M)
2805 * (RWorkspace: 0)
2806 *
2807  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2808  $ ldwrku, a, lda, czero, vt, ldvt )
2809 *
2810 * Copy left singular vectors of L to A
2811 * (CWorkspace: need M*M)
2812 * (RWorkspace: 0)
2813 *
2814  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
2815  $ lda )
2816 *
2817  ELSE
2818 *
2819 * Insufficient workspace for a fast algorithm
2820 *
2821  itau = 1
2822  iwork = itau + m
2823 *
2824 * Compute A=L*Q, copying result to VT
2825 * (CWorkspace: need 2*M, prefer M+M*NB)
2826 * (RWorkspace: 0)
2827 *
2828  CALL cgelqf( m, n, a, lda, work( itau ),
2829  $ work( iwork ), lwork-iwork+1, ierr )
2830  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2831 *
2832 * Generate Q in VT
2833 * (CWorkspace: need 2*M, prefer M+M*NB)
2834 * (RWorkspace: 0)
2835 *
2836  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2837  $ work( iwork ), lwork-iwork+1, ierr )
2838  ie = 1
2839  itauq = itau
2840  itaup = itauq + m
2841  iwork = itaup + m
2842 *
2843 * Zero out above L in A
2844 *
2845  CALL claset( 'U', m-1, m-1, czero, czero,
2846  $ a( 1, 2 ), lda )
2847 *
2848 * Bidiagonalize L in A
2849 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2850 * (RWorkspace: need M)
2851 *
2852  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2853  $ work( itauq ), work( itaup ),
2854  $ work( iwork ), lwork-iwork+1, ierr )
2855 *
2856 * Multiply right vectors bidiagonalizing L by Q in VT
2857 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2858 * (RWorkspace: 0)
2859 *
2860  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2861  $ work( itaup ), vt, ldvt,
2862  $ work( iwork ), lwork-iwork+1, ierr )
2863 *
2864 * Generate left bidiagonalizing vectors of L in A
2865 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2866 * (RWorkspace: 0)
2867 *
2868  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2869  $ work( iwork ), lwork-iwork+1, ierr )
2870  irwork = ie + m
2871 *
2872 * Perform bidiagonal QR iteration, computing left
2873 * singular vectors of A in A and computing right
2874 * singular vectors of A in VT
2875 * (CWorkspace: 0)
2876 * (RWorkspace: need BDSPAC)
2877 *
2878  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2879  $ ldvt, a, lda, cdum, 1,
2880  $ rwork( irwork ), info )
2881 *
2882  END IF
2883 *
2884  ELSE IF( wntuas ) THEN
2885 *
2886 * Path 6t(N much larger than M, JOBU='S' or 'A',
2887 * JOBVT='S')
2888 * M right singular vectors to be computed in VT and
2889 * M left singular vectors to be computed in U
2890 *
2891  IF( lwork.GE.m*m+3*m ) THEN
2892 *
2893 * Sufficient workspace for a fast algorithm
2894 *
2895  iu = 1
2896  IF( lwork.GE.wrkbl+lda*m ) THEN
2897 *
2898 * WORK(IU) is LDA by N
2899 *
2900  ldwrku = lda
2901  ELSE
2902 *
2903 * WORK(IU) is LDA by M
2904 *
2905  ldwrku = m
2906  END IF
2907  itau = iu + ldwrku*m
2908  iwork = itau + m
2909 *
2910 * Compute A=L*Q
2911 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2912 * (RWorkspace: 0)
2913 *
2914  CALL cgelqf( m, n, a, lda, work( itau ),
2915  $ work( iwork ), lwork-iwork+1, ierr )
2916 *
2917 * Copy L to WORK(IU), zeroing out above it
2918 *
2919  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2920  $ ldwrku )
2921  CALL claset( 'U', m-1, m-1, czero, czero,
2922  $ work( iu+ldwrku ), ldwrku )
2923 *
2924 * Generate Q in A
2925 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2926 * (RWorkspace: 0)
2927 *
2928  CALL cunglq( m, n, m, a, lda, work( itau ),
2929  $ work( iwork ), lwork-iwork+1, ierr )
2930  ie = 1
2931  itauq = itau
2932  itaup = itauq + m
2933  iwork = itaup + m
2934 *
2935 * Bidiagonalize L in WORK(IU), copying result to U
2936 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2937 * (RWorkspace: need M)
2938 *
2939  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2940  $ rwork( ie ), work( itauq ),
2941  $ work( itaup ), work( iwork ),
2942  $ lwork-iwork+1, ierr )
2943  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
2944  $ ldu )
2945 *
2946 * Generate right bidiagonalizing vectors in WORK(IU)
2947 * (CWorkspace: need M*M+3*M-1,
2948 * prefer M*M+2*M+(M-1)*NB)
2949 * (RWorkspace: 0)
2950 *
2951  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2952  $ work( itaup ), work( iwork ),
2953  $ lwork-iwork+1, ierr )
2954 *
2955 * Generate left bidiagonalizing vectors in U
2956 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2957 * (RWorkspace: 0)
2958 *
2959  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2960  $ work( iwork ), lwork-iwork+1, ierr )
2961  irwork = ie + m
2962 *
2963 * Perform bidiagonal QR iteration, computing left
2964 * singular vectors of L in U and computing right
2965 * singular vectors of L in WORK(IU)
2966 * (CWorkspace: need M*M)
2967 * (RWorkspace: need BDSPAC)
2968 *
2969  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2970  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2971  $ rwork( irwork ), info )
2972 *
2973 * Multiply right singular vectors of L in WORK(IU) by
2974 * Q in A, storing result in VT
2975 * (CWorkspace: need M*M)
2976 * (RWorkspace: 0)
2977 *
2978  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2979  $ ldwrku, a, lda, czero, vt, ldvt )
2980 *
2981  ELSE
2982 *
2983 * Insufficient workspace for a fast algorithm
2984 *
2985  itau = 1
2986  iwork = itau + m
2987 *
2988 * Compute A=L*Q, copying result to VT
2989 * (CWorkspace: need 2*M, prefer M+M*NB)
2990 * (RWorkspace: 0)
2991 *
2992  CALL cgelqf( m, n, a, lda, work( itau ),
2993  $ work( iwork ), lwork-iwork+1, ierr )
2994  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2995 *
2996 * Generate Q in VT
2997 * (CWorkspace: need 2*M, prefer M+M*NB)
2998 * (RWorkspace: 0)
2999 *
3000  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
3001  $ work( iwork ), lwork-iwork+1, ierr )
3002 *
3003 * Copy L to U, zeroing out above it
3004 *
3005  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3006  CALL claset( 'U', m-1, m-1, czero, czero,
3007  $ u( 1, 2 ), ldu )
3008  ie = 1
3009  itauq = itau
3010  itaup = itauq + m
3011  iwork = itaup + m
3012 *
3013 * Bidiagonalize L in U
3014 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3015 * (RWorkspace: need M)
3016 *
3017  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3018  $ work( itauq ), work( itaup ),
3019  $ work( iwork ), lwork-iwork+1, ierr )
3020 *
3021 * Multiply right bidiagonalizing vectors in U by Q
3022 * in VT
3023 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3024 * (RWorkspace: 0)
3025 *
3026  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3027  $ work( itaup ), vt, ldvt,
3028  $ work( iwork ), lwork-iwork+1, ierr )
3029 *
3030 * Generate left bidiagonalizing vectors in U
3031 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3032 * (RWorkspace: 0)
3033 *
3034  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3035  $ work( iwork ), lwork-iwork+1, ierr )
3036  irwork = ie + m
3037 *
3038 * Perform bidiagonal QR iteration, computing left
3039 * singular vectors of A in U and computing right
3040 * singular vectors of A in VT
3041 * (CWorkspace: 0)
3042 * (RWorkspace: need BDSPAC)
3043 *
3044  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3045  $ ldvt, u, ldu, cdum, 1,
3046  $ rwork( irwork ), info )
3047 *
3048  END IF
3049 *
3050  END IF
3051 *
3052  ELSE IF( wntva ) THEN
3053 *
3054  IF( wntun ) THEN
3055 *
3056 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3057 * N right singular vectors to be computed in VT and
3058 * no left singular vectors to be computed
3059 *
3060  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3061 *
3062 * Sufficient workspace for a fast algorithm
3063 *
3064  ir = 1
3065  IF( lwork.GE.wrkbl+lda*m ) THEN
3066 *
3067 * WORK(IR) is LDA by M
3068 *
3069  ldwrkr = lda
3070  ELSE
3071 *
3072 * WORK(IR) is M by M
3073 *
3074  ldwrkr = m
3075  END IF
3076  itau = ir + ldwrkr*m
3077  iwork = itau + m
3078 *
3079 * Compute A=L*Q, copying result to VT
3080 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3081 * (RWorkspace: 0)
3082 *
3083  CALL cgelqf( m, n, a, lda, work( itau ),
3084  $ work( iwork ), lwork-iwork+1, ierr )
3085  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3086 *
3087 * Copy L to WORK(IR), zeroing out above it
3088 *
3089  CALL clacpy( 'L', m, m, a, lda, work( ir ),
3090  $ ldwrkr )
3091  CALL claset( 'U', m-1, m-1, czero, czero,
3092  $ work( ir+ldwrkr ), ldwrkr )
3093 *
3094 * Generate Q in VT
3095 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3096 * (RWorkspace: 0)
3097 *
3098  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3099  $ work( iwork ), lwork-iwork+1, ierr )
3100  ie = 1
3101  itauq = itau
3102  itaup = itauq + m
3103  iwork = itaup + m
3104 *
3105 * Bidiagonalize L in WORK(IR)
3106 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3107 * (RWorkspace: need M)
3108 *
3109  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
3110  $ rwork( ie ), work( itauq ),
3111  $ work( itaup ), work( iwork ),
3112  $ lwork-iwork+1, ierr )
3113 *
3114 * Generate right bidiagonalizing vectors in WORK(IR)
3115 * (CWorkspace: need M*M+3*M-1,
3116 * prefer M*M+2*M+(M-1)*NB)
3117 * (RWorkspace: 0)
3118 *
3119  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
3120  $ work( itaup ), work( iwork ),
3121  $ lwork-iwork+1, ierr )
3122  irwork = ie + m
3123 *
3124 * Perform bidiagonal QR iteration, computing right
3125 * singular vectors of L in WORK(IR)
3126 * (CWorkspace: need M*M)
3127 * (RWorkspace: need BDSPAC)
3128 *
3129  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3130  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3131  $ rwork( irwork ), info )
3132 *
3133 * Multiply right singular vectors of L in WORK(IR) by
3134 * Q in VT, storing result in A
3135 * (CWorkspace: need M*M)
3136 * (RWorkspace: 0)
3137 *
3138  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
3139  $ ldwrkr, vt, ldvt, czero, a, lda )
3140 *
3141 * Copy right singular vectors of A from A to VT
3142 *
3143  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3144 *
3145  ELSE
3146 *
3147 * Insufficient workspace for a fast algorithm
3148 *
3149  itau = 1
3150  iwork = itau + m
3151 *
3152 * Compute A=L*Q, copying result to VT
3153 * (CWorkspace: need 2*M, prefer M+M*NB)
3154 * (RWorkspace: 0)
3155 *
3156  CALL cgelqf( m, n, a, lda, work( itau ),
3157  $ work( iwork ), lwork-iwork+1, ierr )
3158  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3159 *
3160 * Generate Q in VT
3161 * (CWorkspace: need M+N, prefer M+N*NB)
3162 * (RWorkspace: 0)
3163 *
3164  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3165  $ work( iwork ), lwork-iwork+1, ierr )
3166  ie = 1
3167  itauq = itau
3168  itaup = itauq + m
3169  iwork = itaup + m
3170 *
3171 * Zero out above L in A
3172 *
3173  CALL claset( 'U', m-1, m-1, czero, czero,
3174  $ a( 1, 2 ), lda )
3175 *
3176 * Bidiagonalize L in A
3177 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3178 * (RWorkspace: need M)
3179 *
3180  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3181  $ work( itauq ), work( itaup ),
3182  $ work( iwork ), lwork-iwork+1, ierr )
3183 *
3184 * Multiply right bidiagonalizing vectors in A by Q
3185 * in VT
3186 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3187 * (RWorkspace: 0)
3188 *
3189  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3190  $ work( itaup ), vt, ldvt,
3191  $ work( iwork ), lwork-iwork+1, ierr )
3192  irwork = ie + m
3193 *
3194 * Perform bidiagonal QR iteration, computing right
3195 * singular vectors of A in VT
3196 * (CWorkspace: 0)
3197 * (RWorkspace: need BDSPAC)
3198 *
3199  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3200  $ ldvt, cdum, 1, cdum, 1,
3201  $ rwork( irwork ), info )
3202 *
3203  END IF
3204 *
3205  ELSE IF( wntuo ) THEN
3206 *
3207 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3208 * N right singular vectors to be computed in VT and
3209 * M left singular vectors to be overwritten on A
3210 *
3211  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3212 *
3213 * Sufficient workspace for a fast algorithm
3214 *
3215  iu = 1
3216  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3217 *
3218 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3219 *
3220  ldwrku = lda
3221  ir = iu + ldwrku*m
3222  ldwrkr = lda
3223  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3224 *
3225 * WORK(IU) is LDA by M and WORK(IR) is M by M
3226 *
3227  ldwrku = lda
3228  ir = iu + ldwrku*m
3229  ldwrkr = m
3230  ELSE
3231 *
3232 * WORK(IU) is M by M and WORK(IR) is M by M
3233 *
3234  ldwrku = m
3235  ir = iu + ldwrku*m
3236  ldwrkr = m
3237  END IF
3238  itau = ir + ldwrkr*m
3239  iwork = itau + m
3240 *
3241 * Compute A=L*Q, copying result to VT
3242 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3243 * (RWorkspace: 0)
3244 *
3245  CALL cgelqf( m, n, a, lda, work( itau ),
3246  $ work( iwork ), lwork-iwork+1, ierr )
3247  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3248 *
3249 * Generate Q in VT
3250 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3251 * (RWorkspace: 0)
3252 *
3253  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3254  $ work( iwork ), lwork-iwork+1, ierr )
3255 *
3256 * Copy L to WORK(IU), zeroing out above it
3257 *
3258  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3259  $ ldwrku )
3260  CALL claset( 'U', m-1, m-1, czero, czero,
3261  $ work( iu+ldwrku ), ldwrku )
3262  ie = 1
3263  itauq = itau
3264  itaup = itauq + m
3265  iwork = itaup + m
3266 *
3267 * Bidiagonalize L in WORK(IU), copying result to
3268 * WORK(IR)
3269 * (CWorkspace: need 2*M*M+3*M,
3270 * prefer 2*M*M+2*M+2*M*NB)
3271 * (RWorkspace: need M)
3272 *
3273  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3274  $ rwork( ie ), work( itauq ),
3275  $ work( itaup ), work( iwork ),
3276  $ lwork-iwork+1, ierr )
3277  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
3278  $ work( ir ), ldwrkr )
3279 *
3280 * Generate right bidiagonalizing vectors in WORK(IU)
3281 * (CWorkspace: need 2*M*M+3*M-1,
3282 * prefer 2*M*M+2*M+(M-1)*NB)
3283 * (RWorkspace: 0)
3284 *
3285  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3286  $ work( itaup ), work( iwork ),
3287  $ lwork-iwork+1, ierr )
3288 *
3289 * Generate left bidiagonalizing vectors in WORK(IR)
3290 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3291 * (RWorkspace: 0)
3292 *
3293  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3294  $ work( itauq ), work( iwork ),
3295  $ lwork-iwork+1, ierr )
3296  irwork = ie + m
3297 *
3298 * Perform bidiagonal QR iteration, computing left
3299 * singular vectors of L in WORK(IR) and computing
3300 * right singular vectors of L in WORK(IU)
3301 * (CWorkspace: need 2*M*M)
3302 * (RWorkspace: need BDSPAC)
3303 *
3304  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3305  $ work( iu ), ldwrku, work( ir ),
3306  $ ldwrkr, cdum, 1, rwork( irwork ),
3307  $ info )
3308 *
3309 * Multiply right singular vectors of L in WORK(IU) by
3310 * Q in VT, storing result in A
3311 * (CWorkspace: need M*M)
3312 * (RWorkspace: 0)
3313 *
3314  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3315  $ ldwrku, vt, ldvt, czero, a, lda )
3316 *
3317 * Copy right singular vectors of A from A to VT
3318 *
3319  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3320 *
3321 * Copy left singular vectors of A from WORK(IR) to A
3322 *
3323  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
3324  $ lda )
3325 *
3326  ELSE
3327 *
3328 * Insufficient workspace for a fast algorithm
3329 *
3330  itau = 1
3331  iwork = itau + m
3332 *
3333 * Compute A=L*Q, copying result to VT
3334 * (CWorkspace: need 2*M, prefer M+M*NB)
3335 * (RWorkspace: 0)
3336 *
3337  CALL cgelqf( m, n, a, lda, work( itau ),
3338  $ work( iwork ), lwork-iwork+1, ierr )
3339  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3340 *
3341 * Generate Q in VT
3342 * (CWorkspace: need M+N, prefer M+N*NB)
3343 * (RWorkspace: 0)
3344 *
3345  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3346  $ work( iwork ), lwork-iwork+1, ierr )
3347  ie = 1
3348  itauq = itau
3349  itaup = itauq + m
3350  iwork = itaup + m
3351 *
3352 * Zero out above L in A
3353 *
3354  CALL claset( 'U', m-1, m-1, czero, czero,
3355  $ a( 1, 2 ), lda )
3356 *
3357 * Bidiagonalize L in A
3358 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3359 * (RWorkspace: need M)
3360 *
3361  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3362  $ work( itauq ), work( itaup ),
3363  $ work( iwork ), lwork-iwork+1, ierr )
3364 *
3365 * Multiply right bidiagonalizing vectors in A by Q
3366 * in VT
3367 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3368 * (RWorkspace: 0)
3369 *
3370  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3371  $ work( itaup ), vt, ldvt,
3372  $ work( iwork ), lwork-iwork+1, ierr )
3373 *
3374 * Generate left bidiagonalizing vectors in A
3375 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3376 * (RWorkspace: 0)
3377 *
3378  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
3379  $ work( iwork ), lwork-iwork+1, ierr )
3380  irwork = ie + m
3381 *
3382 * Perform bidiagonal QR iteration, computing left
3383 * singular vectors of A in A and computing right
3384 * singular vectors of A in VT
3385 * (CWorkspace: 0)
3386 * (RWorkspace: need BDSPAC)
3387 *
3388  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3389  $ ldvt, a, lda, cdum, 1,
3390  $ rwork( irwork ), info )
3391 *
3392  END IF
3393 *
3394  ELSE IF( wntuas ) THEN
3395 *
3396 * Path 9t(N much larger than M, JOBU='S' or 'A',
3397 * JOBVT='A')
3398 * N right singular vectors to be computed in VT and
3399 * M left singular vectors to be computed in U
3400 *
3401  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3402 *
3403 * Sufficient workspace for a fast algorithm
3404 *
3405  iu = 1
3406  IF( lwork.GE.wrkbl+lda*m ) THEN
3407 *
3408 * WORK(IU) is LDA by M
3409 *
3410  ldwrku = lda
3411  ELSE
3412 *
3413 * WORK(IU) is M by M
3414 *
3415  ldwrku = m
3416  END IF
3417  itau = iu + ldwrku*m
3418  iwork = itau + m
3419 *
3420 * Compute A=L*Q, copying result to VT
3421 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3422 * (RWorkspace: 0)
3423 *
3424  CALL cgelqf( m, n, a, lda, work( itau ),
3425  $ work( iwork ), lwork-iwork+1, ierr )
3426  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3427 *
3428 * Generate Q in VT
3429 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3430 * (RWorkspace: 0)
3431 *
3432  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3433  $ work( iwork ), lwork-iwork+1, ierr )
3434 *
3435 * Copy L to WORK(IU), zeroing out above it
3436 *
3437  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3438  $ ldwrku )
3439  CALL claset( 'U', m-1, m-1, czero, czero,
3440  $ work( iu+ldwrku ), ldwrku )
3441  ie = 1
3442  itauq = itau
3443  itaup = itauq + m
3444  iwork = itaup + m
3445 *
3446 * Bidiagonalize L in WORK(IU), copying result to U
3447 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3448 * (RWorkspace: need M)
3449 *
3450  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3451  $ rwork( ie ), work( itauq ),
3452  $ work( itaup ), work( iwork ),
3453  $ lwork-iwork+1, ierr )
3454  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
3455  $ ldu )
3456 *
3457 * Generate right bidiagonalizing vectors in WORK(IU)
3458 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3459 * (RWorkspace: 0)
3460 *
3461  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3462  $ work( itaup ), work( iwork ),
3463  $ lwork-iwork+1, ierr )
3464 *
3465 * Generate left bidiagonalizing vectors in U
3466 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3467 * (RWorkspace: 0)
3468 *
3469  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3470  $ work( iwork ), lwork-iwork+1, ierr )
3471  irwork = ie + m
3472 *
3473 * Perform bidiagonal QR iteration, computing left
3474 * singular vectors of L in U and computing right
3475 * singular vectors of L in WORK(IU)
3476 * (CWorkspace: need M*M)
3477 * (RWorkspace: need BDSPAC)
3478 *
3479  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3480  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3481  $ rwork( irwork ), info )
3482 *
3483 * Multiply right singular vectors of L in WORK(IU) by
3484 * Q in VT, storing result in A
3485 * (CWorkspace: need M*M)
3486 * (RWorkspace: 0)
3487 *
3488  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3489  $ ldwrku, vt, ldvt, czero, a, lda )
3490 *
3491 * Copy right singular vectors of A from A to VT
3492 *
3493  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3494 *
3495  ELSE
3496 *
3497 * Insufficient workspace for a fast algorithm
3498 *
3499  itau = 1
3500  iwork = itau + m
3501 *
3502 * Compute A=L*Q, copying result to VT
3503 * (CWorkspace: need 2*M, prefer M+M*NB)
3504 * (RWorkspace: 0)
3505 *
3506  CALL cgelqf( m, n, a, lda, work( itau ),
3507  $ work( iwork ), lwork-iwork+1, ierr )
3508  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3509 *
3510 * Generate Q in VT
3511 * (CWorkspace: need M+N, prefer M+N*NB)
3512 * (RWorkspace: 0)
3513 *
3514  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3515  $ work( iwork ), lwork-iwork+1, ierr )
3516 *
3517 * Copy L to U, zeroing out above it
3518 *
3519  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3520  CALL claset( 'U', m-1, m-1, czero, czero,
3521  $ u( 1, 2 ), ldu )
3522  ie = 1
3523  itauq = itau
3524  itaup = itauq + m
3525  iwork = itaup + m
3526 *
3527 * Bidiagonalize L in U
3528 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3529 * (RWorkspace: need M)
3530 *
3531  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3532  $ work( itauq ), work( itaup ),
3533  $ work( iwork ), lwork-iwork+1, ierr )
3534 *
3535 * Multiply right bidiagonalizing vectors in U by Q
3536 * in VT
3537 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3538 * (RWorkspace: 0)
3539 *
3540  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3541  $ work( itaup ), vt, ldvt,
3542  $ work( iwork ), lwork-iwork+1, ierr )
3543 *
3544 * Generate left bidiagonalizing vectors in U
3545 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3546 * (RWorkspace: 0)
3547 *
3548  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3549  $ work( iwork ), lwork-iwork+1, ierr )
3550  irwork = ie + m
3551 *
3552 * Perform bidiagonal QR iteration, computing left
3553 * singular vectors of A in U and computing right
3554 * singular vectors of A in VT
3555 * (CWorkspace: 0)
3556 * (RWorkspace: need BDSPAC)
3557 *
3558  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3559  $ ldvt, u, ldu, cdum, 1,
3560  $ rwork( irwork ), info )
3561 *
3562  END IF
3563 *
3564  END IF
3565 *
3566  END IF
3567 *
3568  ELSE
3569 *
3570 * N .LT. MNTHR
3571 *
3572 * Path 10t(N greater than M, but not much larger)
3573 * Reduce to bidiagonal form without LQ decomposition
3574 *
3575  ie = 1
3576  itauq = 1
3577  itaup = itauq + m
3578  iwork = itaup + m
3579 *
3580 * Bidiagonalize A
3581 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3582 * (RWorkspace: M)
3583 *
3584  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3585  $ work( itaup ), work( iwork ), lwork-iwork+1,
3586  $ ierr )
3587  IF( wntuas ) THEN
3588 *
3589 * If left singular vectors desired in U, copy result to U
3590 * and generate left bidiagonalizing vectors in U
3591 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3592 * (RWorkspace: 0)
3593 *
3594  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3595  CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3596  $ work( iwork ), lwork-iwork+1, ierr )
3597  END IF
3598  IF( wntvas ) THEN
3599 *
3600 * If right singular vectors desired in VT, copy result to
3601 * VT and generate right bidiagonalizing vectors in VT
3602 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3603 * (RWorkspace: 0)
3604 *
3605  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3606  IF( wntva )
3607  $ nrvt = n
3608  IF( wntvs )
3609  $ nrvt = m
3610  CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3611  $ work( iwork ), lwork-iwork+1, ierr )
3612  END IF
3613  IF( wntuo ) THEN
3614 *
3615 * If left singular vectors desired in A, generate left
3616 * bidiagonalizing vectors in A
3617 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3618 * (RWorkspace: 0)
3619 *
3620  CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
3621  $ work( iwork ), lwork-iwork+1, ierr )
3622  END IF
3623  IF( wntvo ) THEN
3624 *
3625 * If right singular vectors desired in A, generate right
3626 * bidiagonalizing vectors in A
3627 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3628 * (RWorkspace: 0)
3629 *
3630  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
3631  $ work( iwork ), lwork-iwork+1, ierr )
3632  END IF
3633  irwork = ie + m
3634  IF( wntuas .OR. wntuo )
3635  $ nru = m
3636  IF( wntun )
3637  $ nru = 0
3638  IF( wntvas .OR. wntvo )
3639  $ ncvt = n
3640  IF( wntvn )
3641  $ ncvt = 0
3642  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3643 *
3644 * Perform bidiagonal QR iteration, if desired, computing
3645 * left singular vectors in U and computing right singular
3646 * vectors in VT
3647 * (CWorkspace: 0)
3648 * (RWorkspace: need BDSPAC)
3649 *
3650  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3651  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3652  $ info )
3653  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3654 *
3655 * Perform bidiagonal QR iteration, if desired, computing
3656 * left singular vectors in U and computing right singular
3657 * vectors in A
3658 * (CWorkspace: 0)
3659 * (RWorkspace: need BDSPAC)
3660 *
3661  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3662  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3663  $ info )
3664  ELSE
3665 *
3666 * Perform bidiagonal QR iteration, if desired, computing
3667 * left singular vectors in A and computing right singular
3668 * vectors in VT
3669 * (CWorkspace: 0)
3670 * (RWorkspace: need BDSPAC)
3671 *
3672  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3673  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3674  $ info )
3675  END IF
3676 *
3677  END IF
3678 *
3679  END IF
3680 *
3681 * Undo scaling if necessary
3682 *
3683  IF( iscl.EQ.1 ) THEN
3684  IF( anrm.GT.bignum )
3685  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3686  $ ierr )
3687  IF( info.NE.0 .AND. anrm.GT.bignum )
3688  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3689  $ rwork( ie ), minmn, ierr )
3690  IF( anrm.LT.smlnum )
3691  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3692  $ ierr )
3693  IF( info.NE.0 .AND. anrm.LT.smlnum )
3694  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3695  $ rwork( ie ), minmn, ierr )
3696  END IF
3697 *
3698 * Return optimal workspace in WORK(1)
3699 *
3700  work( 1 ) = maxwrk
3701 *
3702  RETURN
3703 *
3704 * End of CGESVD
3705 *
3706  END
subroutine cungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGBR
Definition: cungbr.f:159
subroutine cbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, RWORK, INFO)
CBDSQR
Definition: cbdsqr.f:224
subroutine claset(UPLO, M, N, ALPHA, BETA, A, LDA)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: claset.f:108
subroutine cungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGQR
Definition: cungqr.f:130
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:145
subroutine cgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGELQF
Definition: cgelqf.f:137
subroutine cunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMBR
Definition: cunmbr.f:199
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine cgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGEQRF
Definition: cgeqrf.f:138
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:145
subroutine cgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
CGEBRD
Definition: cgebrd.f:208
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:105
subroutine cgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
CGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: cgesvd.f:216
subroutine cunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGLQ
Definition: cunglq.f:129
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189