LAPACK  3.5.0
LAPACK: Linear Algebra PACKage
 All Classes Files Functions Variables Typedefs Macros
cgesvd.f
Go to the documentation of this file.
1 *> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL RWORK( * ), S( * )
30 * COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> CGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is REAL array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is REAL array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if CBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \date April 2012
210 *
211 *> \ingroup complexGEsing
212 *
213 * =====================================================================
214  SUBROUTINE cgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
215  $ work, lwork, rwork, info )
216 *
217 * -- LAPACK driver routine (version 3.4.1) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER jobu, jobvt
224  INTEGER info, lda, ldu, ldvt, lwork, m, n
225 * ..
226 * .. Array Arguments ..
227  REAL rwork( * ), s( * )
228  COMPLEX a( lda, * ), u( ldu, * ), vt( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX czero, cone
236  parameter( czero = ( 0.0e0, 0.0e0 ),
237  $ cone = ( 1.0e0, 0.0e0 ) )
238  REAL zero, one
239  parameter( zero = 0.0e0, one = 1.0e0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER blk, chunk, i, ie, ierr, ir, irwork, iscl,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER lwork_cgeqrf, lwork_cungqr_n, lwork_cungqr_m,
249  $ lwork_cgebrd, lwork_cungbr_p, lwork_cungbr_q,
250  $ lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m
251  REAL anrm, bignum, eps, smlnum
252 * ..
253 * .. Local Arrays ..
254  REAL dum( 1 )
255  COMPLEX cdum( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,
260  $ slascl, xerbla
261 * ..
262 * .. External Functions ..
263  LOGICAL lsame
264  INTEGER ilaenv
265  REAL clange, slamch
266  EXTERNAL lsame, ilaenv, clange, slamch
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for CGEQRF
324  CALL cgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
325  lwork_cgeqrf=dum(1)
326 * Compute space needed for CUNGQR
327  CALL cungqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
328  lwork_cungqr_n=dum(1)
329  CALL cungqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
330  lwork_cungqr_m=dum(1)
331 * Compute space needed for CGEBRD
332  CALL cgebrd( n, n, a, lda, s, dum(1), dum(1),
333  $ dum(1), dum(1), -1, ierr )
334  lwork_cgebrd=dum(1)
335 * Compute space needed for CUNGBR
336  CALL cungbr( 'P', n, n, n, a, lda, dum(1),
337  $ dum(1), -1, ierr )
338  lwork_cungbr_p=dum(1)
339  CALL cungbr( 'Q', n, n, n, a, lda, dum(1),
340  $ dum(1), -1, ierr )
341  lwork_cungbr_q=dum(1)
342 *
343  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
344  IF( m.GE.mnthr ) THEN
345  IF( wntun ) THEN
346 *
347 * Path 1 (M much larger than N, JOBU='N')
348 *
349  maxwrk = n + lwork_cgeqrf
350  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
351  IF( wntvo .OR. wntvas )
352  $ maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
353  minwrk = 3*n
354  ELSE IF( wntuo .AND. wntvn ) THEN
355 *
356 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
357 *
358  wrkbl = n + lwork_cgeqrf
359  wrkbl = max( wrkbl, n+lwork_cungqr_n )
360  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
361  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
362  maxwrk = max( n*n+wrkbl, n*n+m*n )
363  minwrk = 2*n + m
364  ELSE IF( wntuo .AND. wntvas ) THEN
365 *
366 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
367 * 'A')
368 *
369  wrkbl = n + lwork_cgeqrf
370  wrkbl = max( wrkbl, n+lwork_cungqr_n )
371  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
372  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
373  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
374  maxwrk = max( n*n+wrkbl, n*n+m*n )
375  minwrk = 2*n + m
376  ELSE IF( wntus .AND. wntvn ) THEN
377 *
378 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
379 *
380  wrkbl = n + lwork_cgeqrf
381  wrkbl = max( wrkbl, n+lwork_cungqr_n )
382  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
383  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
384  maxwrk = n*n + wrkbl
385  minwrk = 2*n + m
386  ELSE IF( wntus .AND. wntvo ) THEN
387 *
388 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
389 *
390  wrkbl = n + lwork_cgeqrf
391  wrkbl = max( wrkbl, n+lwork_cungqr_n )
392  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
393  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
394  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
395  maxwrk = 2*n*n + wrkbl
396  minwrk = 2*n + m
397  ELSE IF( wntus .AND. wntvas ) THEN
398 *
399 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
400 * 'A')
401 *
402  wrkbl = n + lwork_cgeqrf
403  wrkbl = max( wrkbl, n+lwork_cungqr_n )
404  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
405  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
406  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
407  maxwrk = n*n + wrkbl
408  minwrk = 2*n + m
409  ELSE IF( wntua .AND. wntvn ) THEN
410 *
411 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
412 *
413  wrkbl = n + lwork_cgeqrf
414  wrkbl = max( wrkbl, n+lwork_cungqr_m )
415  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
416  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
417  maxwrk = n*n + wrkbl
418  minwrk = 2*n + m
419  ELSE IF( wntua .AND. wntvo ) THEN
420 *
421 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
422 *
423  wrkbl = n + lwork_cgeqrf
424  wrkbl = max( wrkbl, n+lwork_cungqr_m )
425  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
426  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
427  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = 2*n + m
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_cgeqrf
436  wrkbl = max( wrkbl, n+lwork_cungqr_m )
437  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
438  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
439  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
440  maxwrk = n*n + wrkbl
441  minwrk = 2*n + m
442  END IF
443  ELSE
444 *
445 * Path 10 (M at least N, but not much larger)
446 *
447  CALL cgebrd( m, n, a, lda, s, dum(1), dum(1),
448  $ dum(1), dum(1), -1, ierr )
449  lwork_cgebrd=dum(1)
450  maxwrk = 2*n + lwork_cgebrd
451  IF( wntus .OR. wntuo ) THEN
452  CALL cungbr( 'Q', m, n, n, a, lda, dum(1),
453  $ dum(1), -1, ierr )
454  lwork_cungbr_q=dum(1)
455  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
456  END IF
457  IF( wntua ) THEN
458  CALL cungbr( 'Q', m, m, n, a, lda, dum(1),
459  $ dum(1), -1, ierr )
460  lwork_cungbr_q=dum(1)
461  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
462  END IF
463  IF( .NOT.wntvn ) THEN
464  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
465  minwrk = 2*n + m
466  END IF
467  END IF
468  ELSE IF( minmn.GT.0 ) THEN
469 *
470 * Space needed for CBDSQR is BDSPAC = 5*M
471 *
472  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
473 * Compute space needed for CGELQF
474  CALL cgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475  lwork_cgelqf=dum(1)
476 * Compute space needed for CUNGLQ
477  CALL cunglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478  lwork_cunglq_n=dum(1)
479  CALL cunglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480  lwork_cunglq_m=dum(1)
481 * Compute space needed for CGEBRD
482  CALL cgebrd( m, m, a, lda, s, dum(1), dum(1),
483  $ dum(1), dum(1), -1, ierr )
484  lwork_cgebrd=dum(1)
485 * Compute space needed for CUNGBR P
486  CALL cungbr( 'P', m, m, m, a, n, dum(1),
487  $ dum(1), -1, ierr )
488  lwork_cungbr_p=dum(1)
489 * Compute space needed for CUNGBR Q
490  CALL cungbr( 'Q', m, m, m, a, n, dum(1),
491  $ dum(1), -1, ierr )
492  lwork_cungbr_q=dum(1)
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_cgelqf
499  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
502  minwrk = 3*m
503  ELSE IF( wntvo .AND. wntun ) THEN
504 *
505 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
506 *
507  wrkbl = m + lwork_cgelqf
508  wrkbl = max( wrkbl, m+lwork_cunglq_m )
509  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
510  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
511  maxwrk = max( m*m+wrkbl, m*m+m*n )
512  minwrk = 2*m + n
513  ELSE IF( wntvo .AND. wntuas ) THEN
514 *
515 * Path 3t(N much larger than M, JOBU='S' or 'A',
516 * JOBVT='O')
517 *
518  wrkbl = m + lwork_cgelqf
519  wrkbl = max( wrkbl, m+lwork_cunglq_m )
520  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
521  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
522  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
523  maxwrk = max( m*m+wrkbl, m*m+m*n )
524  minwrk = 2*m + n
525  ELSE IF( wntvs .AND. wntun ) THEN
526 *
527 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
528 *
529  wrkbl = m + lwork_cgelqf
530  wrkbl = max( wrkbl, m+lwork_cunglq_m )
531  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
532  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
533  maxwrk = m*m + wrkbl
534  minwrk = 2*m + n
535  ELSE IF( wntvs .AND. wntuo ) THEN
536 *
537 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
538 *
539  wrkbl = m + lwork_cgelqf
540  wrkbl = max( wrkbl, m+lwork_cunglq_m )
541  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
542  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
543  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
544  maxwrk = 2*m*m + wrkbl
545  minwrk = 2*m + n
546  ELSE IF( wntvs .AND. wntuas ) THEN
547 *
548 * Path 6t(N much larger than M, JOBU='S' or 'A',
549 * JOBVT='S')
550 *
551  wrkbl = m + lwork_cgelqf
552  wrkbl = max( wrkbl, m+lwork_cunglq_m )
553  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
554  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
555  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
556  maxwrk = m*m + wrkbl
557  minwrk = 2*m + n
558  ELSE IF( wntva .AND. wntun ) THEN
559 *
560 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
561 *
562  wrkbl = m + lwork_cgelqf
563  wrkbl = max( wrkbl, m+lwork_cunglq_n )
564  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
565  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
566  maxwrk = m*m + wrkbl
567  minwrk = 2*m + n
568  ELSE IF( wntva .AND. wntuo ) THEN
569 *
570 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
571 *
572  wrkbl = m + lwork_cgelqf
573  wrkbl = max( wrkbl, m+lwork_cunglq_n )
574  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
575  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
576  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
577  maxwrk = 2*m*m + wrkbl
578  minwrk = 2*m + n
579  ELSE IF( wntva .AND. wntuas ) THEN
580 *
581 * Path 9t(N much larger than M, JOBU='S' or 'A',
582 * JOBVT='A')
583 *
584  wrkbl = m + lwork_cgelqf
585  wrkbl = max( wrkbl, m+lwork_cunglq_n )
586  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
587  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
588  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
589  maxwrk = m*m + wrkbl
590  minwrk = 2*m + n
591  END IF
592  ELSE
593 *
594 * Path 10t(N greater than M, but not much larger)
595 *
596  CALL cgebrd( m, n, a, lda, s, dum(1), dum(1),
597  $ dum(1), dum(1), -1, ierr )
598  lwork_cgebrd=dum(1)
599  maxwrk = 2*m + lwork_cgebrd
600  IF( wntvs .OR. wntvo ) THEN
601 * Compute space needed for CUNGBR P
602  CALL cungbr( 'P', m, n, m, a, n, dum(1),
603  $ dum(1), -1, ierr )
604  lwork_cungbr_p=dum(1)
605  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
606  END IF
607  IF( wntva ) THEN
608  CALL cungbr( 'P', n, n, m, a, n, dum(1),
609  $ dum(1), -1, ierr )
610  lwork_cungbr_p=dum(1)
611  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
612  END IF
613  IF( .NOT.wntun ) THEN
614  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
615  minwrk = 2*m + n
616  END IF
617  END IF
618  END IF
619  maxwrk = max( minwrk, maxwrk )
620  work( 1 ) = maxwrk
621 *
622  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
623  info = -13
624  END IF
625  END IF
626 *
627  IF( info.NE.0 ) THEN
628  CALL xerbla( 'CGESVD', -info )
629  RETURN
630  ELSE IF( lquery ) THEN
631  RETURN
632  END IF
633 *
634 * Quick return if possible
635 *
636  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
637  RETURN
638  END IF
639 *
640 * Get machine constants
641 *
642  eps = slamch( 'P' )
643  smlnum = sqrt( slamch( 'S' ) ) / eps
644  bignum = one / smlnum
645 *
646 * Scale A if max element outside range [SMLNUM,BIGNUM]
647 *
648  anrm = clange( 'M', m, n, a, lda, dum )
649  iscl = 0
650  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
651  iscl = 1
652  CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
653  ELSE IF( anrm.GT.bignum ) THEN
654  iscl = 1
655  CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
656  END IF
657 *
658  IF( m.GE.n ) THEN
659 *
660 * A has at least as many rows as columns. If A has sufficiently
661 * more rows than columns, first reduce using the QR
662 * decomposition (if sufficient workspace available)
663 *
664  IF( m.GE.mnthr ) THEN
665 *
666  IF( wntun ) THEN
667 *
668 * Path 1 (M much larger than N, JOBU='N')
669 * No left singular vectors to be computed
670 *
671  itau = 1
672  iwork = itau + n
673 *
674 * Compute A=Q*R
675 * (CWorkspace: need 2*N, prefer N+N*NB)
676 * (RWorkspace: need 0)
677 *
678  CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),
679  $ lwork-iwork+1, ierr )
680 *
681 * Zero out below R
682 *
683  CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
684  $ lda )
685  ie = 1
686  itauq = 1
687  itaup = itauq + n
688  iwork = itaup + n
689 *
690 * Bidiagonalize R in A
691 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
692 * (RWorkspace: need N)
693 *
694  CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
695  $ work( itaup ), work( iwork ), lwork-iwork+1,
696  $ ierr )
697  ncvt = 0
698  IF( wntvo .OR. wntvas ) THEN
699 *
700 * If right singular vectors desired, generate P'.
701 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
702 * (RWorkspace: 0)
703 *
704  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
705  $ work( iwork ), lwork-iwork+1, ierr )
706  ncvt = n
707  END IF
708  irwork = ie + n
709 *
710 * Perform bidiagonal QR iteration, computing right
711 * singular vectors of A in A if desired
712 * (CWorkspace: 0)
713 * (RWorkspace: need BDSPAC)
714 *
715  CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
716  $ cdum, 1, cdum, 1, rwork( irwork ), info )
717 *
718 * If right singular vectors desired in VT, copy them there
719 *
720  IF( wntvas )
721  $ CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
722 *
723  ELSE IF( wntuo .AND. wntvn ) THEN
724 *
725 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
726 * N left singular vectors to be overwritten on A and
727 * no right singular vectors to be computed
728 *
729  IF( lwork.GE.n*n+3*n ) THEN
730 *
731 * Sufficient workspace for a fast algorithm
732 *
733  ir = 1
734  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
735 *
736 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
737 *
738  ldwrku = lda
739  ldwrkr = lda
740  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
741 *
742 * WORK(IU) is LDA by N, WORK(IR) is N by N
743 *
744  ldwrku = lda
745  ldwrkr = n
746  ELSE
747 *
748 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
749 *
750  ldwrku = ( lwork-n*n ) / n
751  ldwrkr = n
752  END IF
753  itau = ir + ldwrkr*n
754  iwork = itau + n
755 *
756 * Compute A=Q*R
757 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758 * (RWorkspace: 0)
759 *
760  CALL cgeqrf( m, n, a, lda, work( itau ),
761  $ work( iwork ), lwork-iwork+1, ierr )
762 *
763 * Copy R to WORK(IR) and zero out below it
764 *
765  CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
766  CALL claset( 'L', n-1, n-1, czero, czero,
767  $ work( ir+1 ), ldwrkr )
768 *
769 * Generate Q in A
770 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
771 * (RWorkspace: 0)
772 *
773  CALL cungqr( m, n, n, a, lda, work( itau ),
774  $ work( iwork ), lwork-iwork+1, ierr )
775  ie = 1
776  itauq = itau
777  itaup = itauq + n
778  iwork = itaup + n
779 *
780 * Bidiagonalize R in WORK(IR)
781 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782 * (RWorkspace: need N)
783 *
784  CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
785  $ work( itauq ), work( itaup ),
786  $ work( iwork ), lwork-iwork+1, ierr )
787 *
788 * Generate left vectors bidiagonalizing R
789 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
790 * (RWorkspace: need 0)
791 *
792  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
793  $ work( itauq ), work( iwork ),
794  $ lwork-iwork+1, ierr )
795  irwork = ie + n
796 *
797 * Perform bidiagonal QR iteration, computing left
798 * singular vectors of R in WORK(IR)
799 * (CWorkspace: need N*N)
800 * (RWorkspace: need BDSPAC)
801 *
802  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
803  $ work( ir ), ldwrkr, cdum, 1,
804  $ rwork( irwork ), info )
805  iu = itauq
806 *
807 * Multiply Q in A by left singular vectors of R in
808 * WORK(IR), storing result in WORK(IU) and copying to A
809 * (CWorkspace: need N*N+N, prefer N*N+M*N)
810 * (RWorkspace: 0)
811 *
812  DO 10 i = 1, m, ldwrku
813  chunk = min( m-i+1, ldwrku )
814  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815  $ lda, work( ir ), ldwrkr, czero,
816  $ work( iu ), ldwrku )
817  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
818  $ a( i, 1 ), lda )
819  10 CONTINUE
820 *
821  ELSE
822 *
823 * Insufficient workspace for a fast algorithm
824 *
825  ie = 1
826  itauq = 1
827  itaup = itauq + n
828  iwork = itaup + n
829 *
830 * Bidiagonalize A
831 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
832 * (RWorkspace: N)
833 *
834  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
835  $ work( itauq ), work( itaup ),
836  $ work( iwork ), lwork-iwork+1, ierr )
837 *
838 * Generate left vectors bidiagonalizing A
839 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
840 * (RWorkspace: 0)
841 *
842  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
843  $ work( iwork ), lwork-iwork+1, ierr )
844  irwork = ie + n
845 *
846 * Perform bidiagonal QR iteration, computing left
847 * singular vectors of A in A
848 * (CWorkspace: need 0)
849 * (RWorkspace: need BDSPAC)
850 *
851  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
852  $ a, lda, cdum, 1, rwork( irwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+3*n ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
891 * (RWorkspace: 0)
892 *
893  CALL cgeqrf( m, n, a, lda, work( itau ),
894  $ work( iwork ), lwork-iwork+1, ierr )
895 *
896 * Copy R to VT, zeroing out below it
897 *
898  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
899  IF( n.GT.1 )
900  $ CALL claset( 'L', n-1, n-1, czero, czero,
901  $ vt( 2, 1 ), ldvt )
902 *
903 * Generate Q in A
904 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
905 * (RWorkspace: 0)
906 *
907  CALL cungqr( m, n, n, a, lda, work( itau ),
908  $ work( iwork ), lwork-iwork+1, ierr )
909  ie = 1
910  itauq = itau
911  itaup = itauq + n
912  iwork = itaup + n
913 *
914 * Bidiagonalize R in VT, copying result to WORK(IR)
915 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
916 * (RWorkspace: need N)
917 *
918  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
919  $ work( itauq ), work( itaup ),
920  $ work( iwork ), lwork-iwork+1, ierr )
921  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
922 *
923 * Generate left vectors bidiagonalizing R in WORK(IR)
924 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
925 * (RWorkspace: 0)
926 *
927  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
928  $ work( itauq ), work( iwork ),
929  $ lwork-iwork+1, ierr )
930 *
931 * Generate right vectors bidiagonalizing R in VT
932 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
933 * (RWorkspace: 0)
934 *
935  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
936  $ work( iwork ), lwork-iwork+1, ierr )
937  irwork = ie + n
938 *
939 * Perform bidiagonal QR iteration, computing left
940 * singular vectors of R in WORK(IR) and computing right
941 * singular vectors of R in VT
942 * (CWorkspace: need N*N)
943 * (RWorkspace: need BDSPAC)
944 *
945  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
946  $ ldvt, work( ir ), ldwrkr, cdum, 1,
947  $ rwork( irwork ), info )
948  iu = itauq
949 *
950 * Multiply Q in A by left singular vectors of R in
951 * WORK(IR), storing result in WORK(IU) and copying to A
952 * (CWorkspace: need N*N+N, prefer N*N+M*N)
953 * (RWorkspace: 0)
954 *
955  DO 20 i = 1, m, ldwrku
956  chunk = min( m-i+1, ldwrku )
957  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
958  $ lda, work( ir ), ldwrkr, czero,
959  $ work( iu ), ldwrku )
960  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
961  $ a( i, 1 ), lda )
962  20 CONTINUE
963 *
964  ELSE
965 *
966 * Insufficient workspace for a fast algorithm
967 *
968  itau = 1
969  iwork = itau + n
970 *
971 * Compute A=Q*R
972 * (CWorkspace: need 2*N, prefer N+N*NB)
973 * (RWorkspace: 0)
974 *
975  CALL cgeqrf( m, n, a, lda, work( itau ),
976  $ work( iwork ), lwork-iwork+1, ierr )
977 *
978 * Copy R to VT, zeroing out below it
979 *
980  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
981  IF( n.GT.1 )
982  $ CALL claset( 'L', n-1, n-1, czero, czero,
983  $ vt( 2, 1 ), ldvt )
984 *
985 * Generate Q in A
986 * (CWorkspace: need 2*N, prefer N+N*NB)
987 * (RWorkspace: 0)
988 *
989  CALL cungqr( m, n, n, a, lda, work( itau ),
990  $ work( iwork ), lwork-iwork+1, ierr )
991  ie = 1
992  itauq = itau
993  itaup = itauq + n
994  iwork = itaup + n
995 *
996 * Bidiagonalize R in VT
997 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
998 * (RWorkspace: N)
999 *
1000  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1001  $ work( itauq ), work( itaup ),
1002  $ work( iwork ), lwork-iwork+1, ierr )
1003 *
1004 * Multiply Q in A by left vectors bidiagonalizing R
1005 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1006 * (RWorkspace: 0)
1007 *
1008  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1009  $ work( itauq ), a, lda, work( iwork ),
1010  $ lwork-iwork+1, ierr )
1011 *
1012 * Generate right vectors bidiagonalizing R in VT
1013 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1014 * (RWorkspace: 0)
1015 *
1016  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1017  $ work( iwork ), lwork-iwork+1, ierr )
1018  irwork = ie + n
1019 *
1020 * Perform bidiagonal QR iteration, computing left
1021 * singular vectors of A in A and computing right
1022 * singular vectors of A in VT
1023 * (CWorkspace: 0)
1024 * (RWorkspace: need BDSPAC)
1025 *
1026  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1027  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1028  $ info )
1029 *
1030  END IF
1031 *
1032  ELSE IF( wntus ) THEN
1033 *
1034  IF( wntvn ) THEN
1035 *
1036 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1037 * N left singular vectors to be computed in U and
1038 * no right singular vectors to be computed
1039 *
1040  IF( lwork.GE.n*n+3*n ) THEN
1041 *
1042 * Sufficient workspace for a fast algorithm
1043 *
1044  ir = 1
1045  IF( lwork.GE.wrkbl+lda*n ) THEN
1046 *
1047 * WORK(IR) is LDA by N
1048 *
1049  ldwrkr = lda
1050  ELSE
1051 *
1052 * WORK(IR) is N by N
1053 *
1054  ldwrkr = n
1055  END IF
1056  itau = ir + ldwrkr*n
1057  iwork = itau + n
1058 *
1059 * Compute A=Q*R
1060 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1061 * (RWorkspace: 0)
1062 *
1063  CALL cgeqrf( m, n, a, lda, work( itau ),
1064  $ work( iwork ), lwork-iwork+1, ierr )
1065 *
1066 * Copy R to WORK(IR), zeroing out below it
1067 *
1068  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1069  $ ldwrkr )
1070  CALL claset( 'L', n-1, n-1, czero, czero,
1071  $ work( ir+1 ), ldwrkr )
1072 *
1073 * Generate Q in A
1074 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1075 * (RWorkspace: 0)
1076 *
1077  CALL cungqr( m, n, n, a, lda, work( itau ),
1078  $ work( iwork ), lwork-iwork+1, ierr )
1079  ie = 1
1080  itauq = itau
1081  itaup = itauq + n
1082  iwork = itaup + n
1083 *
1084 * Bidiagonalize R in WORK(IR)
1085 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1086 * (RWorkspace: need N)
1087 *
1088  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1089  $ rwork( ie ), work( itauq ),
1090  $ work( itaup ), work( iwork ),
1091  $ lwork-iwork+1, ierr )
1092 *
1093 * Generate left vectors bidiagonalizing R in WORK(IR)
1094 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1095 * (RWorkspace: 0)
1096 *
1097  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1098  $ work( itauq ), work( iwork ),
1099  $ lwork-iwork+1, ierr )
1100  irwork = ie + n
1101 *
1102 * Perform bidiagonal QR iteration, computing left
1103 * singular vectors of R in WORK(IR)
1104 * (CWorkspace: need N*N)
1105 * (RWorkspace: need BDSPAC)
1106 *
1107  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1108  $ 1, work( ir ), ldwrkr, cdum, 1,
1109  $ rwork( irwork ), info )
1110 *
1111 * Multiply Q in A by left singular vectors of R in
1112 * WORK(IR), storing result in U
1113 * (CWorkspace: need N*N)
1114 * (RWorkspace: 0)
1115 *
1116  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1117  $ work( ir ), ldwrkr, czero, u, ldu )
1118 *
1119  ELSE
1120 *
1121 * Insufficient workspace for a fast algorithm
1122 *
1123  itau = 1
1124  iwork = itau + n
1125 *
1126 * Compute A=Q*R, copying result to U
1127 * (CWorkspace: need 2*N, prefer N+N*NB)
1128 * (RWorkspace: 0)
1129 *
1130  CALL cgeqrf( m, n, a, lda, work( itau ),
1131  $ work( iwork ), lwork-iwork+1, ierr )
1132  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1133 *
1134 * Generate Q in U
1135 * (CWorkspace: need 2*N, prefer N+N*NB)
1136 * (RWorkspace: 0)
1137 *
1138  CALL cungqr( m, n, n, u, ldu, work( itau ),
1139  $ work( iwork ), lwork-iwork+1, ierr )
1140  ie = 1
1141  itauq = itau
1142  itaup = itauq + n
1143  iwork = itaup + n
1144 *
1145 * Zero out below R in A
1146 *
1147  CALL claset( 'L', n-1, n-1, czero, czero,
1148  $ a( 2, 1 ), lda )
1149 *
1150 * Bidiagonalize R in A
1151 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1152 * (RWorkspace: need N)
1153 *
1154  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1155  $ work( itauq ), work( itaup ),
1156  $ work( iwork ), lwork-iwork+1, ierr )
1157 *
1158 * Multiply Q in U by left vectors bidiagonalizing R
1159 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1160 * (RWorkspace: 0)
1161 *
1162  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1163  $ work( itauq ), u, ldu, work( iwork ),
1164  $ lwork-iwork+1, ierr )
1165  irwork = ie + n
1166 *
1167 * Perform bidiagonal QR iteration, computing left
1168 * singular vectors of A in U
1169 * (CWorkspace: 0)
1170 * (RWorkspace: need BDSPAC)
1171 *
1172  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1173  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1174  $ info )
1175 *
1176  END IF
1177 *
1178  ELSE IF( wntvo ) THEN
1179 *
1180 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1181 * N left singular vectors to be computed in U and
1182 * N right singular vectors to be overwritten on A
1183 *
1184  IF( lwork.GE.2*n*n+3*n ) THEN
1185 *
1186 * Sufficient workspace for a fast algorithm
1187 *
1188  iu = 1
1189  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1190 *
1191 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1192 *
1193  ldwrku = lda
1194  ir = iu + ldwrku*n
1195  ldwrkr = lda
1196  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1197 *
1198 * WORK(IU) is LDA by N and WORK(IR) is N by N
1199 *
1200  ldwrku = lda
1201  ir = iu + ldwrku*n
1202  ldwrkr = n
1203  ELSE
1204 *
1205 * WORK(IU) is N by N and WORK(IR) is N by N
1206 *
1207  ldwrku = n
1208  ir = iu + ldwrku*n
1209  ldwrkr = n
1210  END IF
1211  itau = ir + ldwrkr*n
1212  iwork = itau + n
1213 *
1214 * Compute A=Q*R
1215 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1216 * (RWorkspace: 0)
1217 *
1218  CALL cgeqrf( m, n, a, lda, work( itau ),
1219  $ work( iwork ), lwork-iwork+1, ierr )
1220 *
1221 * Copy R to WORK(IU), zeroing out below it
1222 *
1223  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1224  $ ldwrku )
1225  CALL claset( 'L', n-1, n-1, czero, czero,
1226  $ work( iu+1 ), ldwrku )
1227 *
1228 * Generate Q in A
1229 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1230 * (RWorkspace: 0)
1231 *
1232  CALL cungqr( m, n, n, a, lda, work( itau ),
1233  $ work( iwork ), lwork-iwork+1, ierr )
1234  ie = 1
1235  itauq = itau
1236  itaup = itauq + n
1237  iwork = itaup + n
1238 *
1239 * Bidiagonalize R in WORK(IU), copying result to
1240 * WORK(IR)
1241 * (CWorkspace: need 2*N*N+3*N,
1242 * prefer 2*N*N+2*N+2*N*NB)
1243 * (RWorkspace: need N)
1244 *
1245  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1246  $ rwork( ie ), work( itauq ),
1247  $ work( itaup ), work( iwork ),
1248  $ lwork-iwork+1, ierr )
1249  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1250  $ work( ir ), ldwrkr )
1251 *
1252 * Generate left bidiagonalizing vectors in WORK(IU)
1253 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1254 * (RWorkspace: 0)
1255 *
1256  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1257  $ work( itauq ), work( iwork ),
1258  $ lwork-iwork+1, ierr )
1259 *
1260 * Generate right bidiagonalizing vectors in WORK(IR)
1261 * (CWorkspace: need 2*N*N+3*N-1,
1262 * prefer 2*N*N+2*N+(N-1)*NB)
1263 * (RWorkspace: 0)
1264 *
1265  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1266  $ work( itaup ), work( iwork ),
1267  $ lwork-iwork+1, ierr )
1268  irwork = ie + n
1269 *
1270 * Perform bidiagonal QR iteration, computing left
1271 * singular vectors of R in WORK(IU) and computing
1272 * right singular vectors of R in WORK(IR)
1273 * (CWorkspace: need 2*N*N)
1274 * (RWorkspace: need BDSPAC)
1275 *
1276  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1277  $ work( ir ), ldwrkr, work( iu ),
1278  $ ldwrku, cdum, 1, rwork( irwork ),
1279  $ info )
1280 *
1281 * Multiply Q in A by left singular vectors of R in
1282 * WORK(IU), storing result in U
1283 * (CWorkspace: need N*N)
1284 * (RWorkspace: 0)
1285 *
1286  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1287  $ work( iu ), ldwrku, czero, u, ldu )
1288 *
1289 * Copy right singular vectors of R to A
1290 * (CWorkspace: need N*N)
1291 * (RWorkspace: 0)
1292 *
1293  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1294  $ lda )
1295 *
1296  ELSE
1297 *
1298 * Insufficient workspace for a fast algorithm
1299 *
1300  itau = 1
1301  iwork = itau + n
1302 *
1303 * Compute A=Q*R, copying result to U
1304 * (CWorkspace: need 2*N, prefer N+N*NB)
1305 * (RWorkspace: 0)
1306 *
1307  CALL cgeqrf( m, n, a, lda, work( itau ),
1308  $ work( iwork ), lwork-iwork+1, ierr )
1309  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1310 *
1311 * Generate Q in U
1312 * (CWorkspace: need 2*N, prefer N+N*NB)
1313 * (RWorkspace: 0)
1314 *
1315  CALL cungqr( m, n, n, u, ldu, work( itau ),
1316  $ work( iwork ), lwork-iwork+1, ierr )
1317  ie = 1
1318  itauq = itau
1319  itaup = itauq + n
1320  iwork = itaup + n
1321 *
1322 * Zero out below R in A
1323 *
1324  CALL claset( 'L', n-1, n-1, czero, czero,
1325  $ a( 2, 1 ), lda )
1326 *
1327 * Bidiagonalize R in A
1328 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1329 * (RWorkspace: need N)
1330 *
1331  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1332  $ work( itauq ), work( itaup ),
1333  $ work( iwork ), lwork-iwork+1, ierr )
1334 *
1335 * Multiply Q in U by left vectors bidiagonalizing R
1336 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1337 * (RWorkspace: 0)
1338 *
1339  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1340  $ work( itauq ), u, ldu, work( iwork ),
1341  $ lwork-iwork+1, ierr )
1342 *
1343 * Generate right vectors bidiagonalizing R in A
1344 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1345 * (RWorkspace: 0)
1346 *
1347  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1348  $ work( iwork ), lwork-iwork+1, ierr )
1349  irwork = ie + n
1350 *
1351 * Perform bidiagonal QR iteration, computing left
1352 * singular vectors of A in U and computing right
1353 * singular vectors of A in A
1354 * (CWorkspace: 0)
1355 * (RWorkspace: need BDSPAC)
1356 *
1357  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1358  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1359  $ info )
1360 *
1361  END IF
1362 *
1363  ELSE IF( wntvas ) THEN
1364 *
1365 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1366 * or 'A')
1367 * N left singular vectors to be computed in U and
1368 * N right singular vectors to be computed in VT
1369 *
1370  IF( lwork.GE.n*n+3*n ) THEN
1371 *
1372 * Sufficient workspace for a fast algorithm
1373 *
1374  iu = 1
1375  IF( lwork.GE.wrkbl+lda*n ) THEN
1376 *
1377 * WORK(IU) is LDA by N
1378 *
1379  ldwrku = lda
1380  ELSE
1381 *
1382 * WORK(IU) is N by N
1383 *
1384  ldwrku = n
1385  END IF
1386  itau = iu + ldwrku*n
1387  iwork = itau + n
1388 *
1389 * Compute A=Q*R
1390 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1391 * (RWorkspace: 0)
1392 *
1393  CALL cgeqrf( m, n, a, lda, work( itau ),
1394  $ work( iwork ), lwork-iwork+1, ierr )
1395 *
1396 * Copy R to WORK(IU), zeroing out below it
1397 *
1398  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1399  $ ldwrku )
1400  CALL claset( 'L', n-1, n-1, czero, czero,
1401  $ work( iu+1 ), ldwrku )
1402 *
1403 * Generate Q in A
1404 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1405 * (RWorkspace: 0)
1406 *
1407  CALL cungqr( m, n, n, a, lda, work( itau ),
1408  $ work( iwork ), lwork-iwork+1, ierr )
1409  ie = 1
1410  itauq = itau
1411  itaup = itauq + n
1412  iwork = itaup + n
1413 *
1414 * Bidiagonalize R in WORK(IU), copying result to VT
1415 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1416 * (RWorkspace: need N)
1417 *
1418  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1419  $ rwork( ie ), work( itauq ),
1420  $ work( itaup ), work( iwork ),
1421  $ lwork-iwork+1, ierr )
1422  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1423  $ ldvt )
1424 *
1425 * Generate left bidiagonalizing vectors in WORK(IU)
1426 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1427 * (RWorkspace: 0)
1428 *
1429  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1430  $ work( itauq ), work( iwork ),
1431  $ lwork-iwork+1, ierr )
1432 *
1433 * Generate right bidiagonalizing vectors in VT
1434 * (CWorkspace: need N*N+3*N-1,
1435 * prefer N*N+2*N+(N-1)*NB)
1436 * (RWorkspace: 0)
1437 *
1438  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1439  $ work( iwork ), lwork-iwork+1, ierr )
1440  irwork = ie + n
1441 *
1442 * Perform bidiagonal QR iteration, computing left
1443 * singular vectors of R in WORK(IU) and computing
1444 * right singular vectors of R in VT
1445 * (CWorkspace: need N*N)
1446 * (RWorkspace: need BDSPAC)
1447 *
1448  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1449  $ ldvt, work( iu ), ldwrku, cdum, 1,
1450  $ rwork( irwork ), info )
1451 *
1452 * Multiply Q in A by left singular vectors of R in
1453 * WORK(IU), storing result in U
1454 * (CWorkspace: need N*N)
1455 * (RWorkspace: 0)
1456 *
1457  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1458  $ work( iu ), ldwrku, czero, u, ldu )
1459 *
1460  ELSE
1461 *
1462 * Insufficient workspace for a fast algorithm
1463 *
1464  itau = 1
1465  iwork = itau + n
1466 *
1467 * Compute A=Q*R, copying result to U
1468 * (CWorkspace: need 2*N, prefer N+N*NB)
1469 * (RWorkspace: 0)
1470 *
1471  CALL cgeqrf( m, n, a, lda, work( itau ),
1472  $ work( iwork ), lwork-iwork+1, ierr )
1473  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1474 *
1475 * Generate Q in U
1476 * (CWorkspace: need 2*N, prefer N+N*NB)
1477 * (RWorkspace: 0)
1478 *
1479  CALL cungqr( m, n, n, u, ldu, work( itau ),
1480  $ work( iwork ), lwork-iwork+1, ierr )
1481 *
1482 * Copy R to VT, zeroing out below it
1483 *
1484  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1485  IF( n.GT.1 )
1486  $ CALL claset( 'L', n-1, n-1, czero, czero,
1487  $ vt( 2, 1 ), ldvt )
1488  ie = 1
1489  itauq = itau
1490  itaup = itauq + n
1491  iwork = itaup + n
1492 *
1493 * Bidiagonalize R in VT
1494 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1495 * (RWorkspace: need N)
1496 *
1497  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1498  $ work( itauq ), work( itaup ),
1499  $ work( iwork ), lwork-iwork+1, ierr )
1500 *
1501 * Multiply Q in U by left bidiagonalizing vectors
1502 * in VT
1503 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1504 * (RWorkspace: 0)
1505 *
1506  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1507  $ work( itauq ), u, ldu, work( iwork ),
1508  $ lwork-iwork+1, ierr )
1509 *
1510 * Generate right bidiagonalizing vectors in VT
1511 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1512 * (RWorkspace: 0)
1513 *
1514  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1515  $ work( iwork ), lwork-iwork+1, ierr )
1516  irwork = ie + n
1517 *
1518 * Perform bidiagonal QR iteration, computing left
1519 * singular vectors of A in U and computing right
1520 * singular vectors of A in VT
1521 * (CWorkspace: 0)
1522 * (RWorkspace: need BDSPAC)
1523 *
1524  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1525  $ ldvt, u, ldu, cdum, 1,
1526  $ rwork( irwork ), info )
1527 *
1528  END IF
1529 *
1530  END IF
1531 *
1532  ELSE IF( wntua ) THEN
1533 *
1534  IF( wntvn ) THEN
1535 *
1536 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1537 * M left singular vectors to be computed in U and
1538 * no right singular vectors to be computed
1539 *
1540  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1541 *
1542 * Sufficient workspace for a fast algorithm
1543 *
1544  ir = 1
1545  IF( lwork.GE.wrkbl+lda*n ) THEN
1546 *
1547 * WORK(IR) is LDA by N
1548 *
1549  ldwrkr = lda
1550  ELSE
1551 *
1552 * WORK(IR) is N by N
1553 *
1554  ldwrkr = n
1555  END IF
1556  itau = ir + ldwrkr*n
1557  iwork = itau + n
1558 *
1559 * Compute A=Q*R, copying result to U
1560 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1561 * (RWorkspace: 0)
1562 *
1563  CALL cgeqrf( m, n, a, lda, work( itau ),
1564  $ work( iwork ), lwork-iwork+1, ierr )
1565  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1566 *
1567 * Copy R to WORK(IR), zeroing out below it
1568 *
1569  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1570  $ ldwrkr )
1571  CALL claset( 'L', n-1, n-1, czero, czero,
1572  $ work( ir+1 ), ldwrkr )
1573 *
1574 * Generate Q in U
1575 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1576 * (RWorkspace: 0)
1577 *
1578  CALL cungqr( m, m, n, u, ldu, work( itau ),
1579  $ work( iwork ), lwork-iwork+1, ierr )
1580  ie = 1
1581  itauq = itau
1582  itaup = itauq + n
1583  iwork = itaup + n
1584 *
1585 * Bidiagonalize R in WORK(IR)
1586 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1587 * (RWorkspace: need N)
1588 *
1589  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1590  $ rwork( ie ), work( itauq ),
1591  $ work( itaup ), work( iwork ),
1592  $ lwork-iwork+1, ierr )
1593 *
1594 * Generate left bidiagonalizing vectors in WORK(IR)
1595 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1596 * (RWorkspace: 0)
1597 *
1598  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1599  $ work( itauq ), work( iwork ),
1600  $ lwork-iwork+1, ierr )
1601  irwork = ie + n
1602 *
1603 * Perform bidiagonal QR iteration, computing left
1604 * singular vectors of R in WORK(IR)
1605 * (CWorkspace: need N*N)
1606 * (RWorkspace: need BDSPAC)
1607 *
1608  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1609  $ 1, work( ir ), ldwrkr, cdum, 1,
1610  $ rwork( irwork ), info )
1611 *
1612 * Multiply Q in U by left singular vectors of R in
1613 * WORK(IR), storing result in A
1614 * (CWorkspace: need N*N)
1615 * (RWorkspace: 0)
1616 *
1617  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1618  $ work( ir ), ldwrkr, czero, a, lda )
1619 *
1620 * Copy left singular vectors of A from A to U
1621 *
1622  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1623 *
1624  ELSE
1625 *
1626 * Insufficient workspace for a fast algorithm
1627 *
1628  itau = 1
1629  iwork = itau + n
1630 *
1631 * Compute A=Q*R, copying result to U
1632 * (CWorkspace: need 2*N, prefer N+N*NB)
1633 * (RWorkspace: 0)
1634 *
1635  CALL cgeqrf( m, n, a, lda, work( itau ),
1636  $ work( iwork ), lwork-iwork+1, ierr )
1637  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1638 *
1639 * Generate Q in U
1640 * (CWorkspace: need N+M, prefer N+M*NB)
1641 * (RWorkspace: 0)
1642 *
1643  CALL cungqr( m, m, n, u, ldu, work( itau ),
1644  $ work( iwork ), lwork-iwork+1, ierr )
1645  ie = 1
1646  itauq = itau
1647  itaup = itauq + n
1648  iwork = itaup + n
1649 *
1650 * Zero out below R in A
1651 *
1652  CALL claset( 'L', n-1, n-1, czero, czero,
1653  $ a( 2, 1 ), lda )
1654 *
1655 * Bidiagonalize R in A
1656 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1657 * (RWorkspace: need N)
1658 *
1659  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1660  $ work( itauq ), work( itaup ),
1661  $ work( iwork ), lwork-iwork+1, ierr )
1662 *
1663 * Multiply Q in U by left bidiagonalizing vectors
1664 * in A
1665 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1666 * (RWorkspace: 0)
1667 *
1668  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1669  $ work( itauq ), u, ldu, work( iwork ),
1670  $ lwork-iwork+1, ierr )
1671  irwork = ie + n
1672 *
1673 * Perform bidiagonal QR iteration, computing left
1674 * singular vectors of A in U
1675 * (CWorkspace: 0)
1676 * (RWorkspace: need BDSPAC)
1677 *
1678  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1679  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1680  $ info )
1681 *
1682  END IF
1683 *
1684  ELSE IF( wntvo ) THEN
1685 *
1686 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1687 * M left singular vectors to be computed in U and
1688 * N right singular vectors to be overwritten on A
1689 *
1690  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1691 *
1692 * Sufficient workspace for a fast algorithm
1693 *
1694  iu = 1
1695  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1696 *
1697 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1698 *
1699  ldwrku = lda
1700  ir = iu + ldwrku*n
1701  ldwrkr = lda
1702  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1703 *
1704 * WORK(IU) is LDA by N and WORK(IR) is N by N
1705 *
1706  ldwrku = lda
1707  ir = iu + ldwrku*n
1708  ldwrkr = n
1709  ELSE
1710 *
1711 * WORK(IU) is N by N and WORK(IR) is N by N
1712 *
1713  ldwrku = n
1714  ir = iu + ldwrku*n
1715  ldwrkr = n
1716  END IF
1717  itau = ir + ldwrkr*n
1718  iwork = itau + n
1719 *
1720 * Compute A=Q*R, copying result to U
1721 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1722 * (RWorkspace: 0)
1723 *
1724  CALL cgeqrf( m, n, a, lda, work( itau ),
1725  $ work( iwork ), lwork-iwork+1, ierr )
1726  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1727 *
1728 * Generate Q in U
1729 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1730 * (RWorkspace: 0)
1731 *
1732  CALL cungqr( m, m, n, u, ldu, work( itau ),
1733  $ work( iwork ), lwork-iwork+1, ierr )
1734 *
1735 * Copy R to WORK(IU), zeroing out below it
1736 *
1737  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1738  $ ldwrku )
1739  CALL claset( 'L', n-1, n-1, czero, czero,
1740  $ work( iu+1 ), ldwrku )
1741  ie = 1
1742  itauq = itau
1743  itaup = itauq + n
1744  iwork = itaup + n
1745 *
1746 * Bidiagonalize R in WORK(IU), copying result to
1747 * WORK(IR)
1748 * (CWorkspace: need 2*N*N+3*N,
1749 * prefer 2*N*N+2*N+2*N*NB)
1750 * (RWorkspace: need N)
1751 *
1752  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1753  $ rwork( ie ), work( itauq ),
1754  $ work( itaup ), work( iwork ),
1755  $ lwork-iwork+1, ierr )
1756  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1757  $ work( ir ), ldwrkr )
1758 *
1759 * Generate left bidiagonalizing vectors in WORK(IU)
1760 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1761 * (RWorkspace: 0)
1762 *
1763  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1764  $ work( itauq ), work( iwork ),
1765  $ lwork-iwork+1, ierr )
1766 *
1767 * Generate right bidiagonalizing vectors in WORK(IR)
1768 * (CWorkspace: need 2*N*N+3*N-1,
1769 * prefer 2*N*N+2*N+(N-1)*NB)
1770 * (RWorkspace: 0)
1771 *
1772  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1773  $ work( itaup ), work( iwork ),
1774  $ lwork-iwork+1, ierr )
1775  irwork = ie + n
1776 *
1777 * Perform bidiagonal QR iteration, computing left
1778 * singular vectors of R in WORK(IU) and computing
1779 * right singular vectors of R in WORK(IR)
1780 * (CWorkspace: need 2*N*N)
1781 * (RWorkspace: need BDSPAC)
1782 *
1783  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1784  $ work( ir ), ldwrkr, work( iu ),
1785  $ ldwrku, cdum, 1, rwork( irwork ),
1786  $ info )
1787 *
1788 * Multiply Q in U by left singular vectors of R in
1789 * WORK(IU), storing result in A
1790 * (CWorkspace: need N*N)
1791 * (RWorkspace: 0)
1792 *
1793  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1794  $ work( iu ), ldwrku, czero, a, lda )
1795 *
1796 * Copy left singular vectors of A from A to U
1797 *
1798  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1799 *
1800 * Copy right singular vectors of R from WORK(IR) to A
1801 *
1802  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1803  $ lda )
1804 *
1805  ELSE
1806 *
1807 * Insufficient workspace for a fast algorithm
1808 *
1809  itau = 1
1810  iwork = itau + n
1811 *
1812 * Compute A=Q*R, copying result to U
1813 * (CWorkspace: need 2*N, prefer N+N*NB)
1814 * (RWorkspace: 0)
1815 *
1816  CALL cgeqrf( m, n, a, lda, work( itau ),
1817  $ work( iwork ), lwork-iwork+1, ierr )
1818  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1819 *
1820 * Generate Q in U
1821 * (CWorkspace: need N+M, prefer N+M*NB)
1822 * (RWorkspace: 0)
1823 *
1824  CALL cungqr( m, m, n, u, ldu, work( itau ),
1825  $ work( iwork ), lwork-iwork+1, ierr )
1826  ie = 1
1827  itauq = itau
1828  itaup = itauq + n
1829  iwork = itaup + n
1830 *
1831 * Zero out below R in A
1832 *
1833  CALL claset( 'L', n-1, n-1, czero, czero,
1834  $ a( 2, 1 ), lda )
1835 *
1836 * Bidiagonalize R in A
1837 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1838 * (RWorkspace: need N)
1839 *
1840  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1841  $ work( itauq ), work( itaup ),
1842  $ work( iwork ), lwork-iwork+1, ierr )
1843 *
1844 * Multiply Q in U by left bidiagonalizing vectors
1845 * in A
1846 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1847 * (RWorkspace: 0)
1848 *
1849  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1850  $ work( itauq ), u, ldu, work( iwork ),
1851  $ lwork-iwork+1, ierr )
1852 *
1853 * Generate right bidiagonalizing vectors in A
1854 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1855 * (RWorkspace: 0)
1856 *
1857  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1858  $ work( iwork ), lwork-iwork+1, ierr )
1859  irwork = ie + n
1860 *
1861 * Perform bidiagonal QR iteration, computing left
1862 * singular vectors of A in U and computing right
1863 * singular vectors of A in A
1864 * (CWorkspace: 0)
1865 * (RWorkspace: need BDSPAC)
1866 *
1867  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1868  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1869  $ info )
1870 *
1871  END IF
1872 *
1873  ELSE IF( wntvas ) THEN
1874 *
1875 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1876 * or 'A')
1877 * M left singular vectors to be computed in U and
1878 * N right singular vectors to be computed in VT
1879 *
1880  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1881 *
1882 * Sufficient workspace for a fast algorithm
1883 *
1884  iu = 1
1885  IF( lwork.GE.wrkbl+lda*n ) THEN
1886 *
1887 * WORK(IU) is LDA by N
1888 *
1889  ldwrku = lda
1890  ELSE
1891 *
1892 * WORK(IU) is N by N
1893 *
1894  ldwrku = n
1895  END IF
1896  itau = iu + ldwrku*n
1897  iwork = itau + n
1898 *
1899 * Compute A=Q*R, copying result to U
1900 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1901 * (RWorkspace: 0)
1902 *
1903  CALL cgeqrf( m, n, a, lda, work( itau ),
1904  $ work( iwork ), lwork-iwork+1, ierr )
1905  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1906 *
1907 * Generate Q in U
1908 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1909 * (RWorkspace: 0)
1910 *
1911  CALL cungqr( m, m, n, u, ldu, work( itau ),
1912  $ work( iwork ), lwork-iwork+1, ierr )
1913 *
1914 * Copy R to WORK(IU), zeroing out below it
1915 *
1916  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1917  $ ldwrku )
1918  CALL claset( 'L', n-1, n-1, czero, czero,
1919  $ work( iu+1 ), ldwrku )
1920  ie = 1
1921  itauq = itau
1922  itaup = itauq + n
1923  iwork = itaup + n
1924 *
1925 * Bidiagonalize R in WORK(IU), copying result to VT
1926 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1927 * (RWorkspace: need N)
1928 *
1929  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1930  $ rwork( ie ), work( itauq ),
1931  $ work( itaup ), work( iwork ),
1932  $ lwork-iwork+1, ierr )
1933  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1934  $ ldvt )
1935 *
1936 * Generate left bidiagonalizing vectors in WORK(IU)
1937 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1938 * (RWorkspace: 0)
1939 *
1940  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1941  $ work( itauq ), work( iwork ),
1942  $ lwork-iwork+1, ierr )
1943 *
1944 * Generate right bidiagonalizing vectors in VT
1945 * (CWorkspace: need N*N+3*N-1,
1946 * prefer N*N+2*N+(N-1)*NB)
1947 * (RWorkspace: need 0)
1948 *
1949  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1950  $ work( iwork ), lwork-iwork+1, ierr )
1951  irwork = ie + n
1952 *
1953 * Perform bidiagonal QR iteration, computing left
1954 * singular vectors of R in WORK(IU) and computing
1955 * right singular vectors of R in VT
1956 * (CWorkspace: need N*N)
1957 * (RWorkspace: need BDSPAC)
1958 *
1959  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1960  $ ldvt, work( iu ), ldwrku, cdum, 1,
1961  $ rwork( irwork ), info )
1962 *
1963 * Multiply Q in U by left singular vectors of R in
1964 * WORK(IU), storing result in A
1965 * (CWorkspace: need N*N)
1966 * (RWorkspace: 0)
1967 *
1968  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1969  $ work( iu ), ldwrku, czero, a, lda )
1970 *
1971 * Copy left singular vectors of A from A to U
1972 *
1973  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1974 *
1975  ELSE
1976 *
1977 * Insufficient workspace for a fast algorithm
1978 *
1979  itau = 1
1980  iwork = itau + n
1981 *
1982 * Compute A=Q*R, copying result to U
1983 * (CWorkspace: need 2*N, prefer N+N*NB)
1984 * (RWorkspace: 0)
1985 *
1986  CALL cgeqrf( m, n, a, lda, work( itau ),
1987  $ work( iwork ), lwork-iwork+1, ierr )
1988  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1989 *
1990 * Generate Q in U
1991 * (CWorkspace: need N+M, prefer N+M*NB)
1992 * (RWorkspace: 0)
1993 *
1994  CALL cungqr( m, m, n, u, ldu, work( itau ),
1995  $ work( iwork ), lwork-iwork+1, ierr )
1996 *
1997 * Copy R from A to VT, zeroing out below it
1998 *
1999  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2000  IF( n.GT.1 )
2001  $ CALL claset( 'L', n-1, n-1, czero, czero,
2002  $ vt( 2, 1 ), ldvt )
2003  ie = 1
2004  itauq = itau
2005  itaup = itauq + n
2006  iwork = itaup + n
2007 *
2008 * Bidiagonalize R in VT
2009 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2010 * (RWorkspace: need N)
2011 *
2012  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
2013  $ work( itauq ), work( itaup ),
2014  $ work( iwork ), lwork-iwork+1, ierr )
2015 *
2016 * Multiply Q in U by left bidiagonalizing vectors
2017 * in VT
2018 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2019 * (RWorkspace: 0)
2020 *
2021  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2022  $ work( itauq ), u, ldu, work( iwork ),
2023  $ lwork-iwork+1, ierr )
2024 *
2025 * Generate right bidiagonalizing vectors in VT
2026 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2027 * (RWorkspace: 0)
2028 *
2029  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2030  $ work( iwork ), lwork-iwork+1, ierr )
2031  irwork = ie + n
2032 *
2033 * Perform bidiagonal QR iteration, computing left
2034 * singular vectors of A in U and computing right
2035 * singular vectors of A in VT
2036 * (CWorkspace: 0)
2037 * (RWorkspace: need BDSPAC)
2038 *
2039  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2040  $ ldvt, u, ldu, cdum, 1,
2041  $ rwork( irwork ), info )
2042 *
2043  END IF
2044 *
2045  END IF
2046 *
2047  END IF
2048 *
2049  ELSE
2050 *
2051 * M .LT. MNTHR
2052 *
2053 * Path 10 (M at least N, but not much larger)
2054 * Reduce to bidiagonal form without QR decomposition
2055 *
2056  ie = 1
2057  itauq = 1
2058  itaup = itauq + n
2059  iwork = itaup + n
2060 *
2061 * Bidiagonalize A
2062 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2063 * (RWorkspace: need N)
2064 *
2065  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2066  $ work( itaup ), work( iwork ), lwork-iwork+1,
2067  $ ierr )
2068  IF( wntuas ) THEN
2069 *
2070 * If left singular vectors desired in U, copy result to U
2071 * and generate left bidiagonalizing vectors in U
2072 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2073 * (RWorkspace: 0)
2074 *
2075  CALL clacpy( 'L', m, n, a, lda, u, ldu )
2076  IF( wntus )
2077  $ ncu = n
2078  IF( wntua )
2079  $ ncu = m
2080  CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2081  $ work( iwork ), lwork-iwork+1, ierr )
2082  END IF
2083  IF( wntvas ) THEN
2084 *
2085 * If right singular vectors desired in VT, copy result to
2086 * VT and generate right bidiagonalizing vectors in VT
2087 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2088 * (RWorkspace: 0)
2089 *
2090  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2091  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2092  $ work( iwork ), lwork-iwork+1, ierr )
2093  END IF
2094  IF( wntuo ) THEN
2095 *
2096 * If left singular vectors desired in A, generate left
2097 * bidiagonalizing vectors in A
2098 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2099 * (RWorkspace: 0)
2100 *
2101  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
2102  $ work( iwork ), lwork-iwork+1, ierr )
2103  END IF
2104  IF( wntvo ) THEN
2105 *
2106 * If right singular vectors desired in A, generate right
2107 * bidiagonalizing vectors in A
2108 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2109 * (RWorkspace: 0)
2110 *
2111  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
2112  $ work( iwork ), lwork-iwork+1, ierr )
2113  END IF
2114  irwork = ie + n
2115  IF( wntuas .OR. wntuo )
2116  $ nru = m
2117  IF( wntun )
2118  $ nru = 0
2119  IF( wntvas .OR. wntvo )
2120  $ ncvt = n
2121  IF( wntvn )
2122  $ ncvt = 0
2123  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2124 *
2125 * Perform bidiagonal QR iteration, if desired, computing
2126 * left singular vectors in U and computing right singular
2127 * vectors in VT
2128 * (CWorkspace: 0)
2129 * (RWorkspace: need BDSPAC)
2130 *
2131  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2132  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2133  $ info )
2134  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2135 *
2136 * Perform bidiagonal QR iteration, if desired, computing
2137 * left singular vectors in U and computing right singular
2138 * vectors in A
2139 * (CWorkspace: 0)
2140 * (RWorkspace: need BDSPAC)
2141 *
2142  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2143  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2144  $ info )
2145  ELSE
2146 *
2147 * Perform bidiagonal QR iteration, if desired, computing
2148 * left singular vectors in A and computing right singular
2149 * vectors in VT
2150 * (CWorkspace: 0)
2151 * (RWorkspace: need BDSPAC)
2152 *
2153  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2154  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2155  $ info )
2156  END IF
2157 *
2158  END IF
2159 *
2160  ELSE
2161 *
2162 * A has more columns than rows. If A has sufficiently more
2163 * columns than rows, first reduce using the LQ decomposition (if
2164 * sufficient workspace available)
2165 *
2166  IF( n.GE.mnthr ) THEN
2167 *
2168  IF( wntvn ) THEN
2169 *
2170 * Path 1t(N much larger than M, JOBVT='N')
2171 * No right singular vectors to be computed
2172 *
2173  itau = 1
2174  iwork = itau + m
2175 *
2176 * Compute A=L*Q
2177 * (CWorkspace: need 2*M, prefer M+M*NB)
2178 * (RWorkspace: 0)
2179 *
2180  CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),
2181  $ lwork-iwork+1, ierr )
2182 *
2183 * Zero out above L
2184 *
2185  CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2186  $ lda )
2187  ie = 1
2188  itauq = 1
2189  itaup = itauq + m
2190  iwork = itaup + m
2191 *
2192 * Bidiagonalize L in A
2193 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2194 * (RWorkspace: need M)
2195 *
2196  CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2197  $ work( itaup ), work( iwork ), lwork-iwork+1,
2198  $ ierr )
2199  IF( wntuo .OR. wntuas ) THEN
2200 *
2201 * If left singular vectors desired, generate Q
2202 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2203 * (RWorkspace: 0)
2204 *
2205  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2206  $ work( iwork ), lwork-iwork+1, ierr )
2207  END IF
2208  irwork = ie + m
2209  nru = 0
2210  IF( wntuo .OR. wntuas )
2211  $ nru = m
2212 *
2213 * Perform bidiagonal QR iteration, computing left singular
2214 * vectors of A in A if desired
2215 * (CWorkspace: 0)
2216 * (RWorkspace: need BDSPAC)
2217 *
2218  CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2219  $ a, lda, cdum, 1, rwork( irwork ), info )
2220 *
2221 * If left singular vectors desired in U, copy them there
2222 *
2223  IF( wntuas )
2224  $ CALL clacpy( 'F', m, m, a, lda, u, ldu )
2225 *
2226  ELSE IF( wntvo .AND. wntun ) THEN
2227 *
2228 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2229 * M right singular vectors to be overwritten on A and
2230 * no left singular vectors to be computed
2231 *
2232  IF( lwork.GE.m*m+3*m ) THEN
2233 *
2234 * Sufficient workspace for a fast algorithm
2235 *
2236  ir = 1
2237  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2238 *
2239 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2240 *
2241  ldwrku = lda
2242  chunk = n
2243  ldwrkr = lda
2244  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2245 *
2246 * WORK(IU) is LDA by N and WORK(IR) is M by M
2247 *
2248  ldwrku = lda
2249  chunk = n
2250  ldwrkr = m
2251  ELSE
2252 *
2253 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2254 *
2255  ldwrku = m
2256  chunk = ( lwork-m*m ) / m
2257  ldwrkr = m
2258  END IF
2259  itau = ir + ldwrkr*m
2260  iwork = itau + m
2261 *
2262 * Compute A=L*Q
2263 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2264 * (RWorkspace: 0)
2265 *
2266  CALL cgelqf( m, n, a, lda, work( itau ),
2267  $ work( iwork ), lwork-iwork+1, ierr )
2268 *
2269 * Copy L to WORK(IR) and zero out above it
2270 *
2271  CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2272  CALL claset( 'U', m-1, m-1, czero, czero,
2273  $ work( ir+ldwrkr ), ldwrkr )
2274 *
2275 * Generate Q in A
2276 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2277 * (RWorkspace: 0)
2278 *
2279  CALL cunglq( m, n, m, a, lda, work( itau ),
2280  $ work( iwork ), lwork-iwork+1, ierr )
2281  ie = 1
2282  itauq = itau
2283  itaup = itauq + m
2284  iwork = itaup + m
2285 *
2286 * Bidiagonalize L in WORK(IR)
2287 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2288 * (RWorkspace: need M)
2289 *
2290  CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2291  $ work( itauq ), work( itaup ),
2292  $ work( iwork ), lwork-iwork+1, ierr )
2293 *
2294 * Generate right vectors bidiagonalizing L
2295 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2296 * (RWorkspace: 0)
2297 *
2298  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2299  $ work( itaup ), work( iwork ),
2300  $ lwork-iwork+1, ierr )
2301  irwork = ie + m
2302 *
2303 * Perform bidiagonal QR iteration, computing right
2304 * singular vectors of L in WORK(IR)
2305 * (CWorkspace: need M*M)
2306 * (RWorkspace: need BDSPAC)
2307 *
2308  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2309  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2310  $ rwork( irwork ), info )
2311  iu = itauq
2312 *
2313 * Multiply right singular vectors of L in WORK(IR) by Q
2314 * in A, storing result in WORK(IU) and copying to A
2315 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2316 * (RWorkspace: 0)
2317 *
2318  DO 30 i = 1, n, chunk
2319  blk = min( n-i+1, chunk )
2320  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2321  $ ldwrkr, a( 1, i ), lda, czero,
2322  $ work( iu ), ldwrku )
2323  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2324  $ a( 1, i ), lda )
2325  30 CONTINUE
2326 *
2327  ELSE
2328 *
2329 * Insufficient workspace for a fast algorithm
2330 *
2331  ie = 1
2332  itauq = 1
2333  itaup = itauq + m
2334  iwork = itaup + m
2335 *
2336 * Bidiagonalize A
2337 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2338 * (RWorkspace: need M)
2339 *
2340  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
2341  $ work( itauq ), work( itaup ),
2342  $ work( iwork ), lwork-iwork+1, ierr )
2343 *
2344 * Generate right vectors bidiagonalizing A
2345 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2346 * (RWorkspace: 0)
2347 *
2348  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2349  $ work( iwork ), lwork-iwork+1, ierr )
2350  irwork = ie + m
2351 *
2352 * Perform bidiagonal QR iteration, computing right
2353 * singular vectors of A in A
2354 * (CWorkspace: 0)
2355 * (RWorkspace: need BDSPAC)
2356 *
2357  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2358  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2359 *
2360  END IF
2361 *
2362  ELSE IF( wntvo .AND. wntuas ) THEN
2363 *
2364 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2365 * M right singular vectors to be overwritten on A and
2366 * M left singular vectors to be computed in U
2367 *
2368  IF( lwork.GE.m*m+3*m ) THEN
2369 *
2370 * Sufficient workspace for a fast algorithm
2371 *
2372  ir = 1
2373  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2374 *
2375 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2376 *
2377  ldwrku = lda
2378  chunk = n
2379  ldwrkr = lda
2380  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2381 *
2382 * WORK(IU) is LDA by N and WORK(IR) is M by M
2383 *
2384  ldwrku = lda
2385  chunk = n
2386  ldwrkr = m
2387  ELSE
2388 *
2389 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2390 *
2391  ldwrku = m
2392  chunk = ( lwork-m*m ) / m
2393  ldwrkr = m
2394  END IF
2395  itau = ir + ldwrkr*m
2396  iwork = itau + m
2397 *
2398 * Compute A=L*Q
2399 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2400 * (RWorkspace: 0)
2401 *
2402  CALL cgelqf( m, n, a, lda, work( itau ),
2403  $ work( iwork ), lwork-iwork+1, ierr )
2404 *
2405 * Copy L to U, zeroing about above it
2406 *
2407  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2408  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2409  $ ldu )
2410 *
2411 * Generate Q in A
2412 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2413 * (RWorkspace: 0)
2414 *
2415  CALL cunglq( m, n, m, a, lda, work( itau ),
2416  $ work( iwork ), lwork-iwork+1, ierr )
2417  ie = 1
2418  itauq = itau
2419  itaup = itauq + m
2420  iwork = itaup + m
2421 *
2422 * Bidiagonalize L in U, copying result to WORK(IR)
2423 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2424 * (RWorkspace: need M)
2425 *
2426  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2427  $ work( itauq ), work( itaup ),
2428  $ work( iwork ), lwork-iwork+1, ierr )
2429  CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2430 *
2431 * Generate right vectors bidiagonalizing L in WORK(IR)
2432 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2433 * (RWorkspace: 0)
2434 *
2435  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2436  $ work( itaup ), work( iwork ),
2437  $ lwork-iwork+1, ierr )
2438 *
2439 * Generate left vectors bidiagonalizing L in U
2440 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2441 * (RWorkspace: 0)
2442 *
2443  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2444  $ work( iwork ), lwork-iwork+1, ierr )
2445  irwork = ie + m
2446 *
2447 * Perform bidiagonal QR iteration, computing left
2448 * singular vectors of L in U, and computing right
2449 * singular vectors of L in WORK(IR)
2450 * (CWorkspace: need M*M)
2451 * (RWorkspace: need BDSPAC)
2452 *
2453  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2454  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2455  $ rwork( irwork ), info )
2456  iu = itauq
2457 *
2458 * Multiply right singular vectors of L in WORK(IR) by Q
2459 * in A, storing result in WORK(IU) and copying to A
2460 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2461 * (RWorkspace: 0)
2462 *
2463  DO 40 i = 1, n, chunk
2464  blk = min( n-i+1, chunk )
2465  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2466  $ ldwrkr, a( 1, i ), lda, czero,
2467  $ work( iu ), ldwrku )
2468  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2469  $ a( 1, i ), lda )
2470  40 CONTINUE
2471 *
2472  ELSE
2473 *
2474 * Insufficient workspace for a fast algorithm
2475 *
2476  itau = 1
2477  iwork = itau + m
2478 *
2479 * Compute A=L*Q
2480 * (CWorkspace: need 2*M, prefer M+M*NB)
2481 * (RWorkspace: 0)
2482 *
2483  CALL cgelqf( m, n, a, lda, work( itau ),
2484  $ work( iwork ), lwork-iwork+1, ierr )
2485 *
2486 * Copy L to U, zeroing out above it
2487 *
2488  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2489  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2490  $ ldu )
2491 *
2492 * Generate Q in A
2493 * (CWorkspace: need 2*M, prefer M+M*NB)
2494 * (RWorkspace: 0)
2495 *
2496  CALL cunglq( m, n, m, a, lda, work( itau ),
2497  $ work( iwork ), lwork-iwork+1, ierr )
2498  ie = 1
2499  itauq = itau
2500  itaup = itauq + m
2501  iwork = itaup + m
2502 *
2503 * Bidiagonalize L in U
2504 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2505 * (RWorkspace: need M)
2506 *
2507  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2508  $ work( itauq ), work( itaup ),
2509  $ work( iwork ), lwork-iwork+1, ierr )
2510 *
2511 * Multiply right vectors bidiagonalizing L by Q in A
2512 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2513 * (RWorkspace: 0)
2514 *
2515  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2516  $ work( itaup ), a, lda, work( iwork ),
2517  $ lwork-iwork+1, ierr )
2518 *
2519 * Generate left vectors bidiagonalizing L in U
2520 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2521 * (RWorkspace: 0)
2522 *
2523  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2524  $ work( iwork ), lwork-iwork+1, ierr )
2525  irwork = ie + m
2526 *
2527 * Perform bidiagonal QR iteration, computing left
2528 * singular vectors of A in U and computing right
2529 * singular vectors of A in A
2530 * (CWorkspace: 0)
2531 * (RWorkspace: need BDSPAC)
2532 *
2533  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2534  $ u, ldu, cdum, 1, rwork( irwork ), info )
2535 *
2536  END IF
2537 *
2538  ELSE IF( wntvs ) THEN
2539 *
2540  IF( wntun ) THEN
2541 *
2542 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2543 * M right singular vectors to be computed in VT and
2544 * no left singular vectors to be computed
2545 *
2546  IF( lwork.GE.m*m+3*m ) THEN
2547 *
2548 * Sufficient workspace for a fast algorithm
2549 *
2550  ir = 1
2551  IF( lwork.GE.wrkbl+lda*m ) THEN
2552 *
2553 * WORK(IR) is LDA by M
2554 *
2555  ldwrkr = lda
2556  ELSE
2557 *
2558 * WORK(IR) is M by M
2559 *
2560  ldwrkr = m
2561  END IF
2562  itau = ir + ldwrkr*m
2563  iwork = itau + m
2564 *
2565 * Compute A=L*Q
2566 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2567 * (RWorkspace: 0)
2568 *
2569  CALL cgelqf( m, n, a, lda, work( itau ),
2570  $ work( iwork ), lwork-iwork+1, ierr )
2571 *
2572 * Copy L to WORK(IR), zeroing out above it
2573 *
2574  CALL clacpy( 'L', m, m, a, lda, work( ir ),
2575  $ ldwrkr )
2576  CALL claset( 'U', m-1, m-1, czero, czero,
2577  $ work( ir+ldwrkr ), ldwrkr )
2578 *
2579 * Generate Q in A
2580 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2581 * (RWorkspace: 0)
2582 *
2583  CALL cunglq( m, n, m, a, lda, work( itau ),
2584  $ work( iwork ), lwork-iwork+1, ierr )
2585  ie = 1
2586  itauq = itau
2587  itaup = itauq + m
2588  iwork = itaup + m
2589 *
2590 * Bidiagonalize L in WORK(IR)
2591 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2592 * (RWorkspace: need M)
2593 *
2594  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
2595  $ rwork( ie ), work( itauq ),
2596  $ work( itaup ), work( iwork ),
2597  $ lwork-iwork+1, ierr )
2598 *
2599 * Generate right vectors bidiagonalizing L in
2600 * WORK(IR)
2601 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2602 * (RWorkspace: 0)
2603 *
2604  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2605  $ work( itaup ), work( iwork ),
2606  $ lwork-iwork+1, ierr )
2607  irwork = ie + m
2608 *
2609 * Perform bidiagonal QR iteration, computing right
2610 * singular vectors of L in WORK(IR)
2611 * (CWorkspace: need M*M)
2612 * (RWorkspace: need BDSPAC)
2613 *
2614  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2615  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2616  $ rwork( irwork ), info )
2617 *
2618 * Multiply right singular vectors of L in WORK(IR) by
2619 * Q in A, storing result in VT
2620 * (CWorkspace: need M*M)
2621 * (RWorkspace: 0)
2622 *
2623  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
2624  $ ldwrkr, a, lda, czero, vt, ldvt )
2625 *
2626  ELSE
2627 *
2628 * Insufficient workspace for a fast algorithm
2629 *
2630  itau = 1
2631  iwork = itau + m
2632 *
2633 * Compute A=L*Q
2634 * (CWorkspace: need 2*M, prefer M+M*NB)
2635 * (RWorkspace: 0)
2636 *
2637  CALL cgelqf( m, n, a, lda, work( itau ),
2638  $ work( iwork ), lwork-iwork+1, ierr )
2639 *
2640 * Copy result to VT
2641 *
2642  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2643 *
2644 * Generate Q in VT
2645 * (CWorkspace: need 2*M, prefer M+M*NB)
2646 * (RWorkspace: 0)
2647 *
2648  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2649  $ work( iwork ), lwork-iwork+1, ierr )
2650  ie = 1
2651  itauq = itau
2652  itaup = itauq + m
2653  iwork = itaup + m
2654 *
2655 * Zero out above L in A
2656 *
2657  CALL claset( 'U', m-1, m-1, czero, czero,
2658  $ a( 1, 2 ), lda )
2659 *
2660 * Bidiagonalize L in A
2661 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2662 * (RWorkspace: need M)
2663 *
2664  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2665  $ work( itauq ), work( itaup ),
2666  $ work( iwork ), lwork-iwork+1, ierr )
2667 *
2668 * Multiply right vectors bidiagonalizing L by Q in VT
2669 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2670 * (RWorkspace: 0)
2671 *
2672  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2673  $ work( itaup ), vt, ldvt,
2674  $ work( iwork ), lwork-iwork+1, ierr )
2675  irwork = ie + m
2676 *
2677 * Perform bidiagonal QR iteration, computing right
2678 * singular vectors of A in VT
2679 * (CWorkspace: 0)
2680 * (RWorkspace: need BDSPAC)
2681 *
2682  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2683  $ ldvt, cdum, 1, cdum, 1,
2684  $ rwork( irwork ), info )
2685 *
2686  END IF
2687 *
2688  ELSE IF( wntuo ) THEN
2689 *
2690 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2691 * M right singular vectors to be computed in VT and
2692 * M left singular vectors to be overwritten on A
2693 *
2694  IF( lwork.GE.2*m*m+3*m ) THEN
2695 *
2696 * Sufficient workspace for a fast algorithm
2697 *
2698  iu = 1
2699  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2700 *
2701 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2702 *
2703  ldwrku = lda
2704  ir = iu + ldwrku*m
2705  ldwrkr = lda
2706  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2707 *
2708 * WORK(IU) is LDA by M and WORK(IR) is M by M
2709 *
2710  ldwrku = lda
2711  ir = iu + ldwrku*m
2712  ldwrkr = m
2713  ELSE
2714 *
2715 * WORK(IU) is M by M and WORK(IR) is M by M
2716 *
2717  ldwrku = m
2718  ir = iu + ldwrku*m
2719  ldwrkr = m
2720  END IF
2721  itau = ir + ldwrkr*m
2722  iwork = itau + m
2723 *
2724 * Compute A=L*Q
2725 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2726 * (RWorkspace: 0)
2727 *
2728  CALL cgelqf( m, n, a, lda, work( itau ),
2729  $ work( iwork ), lwork-iwork+1, ierr )
2730 *
2731 * Copy L to WORK(IU), zeroing out below it
2732 *
2733  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2734  $ ldwrku )
2735  CALL claset( 'U', m-1, m-1, czero, czero,
2736  $ work( iu+ldwrku ), ldwrku )
2737 *
2738 * Generate Q in A
2739 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2740 * (RWorkspace: 0)
2741 *
2742  CALL cunglq( m, n, m, a, lda, work( itau ),
2743  $ work( iwork ), lwork-iwork+1, ierr )
2744  ie = 1
2745  itauq = itau
2746  itaup = itauq + m
2747  iwork = itaup + m
2748 *
2749 * Bidiagonalize L in WORK(IU), copying result to
2750 * WORK(IR)
2751 * (CWorkspace: need 2*M*M+3*M,
2752 * prefer 2*M*M+2*M+2*M*NB)
2753 * (RWorkspace: need M)
2754 *
2755  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2756  $ rwork( ie ), work( itauq ),
2757  $ work( itaup ), work( iwork ),
2758  $ lwork-iwork+1, ierr )
2759  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
2760  $ work( ir ), ldwrkr )
2761 *
2762 * Generate right bidiagonalizing vectors in WORK(IU)
2763 * (CWorkspace: need 2*M*M+3*M-1,
2764 * prefer 2*M*M+2*M+(M-1)*NB)
2765 * (RWorkspace: 0)
2766 *
2767  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2768  $ work( itaup ), work( iwork ),
2769  $ lwork-iwork+1, ierr )
2770 *
2771 * Generate left bidiagonalizing vectors in WORK(IR)
2772 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2773 * (RWorkspace: 0)
2774 *
2775  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2776  $ work( itauq ), work( iwork ),
2777  $ lwork-iwork+1, ierr )
2778  irwork = ie + m
2779 *
2780 * Perform bidiagonal QR iteration, computing left
2781 * singular vectors of L in WORK(IR) and computing
2782 * right singular vectors of L in WORK(IU)
2783 * (CWorkspace: need 2*M*M)
2784 * (RWorkspace: need BDSPAC)
2785 *
2786  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2787  $ work( iu ), ldwrku, work( ir ),
2788  $ ldwrkr, cdum, 1, rwork( irwork ),
2789  $ info )
2790 *
2791 * Multiply right singular vectors of L in WORK(IU) by
2792 * Q in A, storing result in VT
2793 * (CWorkspace: need M*M)
2794 * (RWorkspace: 0)
2795 *
2796  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2797  $ ldwrku, a, lda, czero, vt, ldvt )
2798 *
2799 * Copy left singular vectors of L to A
2800 * (CWorkspace: need M*M)
2801 * (RWorkspace: 0)
2802 *
2803  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
2804  $ lda )
2805 *
2806  ELSE
2807 *
2808 * Insufficient workspace for a fast algorithm
2809 *
2810  itau = 1
2811  iwork = itau + m
2812 *
2813 * Compute A=L*Q, copying result to VT
2814 * (CWorkspace: need 2*M, prefer M+M*NB)
2815 * (RWorkspace: 0)
2816 *
2817  CALL cgelqf( m, n, a, lda, work( itau ),
2818  $ work( iwork ), lwork-iwork+1, ierr )
2819  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2820 *
2821 * Generate Q in VT
2822 * (CWorkspace: need 2*M, prefer M+M*NB)
2823 * (RWorkspace: 0)
2824 *
2825  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827  ie = 1
2828  itauq = itau
2829  itaup = itauq + m
2830  iwork = itaup + m
2831 *
2832 * Zero out above L in A
2833 *
2834  CALL claset( 'U', m-1, m-1, czero, czero,
2835  $ a( 1, 2 ), lda )
2836 *
2837 * Bidiagonalize L in A
2838 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2839 * (RWorkspace: need M)
2840 *
2841  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2842  $ work( itauq ), work( itaup ),
2843  $ work( iwork ), lwork-iwork+1, ierr )
2844 *
2845 * Multiply right vectors bidiagonalizing L by Q in VT
2846 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2847 * (RWorkspace: 0)
2848 *
2849  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2850  $ work( itaup ), vt, ldvt,
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Generate left bidiagonalizing vectors of L in A
2854 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2855 * (RWorkspace: 0)
2856 *
2857  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2858  $ work( iwork ), lwork-iwork+1, ierr )
2859  irwork = ie + m
2860 *
2861 * Perform bidiagonal QR iteration, computing left
2862 * singular vectors of A in A and computing right
2863 * singular vectors of A in VT
2864 * (CWorkspace: 0)
2865 * (RWorkspace: need BDSPAC)
2866 *
2867  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2868  $ ldvt, a, lda, cdum, 1,
2869  $ rwork( irwork ), info )
2870 *
2871  END IF
2872 *
2873  ELSE IF( wntuas ) THEN
2874 *
2875 * Path 6t(N much larger than M, JOBU='S' or 'A',
2876 * JOBVT='S')
2877 * M right singular vectors to be computed in VT and
2878 * M left singular vectors to be computed in U
2879 *
2880  IF( lwork.GE.m*m+3*m ) THEN
2881 *
2882 * Sufficient workspace for a fast algorithm
2883 *
2884  iu = 1
2885  IF( lwork.GE.wrkbl+lda*m ) THEN
2886 *
2887 * WORK(IU) is LDA by N
2888 *
2889  ldwrku = lda
2890  ELSE
2891 *
2892 * WORK(IU) is LDA by M
2893 *
2894  ldwrku = m
2895  END IF
2896  itau = iu + ldwrku*m
2897  iwork = itau + m
2898 *
2899 * Compute A=L*Q
2900 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2901 * (RWorkspace: 0)
2902 *
2903  CALL cgelqf( m, n, a, lda, work( itau ),
2904  $ work( iwork ), lwork-iwork+1, ierr )
2905 *
2906 * Copy L to WORK(IU), zeroing out above it
2907 *
2908  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2909  $ ldwrku )
2910  CALL claset( 'U', m-1, m-1, czero, czero,
2911  $ work( iu+ldwrku ), ldwrku )
2912 *
2913 * Generate Q in A
2914 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2915 * (RWorkspace: 0)
2916 *
2917  CALL cunglq( m, n, m, a, lda, work( itau ),
2918  $ work( iwork ), lwork-iwork+1, ierr )
2919  ie = 1
2920  itauq = itau
2921  itaup = itauq + m
2922  iwork = itaup + m
2923 *
2924 * Bidiagonalize L in WORK(IU), copying result to U
2925 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2926 * (RWorkspace: need M)
2927 *
2928  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2929  $ rwork( ie ), work( itauq ),
2930  $ work( itaup ), work( iwork ),
2931  $ lwork-iwork+1, ierr )
2932  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
2933  $ ldu )
2934 *
2935 * Generate right bidiagonalizing vectors in WORK(IU)
2936 * (CWorkspace: need M*M+3*M-1,
2937 * prefer M*M+2*M+(M-1)*NB)
2938 * (RWorkspace: 0)
2939 *
2940  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2941  $ work( itaup ), work( iwork ),
2942  $ lwork-iwork+1, ierr )
2943 *
2944 * Generate left bidiagonalizing vectors in U
2945 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2946 * (RWorkspace: 0)
2947 *
2948  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2949  $ work( iwork ), lwork-iwork+1, ierr )
2950  irwork = ie + m
2951 *
2952 * Perform bidiagonal QR iteration, computing left
2953 * singular vectors of L in U and computing right
2954 * singular vectors of L in WORK(IU)
2955 * (CWorkspace: need M*M)
2956 * (RWorkspace: need BDSPAC)
2957 *
2958  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2959  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2960  $ rwork( irwork ), info )
2961 *
2962 * Multiply right singular vectors of L in WORK(IU) by
2963 * Q in A, storing result in VT
2964 * (CWorkspace: need M*M)
2965 * (RWorkspace: 0)
2966 *
2967  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2968  $ ldwrku, a, lda, czero, vt, ldvt )
2969 *
2970  ELSE
2971 *
2972 * Insufficient workspace for a fast algorithm
2973 *
2974  itau = 1
2975  iwork = itau + m
2976 *
2977 * Compute A=L*Q, copying result to VT
2978 * (CWorkspace: need 2*M, prefer M+M*NB)
2979 * (RWorkspace: 0)
2980 *
2981  CALL cgelqf( m, n, a, lda, work( itau ),
2982  $ work( iwork ), lwork-iwork+1, ierr )
2983  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2984 *
2985 * Generate Q in VT
2986 * (CWorkspace: need 2*M, prefer M+M*NB)
2987 * (RWorkspace: 0)
2988 *
2989  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2990  $ work( iwork ), lwork-iwork+1, ierr )
2991 *
2992 * Copy L to U, zeroing out above it
2993 *
2994  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2995  CALL claset( 'U', m-1, m-1, czero, czero,
2996  $ u( 1, 2 ), ldu )
2997  ie = 1
2998  itauq = itau
2999  itaup = itauq + m
3000  iwork = itaup + m
3001 *
3002 * Bidiagonalize L in U
3003 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3004 * (RWorkspace: need M)
3005 *
3006  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3007  $ work( itauq ), work( itaup ),
3008  $ work( iwork ), lwork-iwork+1, ierr )
3009 *
3010 * Multiply right bidiagonalizing vectors in U by Q
3011 * in VT
3012 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3013 * (RWorkspace: 0)
3014 *
3015  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3016  $ work( itaup ), vt, ldvt,
3017  $ work( iwork ), lwork-iwork+1, ierr )
3018 *
3019 * Generate left bidiagonalizing vectors in U
3020 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3021 * (RWorkspace: 0)
3022 *
3023  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3024  $ work( iwork ), lwork-iwork+1, ierr )
3025  irwork = ie + m
3026 *
3027 * Perform bidiagonal QR iteration, computing left
3028 * singular vectors of A in U and computing right
3029 * singular vectors of A in VT
3030 * (CWorkspace: 0)
3031 * (RWorkspace: need BDSPAC)
3032 *
3033  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3034  $ ldvt, u, ldu, cdum, 1,
3035  $ rwork( irwork ), info )
3036 *
3037  END IF
3038 *
3039  END IF
3040 *
3041  ELSE IF( wntva ) THEN
3042 *
3043  IF( wntun ) THEN
3044 *
3045 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3046 * N right singular vectors to be computed in VT and
3047 * no left singular vectors to be computed
3048 *
3049  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3050 *
3051 * Sufficient workspace for a fast algorithm
3052 *
3053  ir = 1
3054  IF( lwork.GE.wrkbl+lda*m ) THEN
3055 *
3056 * WORK(IR) is LDA by M
3057 *
3058  ldwrkr = lda
3059  ELSE
3060 *
3061 * WORK(IR) is M by M
3062 *
3063  ldwrkr = m
3064  END IF
3065  itau = ir + ldwrkr*m
3066  iwork = itau + m
3067 *
3068 * Compute A=L*Q, copying result to VT
3069 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3070 * (RWorkspace: 0)
3071 *
3072  CALL cgelqf( m, n, a, lda, work( itau ),
3073  $ work( iwork ), lwork-iwork+1, ierr )
3074  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3075 *
3076 * Copy L to WORK(IR), zeroing out above it
3077 *
3078  CALL clacpy( 'L', m, m, a, lda, work( ir ),
3079  $ ldwrkr )
3080  CALL claset( 'U', m-1, m-1, czero, czero,
3081  $ work( ir+ldwrkr ), ldwrkr )
3082 *
3083 * Generate Q in VT
3084 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3085 * (RWorkspace: 0)
3086 *
3087  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3088  $ work( iwork ), lwork-iwork+1, ierr )
3089  ie = 1
3090  itauq = itau
3091  itaup = itauq + m
3092  iwork = itaup + m
3093 *
3094 * Bidiagonalize L in WORK(IR)
3095 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3096 * (RWorkspace: need M)
3097 *
3098  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
3099  $ rwork( ie ), work( itauq ),
3100  $ work( itaup ), work( iwork ),
3101  $ lwork-iwork+1, ierr )
3102 *
3103 * Generate right bidiagonalizing vectors in WORK(IR)
3104 * (CWorkspace: need M*M+3*M-1,
3105 * prefer M*M+2*M+(M-1)*NB)
3106 * (RWorkspace: 0)
3107 *
3108  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
3109  $ work( itaup ), work( iwork ),
3110  $ lwork-iwork+1, ierr )
3111  irwork = ie + m
3112 *
3113 * Perform bidiagonal QR iteration, computing right
3114 * singular vectors of L in WORK(IR)
3115 * (CWorkspace: need M*M)
3116 * (RWorkspace: need BDSPAC)
3117 *
3118  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3119  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3120  $ rwork( irwork ), info )
3121 *
3122 * Multiply right singular vectors of L in WORK(IR) by
3123 * Q in VT, storing result in A
3124 * (CWorkspace: need M*M)
3125 * (RWorkspace: 0)
3126 *
3127  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
3128  $ ldwrkr, vt, ldvt, czero, a, lda )
3129 *
3130 * Copy right singular vectors of A from A to VT
3131 *
3132  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3133 *
3134  ELSE
3135 *
3136 * Insufficient workspace for a fast algorithm
3137 *
3138  itau = 1
3139  iwork = itau + m
3140 *
3141 * Compute A=L*Q, copying result to VT
3142 * (CWorkspace: need 2*M, prefer M+M*NB)
3143 * (RWorkspace: 0)
3144 *
3145  CALL cgelqf( m, n, a, lda, work( itau ),
3146  $ work( iwork ), lwork-iwork+1, ierr )
3147  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3148 *
3149 * Generate Q in VT
3150 * (CWorkspace: need M+N, prefer M+N*NB)
3151 * (RWorkspace: 0)
3152 *
3153  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3154  $ work( iwork ), lwork-iwork+1, ierr )
3155  ie = 1
3156  itauq = itau
3157  itaup = itauq + m
3158  iwork = itaup + m
3159 *
3160 * Zero out above L in A
3161 *
3162  CALL claset( 'U', m-1, m-1, czero, czero,
3163  $ a( 1, 2 ), lda )
3164 *
3165 * Bidiagonalize L in A
3166 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3167 * (RWorkspace: need M)
3168 *
3169  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3170  $ work( itauq ), work( itaup ),
3171  $ work( iwork ), lwork-iwork+1, ierr )
3172 *
3173 * Multiply right bidiagonalizing vectors in A by Q
3174 * in VT
3175 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3176 * (RWorkspace: 0)
3177 *
3178  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3179  $ work( itaup ), vt, ldvt,
3180  $ work( iwork ), lwork-iwork+1, ierr )
3181  irwork = ie + m
3182 *
3183 * Perform bidiagonal QR iteration, computing right
3184 * singular vectors of A in VT
3185 * (CWorkspace: 0)
3186 * (RWorkspace: need BDSPAC)
3187 *
3188  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3189  $ ldvt, cdum, 1, cdum, 1,
3190  $ rwork( irwork ), info )
3191 *
3192  END IF
3193 *
3194  ELSE IF( wntuo ) THEN
3195 *
3196 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3197 * N right singular vectors to be computed in VT and
3198 * M left singular vectors to be overwritten on A
3199 *
3200  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3201 *
3202 * Sufficient workspace for a fast algorithm
3203 *
3204  iu = 1
3205  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3206 *
3207 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3208 *
3209  ldwrku = lda
3210  ir = iu + ldwrku*m
3211  ldwrkr = lda
3212  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3213 *
3214 * WORK(IU) is LDA by M and WORK(IR) is M by M
3215 *
3216  ldwrku = lda
3217  ir = iu + ldwrku*m
3218  ldwrkr = m
3219  ELSE
3220 *
3221 * WORK(IU) is M by M and WORK(IR) is M by M
3222 *
3223  ldwrku = m
3224  ir = iu + ldwrku*m
3225  ldwrkr = m
3226  END IF
3227  itau = ir + ldwrkr*m
3228  iwork = itau + m
3229 *
3230 * Compute A=L*Q, copying result to VT
3231 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3232 * (RWorkspace: 0)
3233 *
3234  CALL cgelqf( m, n, a, lda, work( itau ),
3235  $ work( iwork ), lwork-iwork+1, ierr )
3236  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3237 *
3238 * Generate Q in VT
3239 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3240 * (RWorkspace: 0)
3241 *
3242  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3243  $ work( iwork ), lwork-iwork+1, ierr )
3244 *
3245 * Copy L to WORK(IU), zeroing out above it
3246 *
3247  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3248  $ ldwrku )
3249  CALL claset( 'U', m-1, m-1, czero, czero,
3250  $ work( iu+ldwrku ), ldwrku )
3251  ie = 1
3252  itauq = itau
3253  itaup = itauq + m
3254  iwork = itaup + m
3255 *
3256 * Bidiagonalize L in WORK(IU), copying result to
3257 * WORK(IR)
3258 * (CWorkspace: need 2*M*M+3*M,
3259 * prefer 2*M*M+2*M+2*M*NB)
3260 * (RWorkspace: need M)
3261 *
3262  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3263  $ rwork( ie ), work( itauq ),
3264  $ work( itaup ), work( iwork ),
3265  $ lwork-iwork+1, ierr )
3266  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
3267  $ work( ir ), ldwrkr )
3268 *
3269 * Generate right bidiagonalizing vectors in WORK(IU)
3270 * (CWorkspace: need 2*M*M+3*M-1,
3271 * prefer 2*M*M+2*M+(M-1)*NB)
3272 * (RWorkspace: 0)
3273 *
3274  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3275  $ work( itaup ), work( iwork ),
3276  $ lwork-iwork+1, ierr )
3277 *
3278 * Generate left bidiagonalizing vectors in WORK(IR)
3279 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3280 * (RWorkspace: 0)
3281 *
3282  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3283  $ work( itauq ), work( iwork ),
3284  $ lwork-iwork+1, ierr )
3285  irwork = ie + m
3286 *
3287 * Perform bidiagonal QR iteration, computing left
3288 * singular vectors of L in WORK(IR) and computing
3289 * right singular vectors of L in WORK(IU)
3290 * (CWorkspace: need 2*M*M)
3291 * (RWorkspace: need BDSPAC)
3292 *
3293  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3294  $ work( iu ), ldwrku, work( ir ),
3295  $ ldwrkr, cdum, 1, rwork( irwork ),
3296  $ info )
3297 *
3298 * Multiply right singular vectors of L in WORK(IU) by
3299 * Q in VT, storing result in A
3300 * (CWorkspace: need M*M)
3301 * (RWorkspace: 0)
3302 *
3303  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3304  $ ldwrku, vt, ldvt, czero, a, lda )
3305 *
3306 * Copy right singular vectors of A from A to VT
3307 *
3308  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3309 *
3310 * Copy left singular vectors of A from WORK(IR) to A
3311 *
3312  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
3313  $ lda )
3314 *
3315  ELSE
3316 *
3317 * Insufficient workspace for a fast algorithm
3318 *
3319  itau = 1
3320  iwork = itau + m
3321 *
3322 * Compute A=L*Q, copying result to VT
3323 * (CWorkspace: need 2*M, prefer M+M*NB)
3324 * (RWorkspace: 0)
3325 *
3326  CALL cgelqf( m, n, a, lda, work( itau ),
3327  $ work( iwork ), lwork-iwork+1, ierr )
3328  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3329 *
3330 * Generate Q in VT
3331 * (CWorkspace: need M+N, prefer M+N*NB)
3332 * (RWorkspace: 0)
3333 *
3334  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3335  $ work( iwork ), lwork-iwork+1, ierr )
3336  ie = 1
3337  itauq = itau
3338  itaup = itauq + m
3339  iwork = itaup + m
3340 *
3341 * Zero out above L in A
3342 *
3343  CALL claset( 'U', m-1, m-1, czero, czero,
3344  $ a( 1, 2 ), lda )
3345 *
3346 * Bidiagonalize L in A
3347 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3348 * (RWorkspace: need M)
3349 *
3350  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3351  $ work( itauq ), work( itaup ),
3352  $ work( iwork ), lwork-iwork+1, ierr )
3353 *
3354 * Multiply right bidiagonalizing vectors in A by Q
3355 * in VT
3356 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3357 * (RWorkspace: 0)
3358 *
3359  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3360  $ work( itaup ), vt, ldvt,
3361  $ work( iwork ), lwork-iwork+1, ierr )
3362 *
3363 * Generate left bidiagonalizing vectors in A
3364 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3365 * (RWorkspace: 0)
3366 *
3367  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
3368  $ work( iwork ), lwork-iwork+1, ierr )
3369  irwork = ie + m
3370 *
3371 * Perform bidiagonal QR iteration, computing left
3372 * singular vectors of A in A and computing right
3373 * singular vectors of A in VT
3374 * (CWorkspace: 0)
3375 * (RWorkspace: need BDSPAC)
3376 *
3377  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3378  $ ldvt, a, lda, cdum, 1,
3379  $ rwork( irwork ), info )
3380 *
3381  END IF
3382 *
3383  ELSE IF( wntuas ) THEN
3384 *
3385 * Path 9t(N much larger than M, JOBU='S' or 'A',
3386 * JOBVT='A')
3387 * N right singular vectors to be computed in VT and
3388 * M left singular vectors to be computed in U
3389 *
3390  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3391 *
3392 * Sufficient workspace for a fast algorithm
3393 *
3394  iu = 1
3395  IF( lwork.GE.wrkbl+lda*m ) THEN
3396 *
3397 * WORK(IU) is LDA by M
3398 *
3399  ldwrku = lda
3400  ELSE
3401 *
3402 * WORK(IU) is M by M
3403 *
3404  ldwrku = m
3405  END IF
3406  itau = iu + ldwrku*m
3407  iwork = itau + m
3408 *
3409 * Compute A=L*Q, copying result to VT
3410 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3411 * (RWorkspace: 0)
3412 *
3413  CALL cgelqf( m, n, a, lda, work( itau ),
3414  $ work( iwork ), lwork-iwork+1, ierr )
3415  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3416 *
3417 * Generate Q in VT
3418 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3419 * (RWorkspace: 0)
3420 *
3421  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3422  $ work( iwork ), lwork-iwork+1, ierr )
3423 *
3424 * Copy L to WORK(IU), zeroing out above it
3425 *
3426  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3427  $ ldwrku )
3428  CALL claset( 'U', m-1, m-1, czero, czero,
3429  $ work( iu+ldwrku ), ldwrku )
3430  ie = 1
3431  itauq = itau
3432  itaup = itauq + m
3433  iwork = itaup + m
3434 *
3435 * Bidiagonalize L in WORK(IU), copying result to U
3436 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3437 * (RWorkspace: need M)
3438 *
3439  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3440  $ rwork( ie ), work( itauq ),
3441  $ work( itaup ), work( iwork ),
3442  $ lwork-iwork+1, ierr )
3443  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
3444  $ ldu )
3445 *
3446 * Generate right bidiagonalizing vectors in WORK(IU)
3447 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3448 * (RWorkspace: 0)
3449 *
3450  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3451  $ work( itaup ), work( iwork ),
3452  $ lwork-iwork+1, ierr )
3453 *
3454 * Generate left bidiagonalizing vectors in U
3455 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3456 * (RWorkspace: 0)
3457 *
3458  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3459  $ work( iwork ), lwork-iwork+1, ierr )
3460  irwork = ie + m
3461 *
3462 * Perform bidiagonal QR iteration, computing left
3463 * singular vectors of L in U and computing right
3464 * singular vectors of L in WORK(IU)
3465 * (CWorkspace: need M*M)
3466 * (RWorkspace: need BDSPAC)
3467 *
3468  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3469  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3470  $ rwork( irwork ), info )
3471 *
3472 * Multiply right singular vectors of L in WORK(IU) by
3473 * Q in VT, storing result in A
3474 * (CWorkspace: need M*M)
3475 * (RWorkspace: 0)
3476 *
3477  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3478  $ ldwrku, vt, ldvt, czero, a, lda )
3479 *
3480 * Copy right singular vectors of A from A to VT
3481 *
3482  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3483 *
3484  ELSE
3485 *
3486 * Insufficient workspace for a fast algorithm
3487 *
3488  itau = 1
3489  iwork = itau + m
3490 *
3491 * Compute A=L*Q, copying result to VT
3492 * (CWorkspace: need 2*M, prefer M+M*NB)
3493 * (RWorkspace: 0)
3494 *
3495  CALL cgelqf( m, n, a, lda, work( itau ),
3496  $ work( iwork ), lwork-iwork+1, ierr )
3497  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3498 *
3499 * Generate Q in VT
3500 * (CWorkspace: need M+N, prefer M+N*NB)
3501 * (RWorkspace: 0)
3502 *
3503  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3504  $ work( iwork ), lwork-iwork+1, ierr )
3505 *
3506 * Copy L to U, zeroing out above it
3507 *
3508  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3509  CALL claset( 'U', m-1, m-1, czero, czero,
3510  $ u( 1, 2 ), ldu )
3511  ie = 1
3512  itauq = itau
3513  itaup = itauq + m
3514  iwork = itaup + m
3515 *
3516 * Bidiagonalize L in U
3517 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3518 * (RWorkspace: need M)
3519 *
3520  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3521  $ work( itauq ), work( itaup ),
3522  $ work( iwork ), lwork-iwork+1, ierr )
3523 *
3524 * Multiply right bidiagonalizing vectors in U by Q
3525 * in VT
3526 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3527 * (RWorkspace: 0)
3528 *
3529  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3530  $ work( itaup ), vt, ldvt,
3531  $ work( iwork ), lwork-iwork+1, ierr )
3532 *
3533 * Generate left bidiagonalizing vectors in U
3534 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3535 * (RWorkspace: 0)
3536 *
3537  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3538  $ work( iwork ), lwork-iwork+1, ierr )
3539  irwork = ie + m
3540 *
3541 * Perform bidiagonal QR iteration, computing left
3542 * singular vectors of A in U and computing right
3543 * singular vectors of A in VT
3544 * (CWorkspace: 0)
3545 * (RWorkspace: need BDSPAC)
3546 *
3547  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3548  $ ldvt, u, ldu, cdum, 1,
3549  $ rwork( irwork ), info )
3550 *
3551  END IF
3552 *
3553  END IF
3554 *
3555  END IF
3556 *
3557  ELSE
3558 *
3559 * N .LT. MNTHR
3560 *
3561 * Path 10t(N greater than M, but not much larger)
3562 * Reduce to bidiagonal form without LQ decomposition
3563 *
3564  ie = 1
3565  itauq = 1
3566  itaup = itauq + m
3567  iwork = itaup + m
3568 *
3569 * Bidiagonalize A
3570 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3571 * (RWorkspace: M)
3572 *
3573  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3574  $ work( itaup ), work( iwork ), lwork-iwork+1,
3575  $ ierr )
3576  IF( wntuas ) THEN
3577 *
3578 * If left singular vectors desired in U, copy result to U
3579 * and generate left bidiagonalizing vectors in U
3580 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3581 * (RWorkspace: 0)
3582 *
3583  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3584  CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3585  $ work( iwork ), lwork-iwork+1, ierr )
3586  END IF
3587  IF( wntvas ) THEN
3588 *
3589 * If right singular vectors desired in VT, copy result to
3590 * VT and generate right bidiagonalizing vectors in VT
3591 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3592 * (RWorkspace: 0)
3593 *
3594  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3595  IF( wntva )
3596  $ nrvt = n
3597  IF( wntvs )
3598  $ nrvt = m
3599  CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3600  $ work( iwork ), lwork-iwork+1, ierr )
3601  END IF
3602  IF( wntuo ) THEN
3603 *
3604 * If left singular vectors desired in A, generate left
3605 * bidiagonalizing vectors in A
3606 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3607 * (RWorkspace: 0)
3608 *
3609  CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
3610  $ work( iwork ), lwork-iwork+1, ierr )
3611  END IF
3612  IF( wntvo ) THEN
3613 *
3614 * If right singular vectors desired in A, generate right
3615 * bidiagonalizing vectors in A
3616 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3617 * (RWorkspace: 0)
3618 *
3619  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
3620  $ work( iwork ), lwork-iwork+1, ierr )
3621  END IF
3622  irwork = ie + m
3623  IF( wntuas .OR. wntuo )
3624  $ nru = m
3625  IF( wntun )
3626  $ nru = 0
3627  IF( wntvas .OR. wntvo )
3628  $ ncvt = n
3629  IF( wntvn )
3630  $ ncvt = 0
3631  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3632 *
3633 * Perform bidiagonal QR iteration, if desired, computing
3634 * left singular vectors in U and computing right singular
3635 * vectors in VT
3636 * (CWorkspace: 0)
3637 * (RWorkspace: need BDSPAC)
3638 *
3639  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3640  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3641  $ info )
3642  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3643 *
3644 * Perform bidiagonal QR iteration, if desired, computing
3645 * left singular vectors in U and computing right singular
3646 * vectors in A
3647 * (CWorkspace: 0)
3648 * (RWorkspace: need BDSPAC)
3649 *
3650  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3651  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3652  $ info )
3653  ELSE
3654 *
3655 * Perform bidiagonal QR iteration, if desired, computing
3656 * left singular vectors in A and computing right singular
3657 * vectors in VT
3658 * (CWorkspace: 0)
3659 * (RWorkspace: need BDSPAC)
3660 *
3661  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3662  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3663  $ info )
3664  END IF
3665 *
3666  END IF
3667 *
3668  END IF
3669 *
3670 * Undo scaling if necessary
3671 *
3672  IF( iscl.EQ.1 ) THEN
3673  IF( anrm.GT.bignum )
3674  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3675  $ ierr )
3676  IF( info.NE.0 .AND. anrm.GT.bignum )
3677  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3678  $ rwork( ie ), minmn, ierr )
3679  IF( anrm.LT.smlnum )
3680  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3681  $ ierr )
3682  IF( info.NE.0 .AND. anrm.LT.smlnum )
3683  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3684  $ rwork( ie ), minmn, ierr )
3685  END IF
3686 *
3687 * Return optimal workspace in WORK(1)
3688 *
3689  work( 1 ) = maxwrk
3690 *
3691  RETURN
3692 *
3693 * End of CGESVD
3694 *
3695  END