LAPACK  3.6.0
LAPACK: Linear Algebra PACKage
cgesvd.f
Go to the documentation of this file.
1 *> \brief <b> CGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download CGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL RWORK( * ), S( * )
30 * COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> CGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is REAL array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is REAL array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if CBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \date April 2012
210 *
211 *> \ingroup complexGEsing
212 *
213 * =====================================================================
214  SUBROUTINE cgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
215  $ work, lwork, rwork, info )
216 *
217 * -- LAPACK driver routine (version 3.6.0) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER JOBU, JOBVT
224  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
225 * ..
226 * .. Array Arguments ..
227  REAL RWORK( * ), S( * )
228  COMPLEX A( lda, * ), U( ldu, * ), VT( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX CZERO, CONE
236  parameter( czero = ( 0.0e0, 0.0e0 ),
237  $ cone = ( 1.0e0, 0.0e0 ) )
238  REAL ZERO, ONE
239  parameter( zero = 0.0e0, one = 1.0e0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
249  $ lwork_cgebrd, lwork_cungbr_p, lwork_cungbr_q,
250  $ lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m
251  REAL ANRM, BIGNUM, EPS, SMLNUM
252 * ..
253 * .. Local Arrays ..
254  REAL DUM( 1 )
255  COMPLEX CDUM( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,
260  $ slascl, xerbla
261 * ..
262 * .. External Functions ..
263  LOGICAL LSAME
264  INTEGER ILAENV
265  REAL CLANGE, SLAMCH
266  EXTERNAL lsame, ilaenv, clange, slamch
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for CGEQRF
324  CALL cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_cgeqrf=cdum(1)
326 * Compute space needed for CUNGQR
327  CALL cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
328  lwork_cungqr_n=cdum(1)
329  CALL cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
330  lwork_cungqr_m=cdum(1)
331 * Compute space needed for CGEBRD
332  CALL cgebrd( n, n, a, lda, s, dum(1), cdum(1),
333  $ cdum(1), cdum(1), -1, ierr )
334  lwork_cgebrd=cdum(1)
335 * Compute space needed for CUNGBR
336  CALL cungbr( 'P', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_cungbr_p=cdum(1)
339  CALL cungbr( 'Q', n, n, n, a, lda, cdum(1),
340  $ cdum(1), -1, ierr )
341  lwork_cungbr_q=cdum(1)
342 *
343  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
344  IF( m.GE.mnthr ) THEN
345  IF( wntun ) THEN
346 *
347 * Path 1 (M much larger than N, JOBU='N')
348 *
349  maxwrk = n + lwork_cgeqrf
350  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
351  IF( wntvo .OR. wntvas )
352  $ maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
353  minwrk = 3*n
354  ELSE IF( wntuo .AND. wntvn ) THEN
355 *
356 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
357 *
358  wrkbl = n + lwork_cgeqrf
359  wrkbl = max( wrkbl, n+lwork_cungqr_n )
360  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
361  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
362  maxwrk = max( n*n+wrkbl, n*n+m*n )
363  minwrk = 2*n + m
364  ELSE IF( wntuo .AND. wntvas ) THEN
365 *
366 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
367 * 'A')
368 *
369  wrkbl = n + lwork_cgeqrf
370  wrkbl = max( wrkbl, n+lwork_cungqr_n )
371  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
372  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
373  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
374  maxwrk = max( n*n+wrkbl, n*n+m*n )
375  minwrk = 2*n + m
376  ELSE IF( wntus .AND. wntvn ) THEN
377 *
378 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
379 *
380  wrkbl = n + lwork_cgeqrf
381  wrkbl = max( wrkbl, n+lwork_cungqr_n )
382  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
383  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
384  maxwrk = n*n + wrkbl
385  minwrk = 2*n + m
386  ELSE IF( wntus .AND. wntvo ) THEN
387 *
388 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
389 *
390  wrkbl = n + lwork_cgeqrf
391  wrkbl = max( wrkbl, n+lwork_cungqr_n )
392  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
393  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
394  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
395  maxwrk = 2*n*n + wrkbl
396  minwrk = 2*n + m
397  ELSE IF( wntus .AND. wntvas ) THEN
398 *
399 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
400 * 'A')
401 *
402  wrkbl = n + lwork_cgeqrf
403  wrkbl = max( wrkbl, n+lwork_cungqr_n )
404  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
405  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
406  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
407  maxwrk = n*n + wrkbl
408  minwrk = 2*n + m
409  ELSE IF( wntua .AND. wntvn ) THEN
410 *
411 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
412 *
413  wrkbl = n + lwork_cgeqrf
414  wrkbl = max( wrkbl, n+lwork_cungqr_m )
415  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
416  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
417  maxwrk = n*n + wrkbl
418  minwrk = 2*n + m
419  ELSE IF( wntua .AND. wntvo ) THEN
420 *
421 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
422 *
423  wrkbl = n + lwork_cgeqrf
424  wrkbl = max( wrkbl, n+lwork_cungqr_m )
425  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
426  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
427  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = 2*n + m
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_cgeqrf
436  wrkbl = max( wrkbl, n+lwork_cungqr_m )
437  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
438  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
439  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
440  maxwrk = n*n + wrkbl
441  minwrk = 2*n + m
442  END IF
443  ELSE
444 *
445 * Path 10 (M at least N, but not much larger)
446 *
447  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
448  $ cdum(1), cdum(1), -1, ierr )
449  lwork_cgebrd=cdum(1)
450  maxwrk = 2*n + lwork_cgebrd
451  IF( wntus .OR. wntuo ) THEN
452  CALL cungbr( 'Q', m, n, n, a, lda, cdum(1),
453  $ cdum(1), -1, ierr )
454  lwork_cungbr_q=cdum(1)
455  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
456  END IF
457  IF( wntua ) THEN
458  CALL cungbr( 'Q', m, m, n, a, lda, cdum(1),
459  $ cdum(1), -1, ierr )
460  lwork_cungbr_q=cdum(1)
461  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
462  END IF
463  IF( .NOT.wntvn ) THEN
464  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
465  minwrk = 2*n + m
466  END IF
467  END IF
468  ELSE IF( minmn.GT.0 ) THEN
469 *
470 * Space needed for CBDSQR is BDSPAC = 5*M
471 *
472  mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
473 * Compute space needed for CGELQF
474  CALL cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
475  lwork_cgelqf=cdum(1)
476 * Compute space needed for CUNGLQ
477  CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
478  $ ierr )
479  lwork_cunglq_n=cdum(1)
480  CALL cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
481  lwork_cunglq_m=cdum(1)
482 * Compute space needed for CGEBRD
483  CALL cgebrd( m, m, a, lda, s, dum(1), cdum(1),
484  $ cdum(1), cdum(1), -1, ierr )
485  lwork_cgebrd=cdum(1)
486 * Compute space needed for CUNGBR P
487  CALL cungbr( 'P', m, m, m, a, n, cdum(1),
488  $ cdum(1), -1, ierr )
489  lwork_cungbr_p=cdum(1)
490 * Compute space needed for CUNGBR Q
491  CALL cungbr( 'Q', m, m, m, a, n, cdum(1),
492  $ cdum(1), -1, ierr )
493  lwork_cungbr_q=cdum(1)
494  IF( n.GE.mnthr ) THEN
495  IF( wntvn ) THEN
496 *
497 * Path 1t(N much larger than M, JOBVT='N')
498 *
499  maxwrk = m + lwork_cgelqf
500  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
501  IF( wntuo .OR. wntuas )
502  $ maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
503  minwrk = 3*m
504  ELSE IF( wntvo .AND. wntun ) THEN
505 *
506 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507 *
508  wrkbl = m + lwork_cgelqf
509  wrkbl = max( wrkbl, m+lwork_cunglq_m )
510  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
511  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
512  maxwrk = max( m*m+wrkbl, m*m+m*n )
513  minwrk = 2*m + n
514  ELSE IF( wntvo .AND. wntuas ) THEN
515 *
516 * Path 3t(N much larger than M, JOBU='S' or 'A',
517 * JOBVT='O')
518 *
519  wrkbl = m + lwork_cgelqf
520  wrkbl = max( wrkbl, m+lwork_cunglq_m )
521  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
522  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
523  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
524  maxwrk = max( m*m+wrkbl, m*m+m*n )
525  minwrk = 2*m + n
526  ELSE IF( wntvs .AND. wntun ) THEN
527 *
528 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
529 *
530  wrkbl = m + lwork_cgelqf
531  wrkbl = max( wrkbl, m+lwork_cunglq_m )
532  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
533  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
534  maxwrk = m*m + wrkbl
535  minwrk = 2*m + n
536  ELSE IF( wntvs .AND. wntuo ) THEN
537 *
538 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
539 *
540  wrkbl = m + lwork_cgelqf
541  wrkbl = max( wrkbl, m+lwork_cunglq_m )
542  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
543  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
544  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
545  maxwrk = 2*m*m + wrkbl
546  minwrk = 2*m + n
547  ELSE IF( wntvs .AND. wntuas ) THEN
548 *
549 * Path 6t(N much larger than M, JOBU='S' or 'A',
550 * JOBVT='S')
551 *
552  wrkbl = m + lwork_cgelqf
553  wrkbl = max( wrkbl, m+lwork_cunglq_m )
554  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
555  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
556  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
557  maxwrk = m*m + wrkbl
558  minwrk = 2*m + n
559  ELSE IF( wntva .AND. wntun ) THEN
560 *
561 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
562 *
563  wrkbl = m + lwork_cgelqf
564  wrkbl = max( wrkbl, m+lwork_cunglq_n )
565  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
566  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
567  maxwrk = m*m + wrkbl
568  minwrk = 2*m + n
569  ELSE IF( wntva .AND. wntuo ) THEN
570 *
571 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
572 *
573  wrkbl = m + lwork_cgelqf
574  wrkbl = max( wrkbl, m+lwork_cunglq_n )
575  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
576  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
577  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
578  maxwrk = 2*m*m + wrkbl
579  minwrk = 2*m + n
580  ELSE IF( wntva .AND. wntuas ) THEN
581 *
582 * Path 9t(N much larger than M, JOBU='S' or 'A',
583 * JOBVT='A')
584 *
585  wrkbl = m + lwork_cgelqf
586  wrkbl = max( wrkbl, m+lwork_cunglq_n )
587  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
588  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
589  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
590  maxwrk = m*m + wrkbl
591  minwrk = 2*m + n
592  END IF
593  ELSE
594 *
595 * Path 10t(N greater than M, but not much larger)
596 *
597  CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
598  $ cdum(1), cdum(1), -1, ierr )
599  lwork_cgebrd=cdum(1)
600  maxwrk = 2*m + lwork_cgebrd
601  IF( wntvs .OR. wntvo ) THEN
602 * Compute space needed for CUNGBR P
603  CALL cungbr( 'P', m, n, m, a, n, cdum(1),
604  $ cdum(1), -1, ierr )
605  lwork_cungbr_p=cdum(1)
606  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
607  END IF
608  IF( wntva ) THEN
609  CALL cungbr( 'P', n, n, m, a, n, cdum(1),
610  $ cdum(1), -1, ierr )
611  lwork_cungbr_p=cdum(1)
612  maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
613  END IF
614  IF( .NOT.wntun ) THEN
615  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
616  minwrk = 2*m + n
617  END IF
618  END IF
619  END IF
620  maxwrk = max( minwrk, maxwrk )
621  work( 1 ) = maxwrk
622 *
623  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
624  info = -13
625  END IF
626  END IF
627 *
628  IF( info.NE.0 ) THEN
629  CALL xerbla( 'CGESVD', -info )
630  RETURN
631  ELSE IF( lquery ) THEN
632  RETURN
633  END IF
634 *
635 * Quick return if possible
636 *
637  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
638  RETURN
639  END IF
640 *
641 * Get machine constants
642 *
643  eps = slamch( 'P' )
644  smlnum = sqrt( slamch( 'S' ) ) / eps
645  bignum = one / smlnum
646 *
647 * Scale A if max element outside range [SMLNUM,BIGNUM]
648 *
649  anrm = clange( 'M', m, n, a, lda, dum )
650  iscl = 0
651  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
652  iscl = 1
653  CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
654  ELSE IF( anrm.GT.bignum ) THEN
655  iscl = 1
656  CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
657  END IF
658 *
659  IF( m.GE.n ) THEN
660 *
661 * A has at least as many rows as columns. If A has sufficiently
662 * more rows than columns, first reduce using the QR
663 * decomposition (if sufficient workspace available)
664 *
665  IF( m.GE.mnthr ) THEN
666 *
667  IF( wntun ) THEN
668 *
669 * Path 1 (M much larger than N, JOBU='N')
670 * No left singular vectors to be computed
671 *
672  itau = 1
673  iwork = itau + n
674 *
675 * Compute A=Q*R
676 * (CWorkspace: need 2*N, prefer N+N*NB)
677 * (RWorkspace: need 0)
678 *
679  CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),
680  $ lwork-iwork+1, ierr )
681 *
682 * Zero out below R
683 *
684  CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
685  $ lda )
686  ie = 1
687  itauq = 1
688  itaup = itauq + n
689  iwork = itaup + n
690 *
691 * Bidiagonalize R in A
692 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
693 * (RWorkspace: need N)
694 *
695  CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
696  $ work( itaup ), work( iwork ), lwork-iwork+1,
697  $ ierr )
698  ncvt = 0
699  IF( wntvo .OR. wntvas ) THEN
700 *
701 * If right singular vectors desired, generate P'.
702 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
703 * (RWorkspace: 0)
704 *
705  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
706  $ work( iwork ), lwork-iwork+1, ierr )
707  ncvt = n
708  END IF
709  irwork = ie + n
710 *
711 * Perform bidiagonal QR iteration, computing right
712 * singular vectors of A in A if desired
713 * (CWorkspace: 0)
714 * (RWorkspace: need BDSPAC)
715 *
716  CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
717  $ cdum, 1, cdum, 1, rwork( irwork ), info )
718 *
719 * If right singular vectors desired in VT, copy them there
720 *
721  IF( wntvas )
722  $ CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
723 *
724  ELSE IF( wntuo .AND. wntvn ) THEN
725 *
726 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
727 * N left singular vectors to be overwritten on A and
728 * no right singular vectors to be computed
729 *
730  IF( lwork.GE.n*n+3*n ) THEN
731 *
732 * Sufficient workspace for a fast algorithm
733 *
734  ir = 1
735  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
736 *
737 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
738 *
739  ldwrku = lda
740  ldwrkr = lda
741  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
742 *
743 * WORK(IU) is LDA by N, WORK(IR) is N by N
744 *
745  ldwrku = lda
746  ldwrkr = n
747  ELSE
748 *
749 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
750 *
751  ldwrku = ( lwork-n*n ) / n
752  ldwrkr = n
753  END IF
754  itau = ir + ldwrkr*n
755  iwork = itau + n
756 *
757 * Compute A=Q*R
758 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
759 * (RWorkspace: 0)
760 *
761  CALL cgeqrf( m, n, a, lda, work( itau ),
762  $ work( iwork ), lwork-iwork+1, ierr )
763 *
764 * Copy R to WORK(IR) and zero out below it
765 *
766  CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
767  CALL claset( 'L', n-1, n-1, czero, czero,
768  $ work( ir+1 ), ldwrkr )
769 *
770 * Generate Q in A
771 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
772 * (RWorkspace: 0)
773 *
774  CALL cungqr( m, n, n, a, lda, work( itau ),
775  $ work( iwork ), lwork-iwork+1, ierr )
776  ie = 1
777  itauq = itau
778  itaup = itauq + n
779  iwork = itaup + n
780 *
781 * Bidiagonalize R in WORK(IR)
782 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
783 * (RWorkspace: need N)
784 *
785  CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
786  $ work( itauq ), work( itaup ),
787  $ work( iwork ), lwork-iwork+1, ierr )
788 *
789 * Generate left vectors bidiagonalizing R
790 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
791 * (RWorkspace: need 0)
792 *
793  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
794  $ work( itauq ), work( iwork ),
795  $ lwork-iwork+1, ierr )
796  irwork = ie + n
797 *
798 * Perform bidiagonal QR iteration, computing left
799 * singular vectors of R in WORK(IR)
800 * (CWorkspace: need N*N)
801 * (RWorkspace: need BDSPAC)
802 *
803  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
804  $ work( ir ), ldwrkr, cdum, 1,
805  $ rwork( irwork ), info )
806  iu = itauq
807 *
808 * Multiply Q in A by left singular vectors of R in
809 * WORK(IR), storing result in WORK(IU) and copying to A
810 * (CWorkspace: need N*N+N, prefer N*N+M*N)
811 * (RWorkspace: 0)
812 *
813  DO 10 i = 1, m, ldwrku
814  chunk = min( m-i+1, ldwrku )
815  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
816  $ lda, work( ir ), ldwrkr, czero,
817  $ work( iu ), ldwrku )
818  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
819  $ a( i, 1 ), lda )
820  10 CONTINUE
821 *
822  ELSE
823 *
824 * Insufficient workspace for a fast algorithm
825 *
826  ie = 1
827  itauq = 1
828  itaup = itauq + n
829  iwork = itaup + n
830 *
831 * Bidiagonalize A
832 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
833 * (RWorkspace: N)
834 *
835  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
836  $ work( itauq ), work( itaup ),
837  $ work( iwork ), lwork-iwork+1, ierr )
838 *
839 * Generate left vectors bidiagonalizing A
840 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
841 * (RWorkspace: 0)
842 *
843  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
844  $ work( iwork ), lwork-iwork+1, ierr )
845  irwork = ie + n
846 *
847 * Perform bidiagonal QR iteration, computing left
848 * singular vectors of A in A
849 * (CWorkspace: need 0)
850 * (RWorkspace: need BDSPAC)
851 *
852  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
853  $ a, lda, cdum, 1, rwork( irwork ), info )
854 *
855  END IF
856 *
857  ELSE IF( wntuo .AND. wntvas ) THEN
858 *
859 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
860 * N left singular vectors to be overwritten on A and
861 * N right singular vectors to be computed in VT
862 *
863  IF( lwork.GE.n*n+3*n ) THEN
864 *
865 * Sufficient workspace for a fast algorithm
866 *
867  ir = 1
868  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
869 *
870 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
871 *
872  ldwrku = lda
873  ldwrkr = lda
874  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
875 *
876 * WORK(IU) is LDA by N and WORK(IR) is N by N
877 *
878  ldwrku = lda
879  ldwrkr = n
880  ELSE
881 *
882 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
883 *
884  ldwrku = ( lwork-n*n ) / n
885  ldwrkr = n
886  END IF
887  itau = ir + ldwrkr*n
888  iwork = itau + n
889 *
890 * Compute A=Q*R
891 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
892 * (RWorkspace: 0)
893 *
894  CALL cgeqrf( m, n, a, lda, work( itau ),
895  $ work( iwork ), lwork-iwork+1, ierr )
896 *
897 * Copy R to VT, zeroing out below it
898 *
899  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
900  IF( n.GT.1 )
901  $ CALL claset( 'L', n-1, n-1, czero, czero,
902  $ vt( 2, 1 ), ldvt )
903 *
904 * Generate Q in A
905 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
906 * (RWorkspace: 0)
907 *
908  CALL cungqr( m, n, n, a, lda, work( itau ),
909  $ work( iwork ), lwork-iwork+1, ierr )
910  ie = 1
911  itauq = itau
912  itaup = itauq + n
913  iwork = itaup + n
914 *
915 * Bidiagonalize R in VT, copying result to WORK(IR)
916 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
917 * (RWorkspace: need N)
918 *
919  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
920  $ work( itauq ), work( itaup ),
921  $ work( iwork ), lwork-iwork+1, ierr )
922  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
923 *
924 * Generate left vectors bidiagonalizing R in WORK(IR)
925 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
926 * (RWorkspace: 0)
927 *
928  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
929  $ work( itauq ), work( iwork ),
930  $ lwork-iwork+1, ierr )
931 *
932 * Generate right vectors bidiagonalizing R in VT
933 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
934 * (RWorkspace: 0)
935 *
936  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
937  $ work( iwork ), lwork-iwork+1, ierr )
938  irwork = ie + n
939 *
940 * Perform bidiagonal QR iteration, computing left
941 * singular vectors of R in WORK(IR) and computing right
942 * singular vectors of R in VT
943 * (CWorkspace: need N*N)
944 * (RWorkspace: need BDSPAC)
945 *
946  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
947  $ ldvt, work( ir ), ldwrkr, cdum, 1,
948  $ rwork( irwork ), info )
949  iu = itauq
950 *
951 * Multiply Q in A by left singular vectors of R in
952 * WORK(IR), storing result in WORK(IU) and copying to A
953 * (CWorkspace: need N*N+N, prefer N*N+M*N)
954 * (RWorkspace: 0)
955 *
956  DO 20 i = 1, m, ldwrku
957  chunk = min( m-i+1, ldwrku )
958  CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
959  $ lda, work( ir ), ldwrkr, czero,
960  $ work( iu ), ldwrku )
961  CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
962  $ a( i, 1 ), lda )
963  20 CONTINUE
964 *
965  ELSE
966 *
967 * Insufficient workspace for a fast algorithm
968 *
969  itau = 1
970  iwork = itau + n
971 *
972 * Compute A=Q*R
973 * (CWorkspace: need 2*N, prefer N+N*NB)
974 * (RWorkspace: 0)
975 *
976  CALL cgeqrf( m, n, a, lda, work( itau ),
977  $ work( iwork ), lwork-iwork+1, ierr )
978 *
979 * Copy R to VT, zeroing out below it
980 *
981  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
982  IF( n.GT.1 )
983  $ CALL claset( 'L', n-1, n-1, czero, czero,
984  $ vt( 2, 1 ), ldvt )
985 *
986 * Generate Q in A
987 * (CWorkspace: need 2*N, prefer N+N*NB)
988 * (RWorkspace: 0)
989 *
990  CALL cungqr( m, n, n, a, lda, work( itau ),
991  $ work( iwork ), lwork-iwork+1, ierr )
992  ie = 1
993  itauq = itau
994  itaup = itauq + n
995  iwork = itaup + n
996 *
997 * Bidiagonalize R in VT
998 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
999 * (RWorkspace: N)
1000 *
1001  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1002  $ work( itauq ), work( itaup ),
1003  $ work( iwork ), lwork-iwork+1, ierr )
1004 *
1005 * Multiply Q in A by left vectors bidiagonalizing R
1006 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1007 * (RWorkspace: 0)
1008 *
1009  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1010  $ work( itauq ), a, lda, work( iwork ),
1011  $ lwork-iwork+1, ierr )
1012 *
1013 * Generate right vectors bidiagonalizing R in VT
1014 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1015 * (RWorkspace: 0)
1016 *
1017  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1018  $ work( iwork ), lwork-iwork+1, ierr )
1019  irwork = ie + n
1020 *
1021 * Perform bidiagonal QR iteration, computing left
1022 * singular vectors of A in A and computing right
1023 * singular vectors of A in VT
1024 * (CWorkspace: 0)
1025 * (RWorkspace: need BDSPAC)
1026 *
1027  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1028  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1029  $ info )
1030 *
1031  END IF
1032 *
1033  ELSE IF( wntus ) THEN
1034 *
1035  IF( wntvn ) THEN
1036 *
1037 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1038 * N left singular vectors to be computed in U and
1039 * no right singular vectors to be computed
1040 *
1041  IF( lwork.GE.n*n+3*n ) THEN
1042 *
1043 * Sufficient workspace for a fast algorithm
1044 *
1045  ir = 1
1046  IF( lwork.GE.wrkbl+lda*n ) THEN
1047 *
1048 * WORK(IR) is LDA by N
1049 *
1050  ldwrkr = lda
1051  ELSE
1052 *
1053 * WORK(IR) is N by N
1054 *
1055  ldwrkr = n
1056  END IF
1057  itau = ir + ldwrkr*n
1058  iwork = itau + n
1059 *
1060 * Compute A=Q*R
1061 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1062 * (RWorkspace: 0)
1063 *
1064  CALL cgeqrf( m, n, a, lda, work( itau ),
1065  $ work( iwork ), lwork-iwork+1, ierr )
1066 *
1067 * Copy R to WORK(IR), zeroing out below it
1068 *
1069  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1070  $ ldwrkr )
1071  CALL claset( 'L', n-1, n-1, czero, czero,
1072  $ work( ir+1 ), ldwrkr )
1073 *
1074 * Generate Q in A
1075 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1076 * (RWorkspace: 0)
1077 *
1078  CALL cungqr( m, n, n, a, lda, work( itau ),
1079  $ work( iwork ), lwork-iwork+1, ierr )
1080  ie = 1
1081  itauq = itau
1082  itaup = itauq + n
1083  iwork = itaup + n
1084 *
1085 * Bidiagonalize R in WORK(IR)
1086 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1087 * (RWorkspace: need N)
1088 *
1089  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1090  $ rwork( ie ), work( itauq ),
1091  $ work( itaup ), work( iwork ),
1092  $ lwork-iwork+1, ierr )
1093 *
1094 * Generate left vectors bidiagonalizing R in WORK(IR)
1095 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1096 * (RWorkspace: 0)
1097 *
1098  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1099  $ work( itauq ), work( iwork ),
1100  $ lwork-iwork+1, ierr )
1101  irwork = ie + n
1102 *
1103 * Perform bidiagonal QR iteration, computing left
1104 * singular vectors of R in WORK(IR)
1105 * (CWorkspace: need N*N)
1106 * (RWorkspace: need BDSPAC)
1107 *
1108  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1109  $ 1, work( ir ), ldwrkr, cdum, 1,
1110  $ rwork( irwork ), info )
1111 *
1112 * Multiply Q in A by left singular vectors of R in
1113 * WORK(IR), storing result in U
1114 * (CWorkspace: need N*N)
1115 * (RWorkspace: 0)
1116 *
1117  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1118  $ work( ir ), ldwrkr, czero, u, ldu )
1119 *
1120  ELSE
1121 *
1122 * Insufficient workspace for a fast algorithm
1123 *
1124  itau = 1
1125  iwork = itau + n
1126 *
1127 * Compute A=Q*R, copying result to U
1128 * (CWorkspace: need 2*N, prefer N+N*NB)
1129 * (RWorkspace: 0)
1130 *
1131  CALL cgeqrf( m, n, a, lda, work( itau ),
1132  $ work( iwork ), lwork-iwork+1, ierr )
1133  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1134 *
1135 * Generate Q in U
1136 * (CWorkspace: need 2*N, prefer N+N*NB)
1137 * (RWorkspace: 0)
1138 *
1139  CALL cungqr( m, n, n, u, ldu, work( itau ),
1140  $ work( iwork ), lwork-iwork+1, ierr )
1141  ie = 1
1142  itauq = itau
1143  itaup = itauq + n
1144  iwork = itaup + n
1145 *
1146 * Zero out below R in A
1147 *
1148  CALL claset( 'L', n-1, n-1, czero, czero,
1149  $ a( 2, 1 ), lda )
1150 *
1151 * Bidiagonalize R in A
1152 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1153 * (RWorkspace: need N)
1154 *
1155  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1156  $ work( itauq ), work( itaup ),
1157  $ work( iwork ), lwork-iwork+1, ierr )
1158 *
1159 * Multiply Q in U by left vectors bidiagonalizing R
1160 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1161 * (RWorkspace: 0)
1162 *
1163  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1164  $ work( itauq ), u, ldu, work( iwork ),
1165  $ lwork-iwork+1, ierr )
1166  irwork = ie + n
1167 *
1168 * Perform bidiagonal QR iteration, computing left
1169 * singular vectors of A in U
1170 * (CWorkspace: 0)
1171 * (RWorkspace: need BDSPAC)
1172 *
1173  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1174  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1175  $ info )
1176 *
1177  END IF
1178 *
1179  ELSE IF( wntvo ) THEN
1180 *
1181 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1182 * N left singular vectors to be computed in U and
1183 * N right singular vectors to be overwritten on A
1184 *
1185  IF( lwork.GE.2*n*n+3*n ) THEN
1186 *
1187 * Sufficient workspace for a fast algorithm
1188 *
1189  iu = 1
1190  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1191 *
1192 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1193 *
1194  ldwrku = lda
1195  ir = iu + ldwrku*n
1196  ldwrkr = lda
1197  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1198 *
1199 * WORK(IU) is LDA by N and WORK(IR) is N by N
1200 *
1201  ldwrku = lda
1202  ir = iu + ldwrku*n
1203  ldwrkr = n
1204  ELSE
1205 *
1206 * WORK(IU) is N by N and WORK(IR) is N by N
1207 *
1208  ldwrku = n
1209  ir = iu + ldwrku*n
1210  ldwrkr = n
1211  END IF
1212  itau = ir + ldwrkr*n
1213  iwork = itau + n
1214 *
1215 * Compute A=Q*R
1216 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1217 * (RWorkspace: 0)
1218 *
1219  CALL cgeqrf( m, n, a, lda, work( itau ),
1220  $ work( iwork ), lwork-iwork+1, ierr )
1221 *
1222 * Copy R to WORK(IU), zeroing out below it
1223 *
1224  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1225  $ ldwrku )
1226  CALL claset( 'L', n-1, n-1, czero, czero,
1227  $ work( iu+1 ), ldwrku )
1228 *
1229 * Generate Q in A
1230 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1231 * (RWorkspace: 0)
1232 *
1233  CALL cungqr( m, n, n, a, lda, work( itau ),
1234  $ work( iwork ), lwork-iwork+1, ierr )
1235  ie = 1
1236  itauq = itau
1237  itaup = itauq + n
1238  iwork = itaup + n
1239 *
1240 * Bidiagonalize R in WORK(IU), copying result to
1241 * WORK(IR)
1242 * (CWorkspace: need 2*N*N+3*N,
1243 * prefer 2*N*N+2*N+2*N*NB)
1244 * (RWorkspace: need N)
1245 *
1246  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1247  $ rwork( ie ), work( itauq ),
1248  $ work( itaup ), work( iwork ),
1249  $ lwork-iwork+1, ierr )
1250  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1251  $ work( ir ), ldwrkr )
1252 *
1253 * Generate left bidiagonalizing vectors in WORK(IU)
1254 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1255 * (RWorkspace: 0)
1256 *
1257  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1258  $ work( itauq ), work( iwork ),
1259  $ lwork-iwork+1, ierr )
1260 *
1261 * Generate right bidiagonalizing vectors in WORK(IR)
1262 * (CWorkspace: need 2*N*N+3*N-1,
1263 * prefer 2*N*N+2*N+(N-1)*NB)
1264 * (RWorkspace: 0)
1265 *
1266  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1267  $ work( itaup ), work( iwork ),
1268  $ lwork-iwork+1, ierr )
1269  irwork = ie + n
1270 *
1271 * Perform bidiagonal QR iteration, computing left
1272 * singular vectors of R in WORK(IU) and computing
1273 * right singular vectors of R in WORK(IR)
1274 * (CWorkspace: need 2*N*N)
1275 * (RWorkspace: need BDSPAC)
1276 *
1277  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1278  $ work( ir ), ldwrkr, work( iu ),
1279  $ ldwrku, cdum, 1, rwork( irwork ),
1280  $ info )
1281 *
1282 * Multiply Q in A by left singular vectors of R in
1283 * WORK(IU), storing result in U
1284 * (CWorkspace: need N*N)
1285 * (RWorkspace: 0)
1286 *
1287  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1288  $ work( iu ), ldwrku, czero, u, ldu )
1289 *
1290 * Copy right singular vectors of R to A
1291 * (CWorkspace: need N*N)
1292 * (RWorkspace: 0)
1293 *
1294  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1295  $ lda )
1296 *
1297  ELSE
1298 *
1299 * Insufficient workspace for a fast algorithm
1300 *
1301  itau = 1
1302  iwork = itau + n
1303 *
1304 * Compute A=Q*R, copying result to U
1305 * (CWorkspace: need 2*N, prefer N+N*NB)
1306 * (RWorkspace: 0)
1307 *
1308  CALL cgeqrf( m, n, a, lda, work( itau ),
1309  $ work( iwork ), lwork-iwork+1, ierr )
1310  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1311 *
1312 * Generate Q in U
1313 * (CWorkspace: need 2*N, prefer N+N*NB)
1314 * (RWorkspace: 0)
1315 *
1316  CALL cungqr( m, n, n, u, ldu, work( itau ),
1317  $ work( iwork ), lwork-iwork+1, ierr )
1318  ie = 1
1319  itauq = itau
1320  itaup = itauq + n
1321  iwork = itaup + n
1322 *
1323 * Zero out below R in A
1324 *
1325  CALL claset( 'L', n-1, n-1, czero, czero,
1326  $ a( 2, 1 ), lda )
1327 *
1328 * Bidiagonalize R in A
1329 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1330 * (RWorkspace: need N)
1331 *
1332  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1333  $ work( itauq ), work( itaup ),
1334  $ work( iwork ), lwork-iwork+1, ierr )
1335 *
1336 * Multiply Q in U by left vectors bidiagonalizing R
1337 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1338 * (RWorkspace: 0)
1339 *
1340  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1341  $ work( itauq ), u, ldu, work( iwork ),
1342  $ lwork-iwork+1, ierr )
1343 *
1344 * Generate right vectors bidiagonalizing R in A
1345 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1346 * (RWorkspace: 0)
1347 *
1348  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1349  $ work( iwork ), lwork-iwork+1, ierr )
1350  irwork = ie + n
1351 *
1352 * Perform bidiagonal QR iteration, computing left
1353 * singular vectors of A in U and computing right
1354 * singular vectors of A in A
1355 * (CWorkspace: 0)
1356 * (RWorkspace: need BDSPAC)
1357 *
1358  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1359  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1360  $ info )
1361 *
1362  END IF
1363 *
1364  ELSE IF( wntvas ) THEN
1365 *
1366 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1367 * or 'A')
1368 * N left singular vectors to be computed in U and
1369 * N right singular vectors to be computed in VT
1370 *
1371  IF( lwork.GE.n*n+3*n ) THEN
1372 *
1373 * Sufficient workspace for a fast algorithm
1374 *
1375  iu = 1
1376  IF( lwork.GE.wrkbl+lda*n ) THEN
1377 *
1378 * WORK(IU) is LDA by N
1379 *
1380  ldwrku = lda
1381  ELSE
1382 *
1383 * WORK(IU) is N by N
1384 *
1385  ldwrku = n
1386  END IF
1387  itau = iu + ldwrku*n
1388  iwork = itau + n
1389 *
1390 * Compute A=Q*R
1391 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1392 * (RWorkspace: 0)
1393 *
1394  CALL cgeqrf( m, n, a, lda, work( itau ),
1395  $ work( iwork ), lwork-iwork+1, ierr )
1396 *
1397 * Copy R to WORK(IU), zeroing out below it
1398 *
1399  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1400  $ ldwrku )
1401  CALL claset( 'L', n-1, n-1, czero, czero,
1402  $ work( iu+1 ), ldwrku )
1403 *
1404 * Generate Q in A
1405 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1406 * (RWorkspace: 0)
1407 *
1408  CALL cungqr( m, n, n, a, lda, work( itau ),
1409  $ work( iwork ), lwork-iwork+1, ierr )
1410  ie = 1
1411  itauq = itau
1412  itaup = itauq + n
1413  iwork = itaup + n
1414 *
1415 * Bidiagonalize R in WORK(IU), copying result to VT
1416 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1417 * (RWorkspace: need N)
1418 *
1419  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1420  $ rwork( ie ), work( itauq ),
1421  $ work( itaup ), work( iwork ),
1422  $ lwork-iwork+1, ierr )
1423  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1424  $ ldvt )
1425 *
1426 * Generate left bidiagonalizing vectors in WORK(IU)
1427 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1428 * (RWorkspace: 0)
1429 *
1430  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1431  $ work( itauq ), work( iwork ),
1432  $ lwork-iwork+1, ierr )
1433 *
1434 * Generate right bidiagonalizing vectors in VT
1435 * (CWorkspace: need N*N+3*N-1,
1436 * prefer N*N+2*N+(N-1)*NB)
1437 * (RWorkspace: 0)
1438 *
1439  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1440  $ work( iwork ), lwork-iwork+1, ierr )
1441  irwork = ie + n
1442 *
1443 * Perform bidiagonal QR iteration, computing left
1444 * singular vectors of R in WORK(IU) and computing
1445 * right singular vectors of R in VT
1446 * (CWorkspace: need N*N)
1447 * (RWorkspace: need BDSPAC)
1448 *
1449  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1450  $ ldvt, work( iu ), ldwrku, cdum, 1,
1451  $ rwork( irwork ), info )
1452 *
1453 * Multiply Q in A by left singular vectors of R in
1454 * WORK(IU), storing result in U
1455 * (CWorkspace: need N*N)
1456 * (RWorkspace: 0)
1457 *
1458  CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1459  $ work( iu ), ldwrku, czero, u, ldu )
1460 *
1461  ELSE
1462 *
1463 * Insufficient workspace for a fast algorithm
1464 *
1465  itau = 1
1466  iwork = itau + n
1467 *
1468 * Compute A=Q*R, copying result to U
1469 * (CWorkspace: need 2*N, prefer N+N*NB)
1470 * (RWorkspace: 0)
1471 *
1472  CALL cgeqrf( m, n, a, lda, work( itau ),
1473  $ work( iwork ), lwork-iwork+1, ierr )
1474  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1475 *
1476 * Generate Q in U
1477 * (CWorkspace: need 2*N, prefer N+N*NB)
1478 * (RWorkspace: 0)
1479 *
1480  CALL cungqr( m, n, n, u, ldu, work( itau ),
1481  $ work( iwork ), lwork-iwork+1, ierr )
1482 *
1483 * Copy R to VT, zeroing out below it
1484 *
1485  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1486  IF( n.GT.1 )
1487  $ CALL claset( 'L', n-1, n-1, czero, czero,
1488  $ vt( 2, 1 ), ldvt )
1489  ie = 1
1490  itauq = itau
1491  itaup = itauq + n
1492  iwork = itaup + n
1493 *
1494 * Bidiagonalize R in VT
1495 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1496 * (RWorkspace: need N)
1497 *
1498  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1499  $ work( itauq ), work( itaup ),
1500  $ work( iwork ), lwork-iwork+1, ierr )
1501 *
1502 * Multiply Q in U by left bidiagonalizing vectors
1503 * in VT
1504 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1505 * (RWorkspace: 0)
1506 *
1507  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1508  $ work( itauq ), u, ldu, work( iwork ),
1509  $ lwork-iwork+1, ierr )
1510 *
1511 * Generate right bidiagonalizing vectors in VT
1512 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1513 * (RWorkspace: 0)
1514 *
1515  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1516  $ work( iwork ), lwork-iwork+1, ierr )
1517  irwork = ie + n
1518 *
1519 * Perform bidiagonal QR iteration, computing left
1520 * singular vectors of A in U and computing right
1521 * singular vectors of A in VT
1522 * (CWorkspace: 0)
1523 * (RWorkspace: need BDSPAC)
1524 *
1525  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1526  $ ldvt, u, ldu, cdum, 1,
1527  $ rwork( irwork ), info )
1528 *
1529  END IF
1530 *
1531  END IF
1532 *
1533  ELSE IF( wntua ) THEN
1534 *
1535  IF( wntvn ) THEN
1536 *
1537 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1538 * M left singular vectors to be computed in U and
1539 * no right singular vectors to be computed
1540 *
1541  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1542 *
1543 * Sufficient workspace for a fast algorithm
1544 *
1545  ir = 1
1546  IF( lwork.GE.wrkbl+lda*n ) THEN
1547 *
1548 * WORK(IR) is LDA by N
1549 *
1550  ldwrkr = lda
1551  ELSE
1552 *
1553 * WORK(IR) is N by N
1554 *
1555  ldwrkr = n
1556  END IF
1557  itau = ir + ldwrkr*n
1558  iwork = itau + n
1559 *
1560 * Compute A=Q*R, copying result to U
1561 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1562 * (RWorkspace: 0)
1563 *
1564  CALL cgeqrf( m, n, a, lda, work( itau ),
1565  $ work( iwork ), lwork-iwork+1, ierr )
1566  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1567 *
1568 * Copy R to WORK(IR), zeroing out below it
1569 *
1570  CALL clacpy( 'U', n, n, a, lda, work( ir ),
1571  $ ldwrkr )
1572  CALL claset( 'L', n-1, n-1, czero, czero,
1573  $ work( ir+1 ), ldwrkr )
1574 *
1575 * Generate Q in U
1576 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1577 * (RWorkspace: 0)
1578 *
1579  CALL cungqr( m, m, n, u, ldu, work( itau ),
1580  $ work( iwork ), lwork-iwork+1, ierr )
1581  ie = 1
1582  itauq = itau
1583  itaup = itauq + n
1584  iwork = itaup + n
1585 *
1586 * Bidiagonalize R in WORK(IR)
1587 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1588 * (RWorkspace: need N)
1589 *
1590  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1591  $ rwork( ie ), work( itauq ),
1592  $ work( itaup ), work( iwork ),
1593  $ lwork-iwork+1, ierr )
1594 *
1595 * Generate left bidiagonalizing vectors in WORK(IR)
1596 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1597 * (RWorkspace: 0)
1598 *
1599  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1600  $ work( itauq ), work( iwork ),
1601  $ lwork-iwork+1, ierr )
1602  irwork = ie + n
1603 *
1604 * Perform bidiagonal QR iteration, computing left
1605 * singular vectors of R in WORK(IR)
1606 * (CWorkspace: need N*N)
1607 * (RWorkspace: need BDSPAC)
1608 *
1609  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1610  $ 1, work( ir ), ldwrkr, cdum, 1,
1611  $ rwork( irwork ), info )
1612 *
1613 * Multiply Q in U by left singular vectors of R in
1614 * WORK(IR), storing result in A
1615 * (CWorkspace: need N*N)
1616 * (RWorkspace: 0)
1617 *
1618  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1619  $ work( ir ), ldwrkr, czero, a, lda )
1620 *
1621 * Copy left singular vectors of A from A to U
1622 *
1623  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1624 *
1625  ELSE
1626 *
1627 * Insufficient workspace for a fast algorithm
1628 *
1629  itau = 1
1630  iwork = itau + n
1631 *
1632 * Compute A=Q*R, copying result to U
1633 * (CWorkspace: need 2*N, prefer N+N*NB)
1634 * (RWorkspace: 0)
1635 *
1636  CALL cgeqrf( m, n, a, lda, work( itau ),
1637  $ work( iwork ), lwork-iwork+1, ierr )
1638  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1639 *
1640 * Generate Q in U
1641 * (CWorkspace: need N+M, prefer N+M*NB)
1642 * (RWorkspace: 0)
1643 *
1644  CALL cungqr( m, m, n, u, ldu, work( itau ),
1645  $ work( iwork ), lwork-iwork+1, ierr )
1646  ie = 1
1647  itauq = itau
1648  itaup = itauq + n
1649  iwork = itaup + n
1650 *
1651 * Zero out below R in A
1652 *
1653  CALL claset( 'L', n-1, n-1, czero, czero,
1654  $ a( 2, 1 ), lda )
1655 *
1656 * Bidiagonalize R in A
1657 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1658 * (RWorkspace: need N)
1659 *
1660  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1661  $ work( itauq ), work( itaup ),
1662  $ work( iwork ), lwork-iwork+1, ierr )
1663 *
1664 * Multiply Q in U by left bidiagonalizing vectors
1665 * in A
1666 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1667 * (RWorkspace: 0)
1668 *
1669  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1670  $ work( itauq ), u, ldu, work( iwork ),
1671  $ lwork-iwork+1, ierr )
1672  irwork = ie + n
1673 *
1674 * Perform bidiagonal QR iteration, computing left
1675 * singular vectors of A in U
1676 * (CWorkspace: 0)
1677 * (RWorkspace: need BDSPAC)
1678 *
1679  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1680  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1681  $ info )
1682 *
1683  END IF
1684 *
1685  ELSE IF( wntvo ) THEN
1686 *
1687 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1688 * M left singular vectors to be computed in U and
1689 * N right singular vectors to be overwritten on A
1690 *
1691  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1692 *
1693 * Sufficient workspace for a fast algorithm
1694 *
1695  iu = 1
1696  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1697 *
1698 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1699 *
1700  ldwrku = lda
1701  ir = iu + ldwrku*n
1702  ldwrkr = lda
1703  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1704 *
1705 * WORK(IU) is LDA by N and WORK(IR) is N by N
1706 *
1707  ldwrku = lda
1708  ir = iu + ldwrku*n
1709  ldwrkr = n
1710  ELSE
1711 *
1712 * WORK(IU) is N by N and WORK(IR) is N by N
1713 *
1714  ldwrku = n
1715  ir = iu + ldwrku*n
1716  ldwrkr = n
1717  END IF
1718  itau = ir + ldwrkr*n
1719  iwork = itau + n
1720 *
1721 * Compute A=Q*R, copying result to U
1722 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1723 * (RWorkspace: 0)
1724 *
1725  CALL cgeqrf( m, n, a, lda, work( itau ),
1726  $ work( iwork ), lwork-iwork+1, ierr )
1727  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1728 *
1729 * Generate Q in U
1730 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1731 * (RWorkspace: 0)
1732 *
1733  CALL cungqr( m, m, n, u, ldu, work( itau ),
1734  $ work( iwork ), lwork-iwork+1, ierr )
1735 *
1736 * Copy R to WORK(IU), zeroing out below it
1737 *
1738  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1739  $ ldwrku )
1740  CALL claset( 'L', n-1, n-1, czero, czero,
1741  $ work( iu+1 ), ldwrku )
1742  ie = 1
1743  itauq = itau
1744  itaup = itauq + n
1745  iwork = itaup + n
1746 *
1747 * Bidiagonalize R in WORK(IU), copying result to
1748 * WORK(IR)
1749 * (CWorkspace: need 2*N*N+3*N,
1750 * prefer 2*N*N+2*N+2*N*NB)
1751 * (RWorkspace: need N)
1752 *
1753  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1754  $ rwork( ie ), work( itauq ),
1755  $ work( itaup ), work( iwork ),
1756  $ lwork-iwork+1, ierr )
1757  CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1758  $ work( ir ), ldwrkr )
1759 *
1760 * Generate left bidiagonalizing vectors in WORK(IU)
1761 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1762 * (RWorkspace: 0)
1763 *
1764  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1765  $ work( itauq ), work( iwork ),
1766  $ lwork-iwork+1, ierr )
1767 *
1768 * Generate right bidiagonalizing vectors in WORK(IR)
1769 * (CWorkspace: need 2*N*N+3*N-1,
1770 * prefer 2*N*N+2*N+(N-1)*NB)
1771 * (RWorkspace: 0)
1772 *
1773  CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1774  $ work( itaup ), work( iwork ),
1775  $ lwork-iwork+1, ierr )
1776  irwork = ie + n
1777 *
1778 * Perform bidiagonal QR iteration, computing left
1779 * singular vectors of R in WORK(IU) and computing
1780 * right singular vectors of R in WORK(IR)
1781 * (CWorkspace: need 2*N*N)
1782 * (RWorkspace: need BDSPAC)
1783 *
1784  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1785  $ work( ir ), ldwrkr, work( iu ),
1786  $ ldwrku, cdum, 1, rwork( irwork ),
1787  $ info )
1788 *
1789 * Multiply Q in U by left singular vectors of R in
1790 * WORK(IU), storing result in A
1791 * (CWorkspace: need N*N)
1792 * (RWorkspace: 0)
1793 *
1794  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1795  $ work( iu ), ldwrku, czero, a, lda )
1796 *
1797 * Copy left singular vectors of A from A to U
1798 *
1799  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1800 *
1801 * Copy right singular vectors of R from WORK(IR) to A
1802 *
1803  CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1804  $ lda )
1805 *
1806  ELSE
1807 *
1808 * Insufficient workspace for a fast algorithm
1809 *
1810  itau = 1
1811  iwork = itau + n
1812 *
1813 * Compute A=Q*R, copying result to U
1814 * (CWorkspace: need 2*N, prefer N+N*NB)
1815 * (RWorkspace: 0)
1816 *
1817  CALL cgeqrf( m, n, a, lda, work( itau ),
1818  $ work( iwork ), lwork-iwork+1, ierr )
1819  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1820 *
1821 * Generate Q in U
1822 * (CWorkspace: need N+M, prefer N+M*NB)
1823 * (RWorkspace: 0)
1824 *
1825  CALL cungqr( m, m, n, u, ldu, work( itau ),
1826  $ work( iwork ), lwork-iwork+1, ierr )
1827  ie = 1
1828  itauq = itau
1829  itaup = itauq + n
1830  iwork = itaup + n
1831 *
1832 * Zero out below R in A
1833 *
1834  CALL claset( 'L', n-1, n-1, czero, czero,
1835  $ a( 2, 1 ), lda )
1836 *
1837 * Bidiagonalize R in A
1838 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1839 * (RWorkspace: need N)
1840 *
1841  CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1842  $ work( itauq ), work( itaup ),
1843  $ work( iwork ), lwork-iwork+1, ierr )
1844 *
1845 * Multiply Q in U by left bidiagonalizing vectors
1846 * in A
1847 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1848 * (RWorkspace: 0)
1849 *
1850  CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1851  $ work( itauq ), u, ldu, work( iwork ),
1852  $ lwork-iwork+1, ierr )
1853 *
1854 * Generate right bidiagonalizing vectors in A
1855 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1856 * (RWorkspace: 0)
1857 *
1858  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1859  $ work( iwork ), lwork-iwork+1, ierr )
1860  irwork = ie + n
1861 *
1862 * Perform bidiagonal QR iteration, computing left
1863 * singular vectors of A in U and computing right
1864 * singular vectors of A in A
1865 * (CWorkspace: 0)
1866 * (RWorkspace: need BDSPAC)
1867 *
1868  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1869  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1870  $ info )
1871 *
1872  END IF
1873 *
1874  ELSE IF( wntvas ) THEN
1875 *
1876 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1877 * or 'A')
1878 * M left singular vectors to be computed in U and
1879 * N right singular vectors to be computed in VT
1880 *
1881  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1882 *
1883 * Sufficient workspace for a fast algorithm
1884 *
1885  iu = 1
1886  IF( lwork.GE.wrkbl+lda*n ) THEN
1887 *
1888 * WORK(IU) is LDA by N
1889 *
1890  ldwrku = lda
1891  ELSE
1892 *
1893 * WORK(IU) is N by N
1894 *
1895  ldwrku = n
1896  END IF
1897  itau = iu + ldwrku*n
1898  iwork = itau + n
1899 *
1900 * Compute A=Q*R, copying result to U
1901 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1902 * (RWorkspace: 0)
1903 *
1904  CALL cgeqrf( m, n, a, lda, work( itau ),
1905  $ work( iwork ), lwork-iwork+1, ierr )
1906  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1907 *
1908 * Generate Q in U
1909 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1910 * (RWorkspace: 0)
1911 *
1912  CALL cungqr( m, m, n, u, ldu, work( itau ),
1913  $ work( iwork ), lwork-iwork+1, ierr )
1914 *
1915 * Copy R to WORK(IU), zeroing out below it
1916 *
1917  CALL clacpy( 'U', n, n, a, lda, work( iu ),
1918  $ ldwrku )
1919  CALL claset( 'L', n-1, n-1, czero, czero,
1920  $ work( iu+1 ), ldwrku )
1921  ie = 1
1922  itauq = itau
1923  itaup = itauq + n
1924  iwork = itaup + n
1925 *
1926 * Bidiagonalize R in WORK(IU), copying result to VT
1927 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1928 * (RWorkspace: need N)
1929 *
1930  CALL cgebrd( n, n, work( iu ), ldwrku, s,
1931  $ rwork( ie ), work( itauq ),
1932  $ work( itaup ), work( iwork ),
1933  $ lwork-iwork+1, ierr )
1934  CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1935  $ ldvt )
1936 *
1937 * Generate left bidiagonalizing vectors in WORK(IU)
1938 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1939 * (RWorkspace: 0)
1940 *
1941  CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1942  $ work( itauq ), work( iwork ),
1943  $ lwork-iwork+1, ierr )
1944 *
1945 * Generate right bidiagonalizing vectors in VT
1946 * (CWorkspace: need N*N+3*N-1,
1947 * prefer N*N+2*N+(N-1)*NB)
1948 * (RWorkspace: need 0)
1949 *
1950  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1951  $ work( iwork ), lwork-iwork+1, ierr )
1952  irwork = ie + n
1953 *
1954 * Perform bidiagonal QR iteration, computing left
1955 * singular vectors of R in WORK(IU) and computing
1956 * right singular vectors of R in VT
1957 * (CWorkspace: need N*N)
1958 * (RWorkspace: need BDSPAC)
1959 *
1960  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1961  $ ldvt, work( iu ), ldwrku, cdum, 1,
1962  $ rwork( irwork ), info )
1963 *
1964 * Multiply Q in U by left singular vectors of R in
1965 * WORK(IU), storing result in A
1966 * (CWorkspace: need N*N)
1967 * (RWorkspace: 0)
1968 *
1969  CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1970  $ work( iu ), ldwrku, czero, a, lda )
1971 *
1972 * Copy left singular vectors of A from A to U
1973 *
1974  CALL clacpy( 'F', m, n, a, lda, u, ldu )
1975 *
1976  ELSE
1977 *
1978 * Insufficient workspace for a fast algorithm
1979 *
1980  itau = 1
1981  iwork = itau + n
1982 *
1983 * Compute A=Q*R, copying result to U
1984 * (CWorkspace: need 2*N, prefer N+N*NB)
1985 * (RWorkspace: 0)
1986 *
1987  CALL cgeqrf( m, n, a, lda, work( itau ),
1988  $ work( iwork ), lwork-iwork+1, ierr )
1989  CALL clacpy( 'L', m, n, a, lda, u, ldu )
1990 *
1991 * Generate Q in U
1992 * (CWorkspace: need N+M, prefer N+M*NB)
1993 * (RWorkspace: 0)
1994 *
1995  CALL cungqr( m, m, n, u, ldu, work( itau ),
1996  $ work( iwork ), lwork-iwork+1, ierr )
1997 *
1998 * Copy R from A to VT, zeroing out below it
1999 *
2000  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2001  IF( n.GT.1 )
2002  $ CALL claset( 'L', n-1, n-1, czero, czero,
2003  $ vt( 2, 1 ), ldvt )
2004  ie = 1
2005  itauq = itau
2006  itaup = itauq + n
2007  iwork = itaup + n
2008 *
2009 * Bidiagonalize R in VT
2010 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2011 * (RWorkspace: need N)
2012 *
2013  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
2014  $ work( itauq ), work( itaup ),
2015  $ work( iwork ), lwork-iwork+1, ierr )
2016 *
2017 * Multiply Q in U by left bidiagonalizing vectors
2018 * in VT
2019 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2020 * (RWorkspace: 0)
2021 *
2022  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2023  $ work( itauq ), u, ldu, work( iwork ),
2024  $ lwork-iwork+1, ierr )
2025 *
2026 * Generate right bidiagonalizing vectors in VT
2027 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2028 * (RWorkspace: 0)
2029 *
2030  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2031  $ work( iwork ), lwork-iwork+1, ierr )
2032  irwork = ie + n
2033 *
2034 * Perform bidiagonal QR iteration, computing left
2035 * singular vectors of A in U and computing right
2036 * singular vectors of A in VT
2037 * (CWorkspace: 0)
2038 * (RWorkspace: need BDSPAC)
2039 *
2040  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2041  $ ldvt, u, ldu, cdum, 1,
2042  $ rwork( irwork ), info )
2043 *
2044  END IF
2045 *
2046  END IF
2047 *
2048  END IF
2049 *
2050  ELSE
2051 *
2052 * M .LT. MNTHR
2053 *
2054 * Path 10 (M at least N, but not much larger)
2055 * Reduce to bidiagonal form without QR decomposition
2056 *
2057  ie = 1
2058  itauq = 1
2059  itaup = itauq + n
2060  iwork = itaup + n
2061 *
2062 * Bidiagonalize A
2063 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2064 * (RWorkspace: need N)
2065 *
2066  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2067  $ work( itaup ), work( iwork ), lwork-iwork+1,
2068  $ ierr )
2069  IF( wntuas ) THEN
2070 *
2071 * If left singular vectors desired in U, copy result to U
2072 * and generate left bidiagonalizing vectors in U
2073 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2074 * (RWorkspace: 0)
2075 *
2076  CALL clacpy( 'L', m, n, a, lda, u, ldu )
2077  IF( wntus )
2078  $ ncu = n
2079  IF( wntua )
2080  $ ncu = m
2081  CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2082  $ work( iwork ), lwork-iwork+1, ierr )
2083  END IF
2084  IF( wntvas ) THEN
2085 *
2086 * If right singular vectors desired in VT, copy result to
2087 * VT and generate right bidiagonalizing vectors in VT
2088 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2089 * (RWorkspace: 0)
2090 *
2091  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2092  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2093  $ work( iwork ), lwork-iwork+1, ierr )
2094  END IF
2095  IF( wntuo ) THEN
2096 *
2097 * If left singular vectors desired in A, generate left
2098 * bidiagonalizing vectors in A
2099 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2100 * (RWorkspace: 0)
2101 *
2102  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
2103  $ work( iwork ), lwork-iwork+1, ierr )
2104  END IF
2105  IF( wntvo ) THEN
2106 *
2107 * If right singular vectors desired in A, generate right
2108 * bidiagonalizing vectors in A
2109 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2110 * (RWorkspace: 0)
2111 *
2112  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
2113  $ work( iwork ), lwork-iwork+1, ierr )
2114  END IF
2115  irwork = ie + n
2116  IF( wntuas .OR. wntuo )
2117  $ nru = m
2118  IF( wntun )
2119  $ nru = 0
2120  IF( wntvas .OR. wntvo )
2121  $ ncvt = n
2122  IF( wntvn )
2123  $ ncvt = 0
2124  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2125 *
2126 * Perform bidiagonal QR iteration, if desired, computing
2127 * left singular vectors in U and computing right singular
2128 * vectors in VT
2129 * (CWorkspace: 0)
2130 * (RWorkspace: need BDSPAC)
2131 *
2132  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2133  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2134  $ info )
2135  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2136 *
2137 * Perform bidiagonal QR iteration, if desired, computing
2138 * left singular vectors in U and computing right singular
2139 * vectors in A
2140 * (CWorkspace: 0)
2141 * (RWorkspace: need BDSPAC)
2142 *
2143  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2144  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2145  $ info )
2146  ELSE
2147 *
2148 * Perform bidiagonal QR iteration, if desired, computing
2149 * left singular vectors in A and computing right singular
2150 * vectors in VT
2151 * (CWorkspace: 0)
2152 * (RWorkspace: need BDSPAC)
2153 *
2154  CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2155  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2156  $ info )
2157  END IF
2158 *
2159  END IF
2160 *
2161  ELSE
2162 *
2163 * A has more columns than rows. If A has sufficiently more
2164 * columns than rows, first reduce using the LQ decomposition (if
2165 * sufficient workspace available)
2166 *
2167  IF( n.GE.mnthr ) THEN
2168 *
2169  IF( wntvn ) THEN
2170 *
2171 * Path 1t(N much larger than M, JOBVT='N')
2172 * No right singular vectors to be computed
2173 *
2174  itau = 1
2175  iwork = itau + m
2176 *
2177 * Compute A=L*Q
2178 * (CWorkspace: need 2*M, prefer M+M*NB)
2179 * (RWorkspace: 0)
2180 *
2181  CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),
2182  $ lwork-iwork+1, ierr )
2183 *
2184 * Zero out above L
2185 *
2186  CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2187  $ lda )
2188  ie = 1
2189  itauq = 1
2190  itaup = itauq + m
2191  iwork = itaup + m
2192 *
2193 * Bidiagonalize L in A
2194 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2195 * (RWorkspace: need M)
2196 *
2197  CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2198  $ work( itaup ), work( iwork ), lwork-iwork+1,
2199  $ ierr )
2200  IF( wntuo .OR. wntuas ) THEN
2201 *
2202 * If left singular vectors desired, generate Q
2203 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2204 * (RWorkspace: 0)
2205 *
2206  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2207  $ work( iwork ), lwork-iwork+1, ierr )
2208  END IF
2209  irwork = ie + m
2210  nru = 0
2211  IF( wntuo .OR. wntuas )
2212  $ nru = m
2213 *
2214 * Perform bidiagonal QR iteration, computing left singular
2215 * vectors of A in A if desired
2216 * (CWorkspace: 0)
2217 * (RWorkspace: need BDSPAC)
2218 *
2219  CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2220  $ a, lda, cdum, 1, rwork( irwork ), info )
2221 *
2222 * If left singular vectors desired in U, copy them there
2223 *
2224  IF( wntuas )
2225  $ CALL clacpy( 'F', m, m, a, lda, u, ldu )
2226 *
2227  ELSE IF( wntvo .AND. wntun ) THEN
2228 *
2229 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2230 * M right singular vectors to be overwritten on A and
2231 * no left singular vectors to be computed
2232 *
2233  IF( lwork.GE.m*m+3*m ) THEN
2234 *
2235 * Sufficient workspace for a fast algorithm
2236 *
2237  ir = 1
2238  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2239 *
2240 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2241 *
2242  ldwrku = lda
2243  chunk = n
2244  ldwrkr = lda
2245  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2246 *
2247 * WORK(IU) is LDA by N and WORK(IR) is M by M
2248 *
2249  ldwrku = lda
2250  chunk = n
2251  ldwrkr = m
2252  ELSE
2253 *
2254 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2255 *
2256  ldwrku = m
2257  chunk = ( lwork-m*m ) / m
2258  ldwrkr = m
2259  END IF
2260  itau = ir + ldwrkr*m
2261  iwork = itau + m
2262 *
2263 * Compute A=L*Q
2264 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2265 * (RWorkspace: 0)
2266 *
2267  CALL cgelqf( m, n, a, lda, work( itau ),
2268  $ work( iwork ), lwork-iwork+1, ierr )
2269 *
2270 * Copy L to WORK(IR) and zero out above it
2271 *
2272  CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2273  CALL claset( 'U', m-1, m-1, czero, czero,
2274  $ work( ir+ldwrkr ), ldwrkr )
2275 *
2276 * Generate Q in A
2277 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2278 * (RWorkspace: 0)
2279 *
2280  CALL cunglq( m, n, m, a, lda, work( itau ),
2281  $ work( iwork ), lwork-iwork+1, ierr )
2282  ie = 1
2283  itauq = itau
2284  itaup = itauq + m
2285  iwork = itaup + m
2286 *
2287 * Bidiagonalize L in WORK(IR)
2288 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2289 * (RWorkspace: need M)
2290 *
2291  CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2292  $ work( itauq ), work( itaup ),
2293  $ work( iwork ), lwork-iwork+1, ierr )
2294 *
2295 * Generate right vectors bidiagonalizing L
2296 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2297 * (RWorkspace: 0)
2298 *
2299  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2300  $ work( itaup ), work( iwork ),
2301  $ lwork-iwork+1, ierr )
2302  irwork = ie + m
2303 *
2304 * Perform bidiagonal QR iteration, computing right
2305 * singular vectors of L in WORK(IR)
2306 * (CWorkspace: need M*M)
2307 * (RWorkspace: need BDSPAC)
2308 *
2309  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2310  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2311  $ rwork( irwork ), info )
2312  iu = itauq
2313 *
2314 * Multiply right singular vectors of L in WORK(IR) by Q
2315 * in A, storing result in WORK(IU) and copying to A
2316 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2317 * (RWorkspace: 0)
2318 *
2319  DO 30 i = 1, n, chunk
2320  blk = min( n-i+1, chunk )
2321  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2322  $ ldwrkr, a( 1, i ), lda, czero,
2323  $ work( iu ), ldwrku )
2324  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2325  $ a( 1, i ), lda )
2326  30 CONTINUE
2327 *
2328  ELSE
2329 *
2330 * Insufficient workspace for a fast algorithm
2331 *
2332  ie = 1
2333  itauq = 1
2334  itaup = itauq + m
2335  iwork = itaup + m
2336 *
2337 * Bidiagonalize A
2338 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2339 * (RWorkspace: need M)
2340 *
2341  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
2342  $ work( itauq ), work( itaup ),
2343  $ work( iwork ), lwork-iwork+1, ierr )
2344 *
2345 * Generate right vectors bidiagonalizing A
2346 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2347 * (RWorkspace: 0)
2348 *
2349  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2350  $ work( iwork ), lwork-iwork+1, ierr )
2351  irwork = ie + m
2352 *
2353 * Perform bidiagonal QR iteration, computing right
2354 * singular vectors of A in A
2355 * (CWorkspace: 0)
2356 * (RWorkspace: need BDSPAC)
2357 *
2358  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2359  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2360 *
2361  END IF
2362 *
2363  ELSE IF( wntvo .AND. wntuas ) THEN
2364 *
2365 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2366 * M right singular vectors to be overwritten on A and
2367 * M left singular vectors to be computed in U
2368 *
2369  IF( lwork.GE.m*m+3*m ) THEN
2370 *
2371 * Sufficient workspace for a fast algorithm
2372 *
2373  ir = 1
2374  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2375 *
2376 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2377 *
2378  ldwrku = lda
2379  chunk = n
2380  ldwrkr = lda
2381  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2382 *
2383 * WORK(IU) is LDA by N and WORK(IR) is M by M
2384 *
2385  ldwrku = lda
2386  chunk = n
2387  ldwrkr = m
2388  ELSE
2389 *
2390 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2391 *
2392  ldwrku = m
2393  chunk = ( lwork-m*m ) / m
2394  ldwrkr = m
2395  END IF
2396  itau = ir + ldwrkr*m
2397  iwork = itau + m
2398 *
2399 * Compute A=L*Q
2400 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2401 * (RWorkspace: 0)
2402 *
2403  CALL cgelqf( m, n, a, lda, work( itau ),
2404  $ work( iwork ), lwork-iwork+1, ierr )
2405 *
2406 * Copy L to U, zeroing about above it
2407 *
2408  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2409  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2410  $ ldu )
2411 *
2412 * Generate Q in A
2413 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2414 * (RWorkspace: 0)
2415 *
2416  CALL cunglq( m, n, m, a, lda, work( itau ),
2417  $ work( iwork ), lwork-iwork+1, ierr )
2418  ie = 1
2419  itauq = itau
2420  itaup = itauq + m
2421  iwork = itaup + m
2422 *
2423 * Bidiagonalize L in U, copying result to WORK(IR)
2424 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2425 * (RWorkspace: need M)
2426 *
2427  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2428  $ work( itauq ), work( itaup ),
2429  $ work( iwork ), lwork-iwork+1, ierr )
2430  CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2431 *
2432 * Generate right vectors bidiagonalizing L in WORK(IR)
2433 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2434 * (RWorkspace: 0)
2435 *
2436  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2437  $ work( itaup ), work( iwork ),
2438  $ lwork-iwork+1, ierr )
2439 *
2440 * Generate left vectors bidiagonalizing L in U
2441 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2442 * (RWorkspace: 0)
2443 *
2444  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2445  $ work( iwork ), lwork-iwork+1, ierr )
2446  irwork = ie + m
2447 *
2448 * Perform bidiagonal QR iteration, computing left
2449 * singular vectors of L in U, and computing right
2450 * singular vectors of L in WORK(IR)
2451 * (CWorkspace: need M*M)
2452 * (RWorkspace: need BDSPAC)
2453 *
2454  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2455  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2456  $ rwork( irwork ), info )
2457  iu = itauq
2458 *
2459 * Multiply right singular vectors of L in WORK(IR) by Q
2460 * in A, storing result in WORK(IU) and copying to A
2461 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2462 * (RWorkspace: 0)
2463 *
2464  DO 40 i = 1, n, chunk
2465  blk = min( n-i+1, chunk )
2466  CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2467  $ ldwrkr, a( 1, i ), lda, czero,
2468  $ work( iu ), ldwrku )
2469  CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2470  $ a( 1, i ), lda )
2471  40 CONTINUE
2472 *
2473  ELSE
2474 *
2475 * Insufficient workspace for a fast algorithm
2476 *
2477  itau = 1
2478  iwork = itau + m
2479 *
2480 * Compute A=L*Q
2481 * (CWorkspace: need 2*M, prefer M+M*NB)
2482 * (RWorkspace: 0)
2483 *
2484  CALL cgelqf( m, n, a, lda, work( itau ),
2485  $ work( iwork ), lwork-iwork+1, ierr )
2486 *
2487 * Copy L to U, zeroing out above it
2488 *
2489  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2490  CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2491  $ ldu )
2492 *
2493 * Generate Q in A
2494 * (CWorkspace: need 2*M, prefer M+M*NB)
2495 * (RWorkspace: 0)
2496 *
2497  CALL cunglq( m, n, m, a, lda, work( itau ),
2498  $ work( iwork ), lwork-iwork+1, ierr )
2499  ie = 1
2500  itauq = itau
2501  itaup = itauq + m
2502  iwork = itaup + m
2503 *
2504 * Bidiagonalize L in U
2505 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2506 * (RWorkspace: need M)
2507 *
2508  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2509  $ work( itauq ), work( itaup ),
2510  $ work( iwork ), lwork-iwork+1, ierr )
2511 *
2512 * Multiply right vectors bidiagonalizing L by Q in A
2513 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2514 * (RWorkspace: 0)
2515 *
2516  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2517  $ work( itaup ), a, lda, work( iwork ),
2518  $ lwork-iwork+1, ierr )
2519 *
2520 * Generate left vectors bidiagonalizing L in U
2521 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2522 * (RWorkspace: 0)
2523 *
2524  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2525  $ work( iwork ), lwork-iwork+1, ierr )
2526  irwork = ie + m
2527 *
2528 * Perform bidiagonal QR iteration, computing left
2529 * singular vectors of A in U and computing right
2530 * singular vectors of A in A
2531 * (CWorkspace: 0)
2532 * (RWorkspace: need BDSPAC)
2533 *
2534  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2535  $ u, ldu, cdum, 1, rwork( irwork ), info )
2536 *
2537  END IF
2538 *
2539  ELSE IF( wntvs ) THEN
2540 *
2541  IF( wntun ) THEN
2542 *
2543 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2544 * M right singular vectors to be computed in VT and
2545 * no left singular vectors to be computed
2546 *
2547  IF( lwork.GE.m*m+3*m ) THEN
2548 *
2549 * Sufficient workspace for a fast algorithm
2550 *
2551  ir = 1
2552  IF( lwork.GE.wrkbl+lda*m ) THEN
2553 *
2554 * WORK(IR) is LDA by M
2555 *
2556  ldwrkr = lda
2557  ELSE
2558 *
2559 * WORK(IR) is M by M
2560 *
2561  ldwrkr = m
2562  END IF
2563  itau = ir + ldwrkr*m
2564  iwork = itau + m
2565 *
2566 * Compute A=L*Q
2567 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2568 * (RWorkspace: 0)
2569 *
2570  CALL cgelqf( m, n, a, lda, work( itau ),
2571  $ work( iwork ), lwork-iwork+1, ierr )
2572 *
2573 * Copy L to WORK(IR), zeroing out above it
2574 *
2575  CALL clacpy( 'L', m, m, a, lda, work( ir ),
2576  $ ldwrkr )
2577  CALL claset( 'U', m-1, m-1, czero, czero,
2578  $ work( ir+ldwrkr ), ldwrkr )
2579 *
2580 * Generate Q in A
2581 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2582 * (RWorkspace: 0)
2583 *
2584  CALL cunglq( m, n, m, a, lda, work( itau ),
2585  $ work( iwork ), lwork-iwork+1, ierr )
2586  ie = 1
2587  itauq = itau
2588  itaup = itauq + m
2589  iwork = itaup + m
2590 *
2591 * Bidiagonalize L in WORK(IR)
2592 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2593 * (RWorkspace: need M)
2594 *
2595  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
2596  $ rwork( ie ), work( itauq ),
2597  $ work( itaup ), work( iwork ),
2598  $ lwork-iwork+1, ierr )
2599 *
2600 * Generate right vectors bidiagonalizing L in
2601 * WORK(IR)
2602 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2603 * (RWorkspace: 0)
2604 *
2605  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2606  $ work( itaup ), work( iwork ),
2607  $ lwork-iwork+1, ierr )
2608  irwork = ie + m
2609 *
2610 * Perform bidiagonal QR iteration, computing right
2611 * singular vectors of L in WORK(IR)
2612 * (CWorkspace: need M*M)
2613 * (RWorkspace: need BDSPAC)
2614 *
2615  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2616  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2617  $ rwork( irwork ), info )
2618 *
2619 * Multiply right singular vectors of L in WORK(IR) by
2620 * Q in A, storing result in VT
2621 * (CWorkspace: need M*M)
2622 * (RWorkspace: 0)
2623 *
2624  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
2625  $ ldwrkr, a, lda, czero, vt, ldvt )
2626 *
2627  ELSE
2628 *
2629 * Insufficient workspace for a fast algorithm
2630 *
2631  itau = 1
2632  iwork = itau + m
2633 *
2634 * Compute A=L*Q
2635 * (CWorkspace: need 2*M, prefer M+M*NB)
2636 * (RWorkspace: 0)
2637 *
2638  CALL cgelqf( m, n, a, lda, work( itau ),
2639  $ work( iwork ), lwork-iwork+1, ierr )
2640 *
2641 * Copy result to VT
2642 *
2643  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2644 *
2645 * Generate Q in VT
2646 * (CWorkspace: need 2*M, prefer M+M*NB)
2647 * (RWorkspace: 0)
2648 *
2649  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2650  $ work( iwork ), lwork-iwork+1, ierr )
2651  ie = 1
2652  itauq = itau
2653  itaup = itauq + m
2654  iwork = itaup + m
2655 *
2656 * Zero out above L in A
2657 *
2658  CALL claset( 'U', m-1, m-1, czero, czero,
2659  $ a( 1, 2 ), lda )
2660 *
2661 * Bidiagonalize L in A
2662 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2663 * (RWorkspace: need M)
2664 *
2665  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2666  $ work( itauq ), work( itaup ),
2667  $ work( iwork ), lwork-iwork+1, ierr )
2668 *
2669 * Multiply right vectors bidiagonalizing L by Q in VT
2670 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2671 * (RWorkspace: 0)
2672 *
2673  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2674  $ work( itaup ), vt, ldvt,
2675  $ work( iwork ), lwork-iwork+1, ierr )
2676  irwork = ie + m
2677 *
2678 * Perform bidiagonal QR iteration, computing right
2679 * singular vectors of A in VT
2680 * (CWorkspace: 0)
2681 * (RWorkspace: need BDSPAC)
2682 *
2683  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2684  $ ldvt, cdum, 1, cdum, 1,
2685  $ rwork( irwork ), info )
2686 *
2687  END IF
2688 *
2689  ELSE IF( wntuo ) THEN
2690 *
2691 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2692 * M right singular vectors to be computed in VT and
2693 * M left singular vectors to be overwritten on A
2694 *
2695  IF( lwork.GE.2*m*m+3*m ) THEN
2696 *
2697 * Sufficient workspace for a fast algorithm
2698 *
2699  iu = 1
2700  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2701 *
2702 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2703 *
2704  ldwrku = lda
2705  ir = iu + ldwrku*m
2706  ldwrkr = lda
2707  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2708 *
2709 * WORK(IU) is LDA by M and WORK(IR) is M by M
2710 *
2711  ldwrku = lda
2712  ir = iu + ldwrku*m
2713  ldwrkr = m
2714  ELSE
2715 *
2716 * WORK(IU) is M by M and WORK(IR) is M by M
2717 *
2718  ldwrku = m
2719  ir = iu + ldwrku*m
2720  ldwrkr = m
2721  END IF
2722  itau = ir + ldwrkr*m
2723  iwork = itau + m
2724 *
2725 * Compute A=L*Q
2726 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2727 * (RWorkspace: 0)
2728 *
2729  CALL cgelqf( m, n, a, lda, work( itau ),
2730  $ work( iwork ), lwork-iwork+1, ierr )
2731 *
2732 * Copy L to WORK(IU), zeroing out below it
2733 *
2734  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2735  $ ldwrku )
2736  CALL claset( 'U', m-1, m-1, czero, czero,
2737  $ work( iu+ldwrku ), ldwrku )
2738 *
2739 * Generate Q in A
2740 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2741 * (RWorkspace: 0)
2742 *
2743  CALL cunglq( m, n, m, a, lda, work( itau ),
2744  $ work( iwork ), lwork-iwork+1, ierr )
2745  ie = 1
2746  itauq = itau
2747  itaup = itauq + m
2748  iwork = itaup + m
2749 *
2750 * Bidiagonalize L in WORK(IU), copying result to
2751 * WORK(IR)
2752 * (CWorkspace: need 2*M*M+3*M,
2753 * prefer 2*M*M+2*M+2*M*NB)
2754 * (RWorkspace: need M)
2755 *
2756  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2757  $ rwork( ie ), work( itauq ),
2758  $ work( itaup ), work( iwork ),
2759  $ lwork-iwork+1, ierr )
2760  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
2761  $ work( ir ), ldwrkr )
2762 *
2763 * Generate right bidiagonalizing vectors in WORK(IU)
2764 * (CWorkspace: need 2*M*M+3*M-1,
2765 * prefer 2*M*M+2*M+(M-1)*NB)
2766 * (RWorkspace: 0)
2767 *
2768  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2769  $ work( itaup ), work( iwork ),
2770  $ lwork-iwork+1, ierr )
2771 *
2772 * Generate left bidiagonalizing vectors in WORK(IR)
2773 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2774 * (RWorkspace: 0)
2775 *
2776  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2777  $ work( itauq ), work( iwork ),
2778  $ lwork-iwork+1, ierr )
2779  irwork = ie + m
2780 *
2781 * Perform bidiagonal QR iteration, computing left
2782 * singular vectors of L in WORK(IR) and computing
2783 * right singular vectors of L in WORK(IU)
2784 * (CWorkspace: need 2*M*M)
2785 * (RWorkspace: need BDSPAC)
2786 *
2787  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2788  $ work( iu ), ldwrku, work( ir ),
2789  $ ldwrkr, cdum, 1, rwork( irwork ),
2790  $ info )
2791 *
2792 * Multiply right singular vectors of L in WORK(IU) by
2793 * Q in A, storing result in VT
2794 * (CWorkspace: need M*M)
2795 * (RWorkspace: 0)
2796 *
2797  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2798  $ ldwrku, a, lda, czero, vt, ldvt )
2799 *
2800 * Copy left singular vectors of L to A
2801 * (CWorkspace: need M*M)
2802 * (RWorkspace: 0)
2803 *
2804  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
2805  $ lda )
2806 *
2807  ELSE
2808 *
2809 * Insufficient workspace for a fast algorithm
2810 *
2811  itau = 1
2812  iwork = itau + m
2813 *
2814 * Compute A=L*Q, copying result to VT
2815 * (CWorkspace: need 2*M, prefer M+M*NB)
2816 * (RWorkspace: 0)
2817 *
2818  CALL cgelqf( m, n, a, lda, work( itau ),
2819  $ work( iwork ), lwork-iwork+1, ierr )
2820  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2821 *
2822 * Generate Q in VT
2823 * (CWorkspace: need 2*M, prefer M+M*NB)
2824 * (RWorkspace: 0)
2825 *
2826  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2827  $ work( iwork ), lwork-iwork+1, ierr )
2828  ie = 1
2829  itauq = itau
2830  itaup = itauq + m
2831  iwork = itaup + m
2832 *
2833 * Zero out above L in A
2834 *
2835  CALL claset( 'U', m-1, m-1, czero, czero,
2836  $ a( 1, 2 ), lda )
2837 *
2838 * Bidiagonalize L in A
2839 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2840 * (RWorkspace: need M)
2841 *
2842  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2843  $ work( itauq ), work( itaup ),
2844  $ work( iwork ), lwork-iwork+1, ierr )
2845 *
2846 * Multiply right vectors bidiagonalizing L by Q in VT
2847 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2848 * (RWorkspace: 0)
2849 *
2850  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2851  $ work( itaup ), vt, ldvt,
2852  $ work( iwork ), lwork-iwork+1, ierr )
2853 *
2854 * Generate left bidiagonalizing vectors of L in A
2855 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2856 * (RWorkspace: 0)
2857 *
2858  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2859  $ work( iwork ), lwork-iwork+1, ierr )
2860  irwork = ie + m
2861 *
2862 * Perform bidiagonal QR iteration, computing left
2863 * singular vectors of A in A and computing right
2864 * singular vectors of A in VT
2865 * (CWorkspace: 0)
2866 * (RWorkspace: need BDSPAC)
2867 *
2868  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2869  $ ldvt, a, lda, cdum, 1,
2870  $ rwork( irwork ), info )
2871 *
2872  END IF
2873 *
2874  ELSE IF( wntuas ) THEN
2875 *
2876 * Path 6t(N much larger than M, JOBU='S' or 'A',
2877 * JOBVT='S')
2878 * M right singular vectors to be computed in VT and
2879 * M left singular vectors to be computed in U
2880 *
2881  IF( lwork.GE.m*m+3*m ) THEN
2882 *
2883 * Sufficient workspace for a fast algorithm
2884 *
2885  iu = 1
2886  IF( lwork.GE.wrkbl+lda*m ) THEN
2887 *
2888 * WORK(IU) is LDA by N
2889 *
2890  ldwrku = lda
2891  ELSE
2892 *
2893 * WORK(IU) is LDA by M
2894 *
2895  ldwrku = m
2896  END IF
2897  itau = iu + ldwrku*m
2898  iwork = itau + m
2899 *
2900 * Compute A=L*Q
2901 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2902 * (RWorkspace: 0)
2903 *
2904  CALL cgelqf( m, n, a, lda, work( itau ),
2905  $ work( iwork ), lwork-iwork+1, ierr )
2906 *
2907 * Copy L to WORK(IU), zeroing out above it
2908 *
2909  CALL clacpy( 'L', m, m, a, lda, work( iu ),
2910  $ ldwrku )
2911  CALL claset( 'U', m-1, m-1, czero, czero,
2912  $ work( iu+ldwrku ), ldwrku )
2913 *
2914 * Generate Q in A
2915 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2916 * (RWorkspace: 0)
2917 *
2918  CALL cunglq( m, n, m, a, lda, work( itau ),
2919  $ work( iwork ), lwork-iwork+1, ierr )
2920  ie = 1
2921  itauq = itau
2922  itaup = itauq + m
2923  iwork = itaup + m
2924 *
2925 * Bidiagonalize L in WORK(IU), copying result to U
2926 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2927 * (RWorkspace: need M)
2928 *
2929  CALL cgebrd( m, m, work( iu ), ldwrku, s,
2930  $ rwork( ie ), work( itauq ),
2931  $ work( itaup ), work( iwork ),
2932  $ lwork-iwork+1, ierr )
2933  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
2934  $ ldu )
2935 *
2936 * Generate right bidiagonalizing vectors in WORK(IU)
2937 * (CWorkspace: need M*M+3*M-1,
2938 * prefer M*M+2*M+(M-1)*NB)
2939 * (RWorkspace: 0)
2940 *
2941  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2942  $ work( itaup ), work( iwork ),
2943  $ lwork-iwork+1, ierr )
2944 *
2945 * Generate left bidiagonalizing vectors in U
2946 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2947 * (RWorkspace: 0)
2948 *
2949  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2950  $ work( iwork ), lwork-iwork+1, ierr )
2951  irwork = ie + m
2952 *
2953 * Perform bidiagonal QR iteration, computing left
2954 * singular vectors of L in U and computing right
2955 * singular vectors of L in WORK(IU)
2956 * (CWorkspace: need M*M)
2957 * (RWorkspace: need BDSPAC)
2958 *
2959  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2960  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2961  $ rwork( irwork ), info )
2962 *
2963 * Multiply right singular vectors of L in WORK(IU) by
2964 * Q in A, storing result in VT
2965 * (CWorkspace: need M*M)
2966 * (RWorkspace: 0)
2967 *
2968  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2969  $ ldwrku, a, lda, czero, vt, ldvt )
2970 *
2971  ELSE
2972 *
2973 * Insufficient workspace for a fast algorithm
2974 *
2975  itau = 1
2976  iwork = itau + m
2977 *
2978 * Compute A=L*Q, copying result to VT
2979 * (CWorkspace: need 2*M, prefer M+M*NB)
2980 * (RWorkspace: 0)
2981 *
2982  CALL cgelqf( m, n, a, lda, work( itau ),
2983  $ work( iwork ), lwork-iwork+1, ierr )
2984  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2985 *
2986 * Generate Q in VT
2987 * (CWorkspace: need 2*M, prefer M+M*NB)
2988 * (RWorkspace: 0)
2989 *
2990  CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2991  $ work( iwork ), lwork-iwork+1, ierr )
2992 *
2993 * Copy L to U, zeroing out above it
2994 *
2995  CALL clacpy( 'L', m, m, a, lda, u, ldu )
2996  CALL claset( 'U', m-1, m-1, czero, czero,
2997  $ u( 1, 2 ), ldu )
2998  ie = 1
2999  itauq = itau
3000  itaup = itauq + m
3001  iwork = itaup + m
3002 *
3003 * Bidiagonalize L in U
3004 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3005 * (RWorkspace: need M)
3006 *
3007  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3008  $ work( itauq ), work( itaup ),
3009  $ work( iwork ), lwork-iwork+1, ierr )
3010 *
3011 * Multiply right bidiagonalizing vectors in U by Q
3012 * in VT
3013 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3014 * (RWorkspace: 0)
3015 *
3016  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3017  $ work( itaup ), vt, ldvt,
3018  $ work( iwork ), lwork-iwork+1, ierr )
3019 *
3020 * Generate left bidiagonalizing vectors in U
3021 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3022 * (RWorkspace: 0)
3023 *
3024  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3025  $ work( iwork ), lwork-iwork+1, ierr )
3026  irwork = ie + m
3027 *
3028 * Perform bidiagonal QR iteration, computing left
3029 * singular vectors of A in U and computing right
3030 * singular vectors of A in VT
3031 * (CWorkspace: 0)
3032 * (RWorkspace: need BDSPAC)
3033 *
3034  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3035  $ ldvt, u, ldu, cdum, 1,
3036  $ rwork( irwork ), info )
3037 *
3038  END IF
3039 *
3040  END IF
3041 *
3042  ELSE IF( wntva ) THEN
3043 *
3044  IF( wntun ) THEN
3045 *
3046 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3047 * N right singular vectors to be computed in VT and
3048 * no left singular vectors to be computed
3049 *
3050  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3051 *
3052 * Sufficient workspace for a fast algorithm
3053 *
3054  ir = 1
3055  IF( lwork.GE.wrkbl+lda*m ) THEN
3056 *
3057 * WORK(IR) is LDA by M
3058 *
3059  ldwrkr = lda
3060  ELSE
3061 *
3062 * WORK(IR) is M by M
3063 *
3064  ldwrkr = m
3065  END IF
3066  itau = ir + ldwrkr*m
3067  iwork = itau + m
3068 *
3069 * Compute A=L*Q, copying result to VT
3070 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3071 * (RWorkspace: 0)
3072 *
3073  CALL cgelqf( m, n, a, lda, work( itau ),
3074  $ work( iwork ), lwork-iwork+1, ierr )
3075  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3076 *
3077 * Copy L to WORK(IR), zeroing out above it
3078 *
3079  CALL clacpy( 'L', m, m, a, lda, work( ir ),
3080  $ ldwrkr )
3081  CALL claset( 'U', m-1, m-1, czero, czero,
3082  $ work( ir+ldwrkr ), ldwrkr )
3083 *
3084 * Generate Q in VT
3085 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3086 * (RWorkspace: 0)
3087 *
3088  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3089  $ work( iwork ), lwork-iwork+1, ierr )
3090  ie = 1
3091  itauq = itau
3092  itaup = itauq + m
3093  iwork = itaup + m
3094 *
3095 * Bidiagonalize L in WORK(IR)
3096 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3097 * (RWorkspace: need M)
3098 *
3099  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
3100  $ rwork( ie ), work( itauq ),
3101  $ work( itaup ), work( iwork ),
3102  $ lwork-iwork+1, ierr )
3103 *
3104 * Generate right bidiagonalizing vectors in WORK(IR)
3105 * (CWorkspace: need M*M+3*M-1,
3106 * prefer M*M+2*M+(M-1)*NB)
3107 * (RWorkspace: 0)
3108 *
3109  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
3110  $ work( itaup ), work( iwork ),
3111  $ lwork-iwork+1, ierr )
3112  irwork = ie + m
3113 *
3114 * Perform bidiagonal QR iteration, computing right
3115 * singular vectors of L in WORK(IR)
3116 * (CWorkspace: need M*M)
3117 * (RWorkspace: need BDSPAC)
3118 *
3119  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3120  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3121  $ rwork( irwork ), info )
3122 *
3123 * Multiply right singular vectors of L in WORK(IR) by
3124 * Q in VT, storing result in A
3125 * (CWorkspace: need M*M)
3126 * (RWorkspace: 0)
3127 *
3128  CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
3129  $ ldwrkr, vt, ldvt, czero, a, lda )
3130 *
3131 * Copy right singular vectors of A from A to VT
3132 *
3133  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3134 *
3135  ELSE
3136 *
3137 * Insufficient workspace for a fast algorithm
3138 *
3139  itau = 1
3140  iwork = itau + m
3141 *
3142 * Compute A=L*Q, copying result to VT
3143 * (CWorkspace: need 2*M, prefer M+M*NB)
3144 * (RWorkspace: 0)
3145 *
3146  CALL cgelqf( m, n, a, lda, work( itau ),
3147  $ work( iwork ), lwork-iwork+1, ierr )
3148  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3149 *
3150 * Generate Q in VT
3151 * (CWorkspace: need M+N, prefer M+N*NB)
3152 * (RWorkspace: 0)
3153 *
3154  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3155  $ work( iwork ), lwork-iwork+1, ierr )
3156  ie = 1
3157  itauq = itau
3158  itaup = itauq + m
3159  iwork = itaup + m
3160 *
3161 * Zero out above L in A
3162 *
3163  CALL claset( 'U', m-1, m-1, czero, czero,
3164  $ a( 1, 2 ), lda )
3165 *
3166 * Bidiagonalize L in A
3167 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3168 * (RWorkspace: need M)
3169 *
3170  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3171  $ work( itauq ), work( itaup ),
3172  $ work( iwork ), lwork-iwork+1, ierr )
3173 *
3174 * Multiply right bidiagonalizing vectors in A by Q
3175 * in VT
3176 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3177 * (RWorkspace: 0)
3178 *
3179  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3180  $ work( itaup ), vt, ldvt,
3181  $ work( iwork ), lwork-iwork+1, ierr )
3182  irwork = ie + m
3183 *
3184 * Perform bidiagonal QR iteration, computing right
3185 * singular vectors of A in VT
3186 * (CWorkspace: 0)
3187 * (RWorkspace: need BDSPAC)
3188 *
3189  CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3190  $ ldvt, cdum, 1, cdum, 1,
3191  $ rwork( irwork ), info )
3192 *
3193  END IF
3194 *
3195  ELSE IF( wntuo ) THEN
3196 *
3197 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3198 * N right singular vectors to be computed in VT and
3199 * M left singular vectors to be overwritten on A
3200 *
3201  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3202 *
3203 * Sufficient workspace for a fast algorithm
3204 *
3205  iu = 1
3206  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3207 *
3208 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3209 *
3210  ldwrku = lda
3211  ir = iu + ldwrku*m
3212  ldwrkr = lda
3213  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3214 *
3215 * WORK(IU) is LDA by M and WORK(IR) is M by M
3216 *
3217  ldwrku = lda
3218  ir = iu + ldwrku*m
3219  ldwrkr = m
3220  ELSE
3221 *
3222 * WORK(IU) is M by M and WORK(IR) is M by M
3223 *
3224  ldwrku = m
3225  ir = iu + ldwrku*m
3226  ldwrkr = m
3227  END IF
3228  itau = ir + ldwrkr*m
3229  iwork = itau + m
3230 *
3231 * Compute A=L*Q, copying result to VT
3232 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3233 * (RWorkspace: 0)
3234 *
3235  CALL cgelqf( m, n, a, lda, work( itau ),
3236  $ work( iwork ), lwork-iwork+1, ierr )
3237  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3238 *
3239 * Generate Q in VT
3240 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3241 * (RWorkspace: 0)
3242 *
3243  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3244  $ work( iwork ), lwork-iwork+1, ierr )
3245 *
3246 * Copy L to WORK(IU), zeroing out above it
3247 *
3248  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3249  $ ldwrku )
3250  CALL claset( 'U', m-1, m-1, czero, czero,
3251  $ work( iu+ldwrku ), ldwrku )
3252  ie = 1
3253  itauq = itau
3254  itaup = itauq + m
3255  iwork = itaup + m
3256 *
3257 * Bidiagonalize L in WORK(IU), copying result to
3258 * WORK(IR)
3259 * (CWorkspace: need 2*M*M+3*M,
3260 * prefer 2*M*M+2*M+2*M*NB)
3261 * (RWorkspace: need M)
3262 *
3263  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3264  $ rwork( ie ), work( itauq ),
3265  $ work( itaup ), work( iwork ),
3266  $ lwork-iwork+1, ierr )
3267  CALL clacpy( 'L', m, m, work( iu ), ldwrku,
3268  $ work( ir ), ldwrkr )
3269 *
3270 * Generate right bidiagonalizing vectors in WORK(IU)
3271 * (CWorkspace: need 2*M*M+3*M-1,
3272 * prefer 2*M*M+2*M+(M-1)*NB)
3273 * (RWorkspace: 0)
3274 *
3275  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3276  $ work( itaup ), work( iwork ),
3277  $ lwork-iwork+1, ierr )
3278 *
3279 * Generate left bidiagonalizing vectors in WORK(IR)
3280 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3281 * (RWorkspace: 0)
3282 *
3283  CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3284  $ work( itauq ), work( iwork ),
3285  $ lwork-iwork+1, ierr )
3286  irwork = ie + m
3287 *
3288 * Perform bidiagonal QR iteration, computing left
3289 * singular vectors of L in WORK(IR) and computing
3290 * right singular vectors of L in WORK(IU)
3291 * (CWorkspace: need 2*M*M)
3292 * (RWorkspace: need BDSPAC)
3293 *
3294  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3295  $ work( iu ), ldwrku, work( ir ),
3296  $ ldwrkr, cdum, 1, rwork( irwork ),
3297  $ info )
3298 *
3299 * Multiply right singular vectors of L in WORK(IU) by
3300 * Q in VT, storing result in A
3301 * (CWorkspace: need M*M)
3302 * (RWorkspace: 0)
3303 *
3304  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3305  $ ldwrku, vt, ldvt, czero, a, lda )
3306 *
3307 * Copy right singular vectors of A from A to VT
3308 *
3309  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3310 *
3311 * Copy left singular vectors of A from WORK(IR) to A
3312 *
3313  CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
3314  $ lda )
3315 *
3316  ELSE
3317 *
3318 * Insufficient workspace for a fast algorithm
3319 *
3320  itau = 1
3321  iwork = itau + m
3322 *
3323 * Compute A=L*Q, copying result to VT
3324 * (CWorkspace: need 2*M, prefer M+M*NB)
3325 * (RWorkspace: 0)
3326 *
3327  CALL cgelqf( m, n, a, lda, work( itau ),
3328  $ work( iwork ), lwork-iwork+1, ierr )
3329  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3330 *
3331 * Generate Q in VT
3332 * (CWorkspace: need M+N, prefer M+N*NB)
3333 * (RWorkspace: 0)
3334 *
3335  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3336  $ work( iwork ), lwork-iwork+1, ierr )
3337  ie = 1
3338  itauq = itau
3339  itaup = itauq + m
3340  iwork = itaup + m
3341 *
3342 * Zero out above L in A
3343 *
3344  CALL claset( 'U', m-1, m-1, czero, czero,
3345  $ a( 1, 2 ), lda )
3346 *
3347 * Bidiagonalize L in A
3348 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3349 * (RWorkspace: need M)
3350 *
3351  CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3352  $ work( itauq ), work( itaup ),
3353  $ work( iwork ), lwork-iwork+1, ierr )
3354 *
3355 * Multiply right bidiagonalizing vectors in A by Q
3356 * in VT
3357 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3358 * (RWorkspace: 0)
3359 *
3360  CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3361  $ work( itaup ), vt, ldvt,
3362  $ work( iwork ), lwork-iwork+1, ierr )
3363 *
3364 * Generate left bidiagonalizing vectors in A
3365 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3366 * (RWorkspace: 0)
3367 *
3368  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
3369  $ work( iwork ), lwork-iwork+1, ierr )
3370  irwork = ie + m
3371 *
3372 * Perform bidiagonal QR iteration, computing left
3373 * singular vectors of A in A and computing right
3374 * singular vectors of A in VT
3375 * (CWorkspace: 0)
3376 * (RWorkspace: need BDSPAC)
3377 *
3378  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3379  $ ldvt, a, lda, cdum, 1,
3380  $ rwork( irwork ), info )
3381 *
3382  END IF
3383 *
3384  ELSE IF( wntuas ) THEN
3385 *
3386 * Path 9t(N much larger than M, JOBU='S' or 'A',
3387 * JOBVT='A')
3388 * N right singular vectors to be computed in VT and
3389 * M left singular vectors to be computed in U
3390 *
3391  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3392 *
3393 * Sufficient workspace for a fast algorithm
3394 *
3395  iu = 1
3396  IF( lwork.GE.wrkbl+lda*m ) THEN
3397 *
3398 * WORK(IU) is LDA by M
3399 *
3400  ldwrku = lda
3401  ELSE
3402 *
3403 * WORK(IU) is M by M
3404 *
3405  ldwrku = m
3406  END IF
3407  itau = iu + ldwrku*m
3408  iwork = itau + m
3409 *
3410 * Compute A=L*Q, copying result to VT
3411 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3412 * (RWorkspace: 0)
3413 *
3414  CALL cgelqf( m, n, a, lda, work( itau ),
3415  $ work( iwork ), lwork-iwork+1, ierr )
3416  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3417 *
3418 * Generate Q in VT
3419 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3420 * (RWorkspace: 0)
3421 *
3422  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3423  $ work( iwork ), lwork-iwork+1, ierr )
3424 *
3425 * Copy L to WORK(IU), zeroing out above it
3426 *
3427  CALL clacpy( 'L', m, m, a, lda, work( iu ),
3428  $ ldwrku )
3429  CALL claset( 'U', m-1, m-1, czero, czero,
3430  $ work( iu+ldwrku ), ldwrku )
3431  ie = 1
3432  itauq = itau
3433  itaup = itauq + m
3434  iwork = itaup + m
3435 *
3436 * Bidiagonalize L in WORK(IU), copying result to U
3437 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3438 * (RWorkspace: need M)
3439 *
3440  CALL cgebrd( m, m, work( iu ), ldwrku, s,
3441  $ rwork( ie ), work( itauq ),
3442  $ work( itaup ), work( iwork ),
3443  $ lwork-iwork+1, ierr )
3444  CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
3445  $ ldu )
3446 *
3447 * Generate right bidiagonalizing vectors in WORK(IU)
3448 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3449 * (RWorkspace: 0)
3450 *
3451  CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3452  $ work( itaup ), work( iwork ),
3453  $ lwork-iwork+1, ierr )
3454 *
3455 * Generate left bidiagonalizing vectors in U
3456 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3457 * (RWorkspace: 0)
3458 *
3459  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3460  $ work( iwork ), lwork-iwork+1, ierr )
3461  irwork = ie + m
3462 *
3463 * Perform bidiagonal QR iteration, computing left
3464 * singular vectors of L in U and computing right
3465 * singular vectors of L in WORK(IU)
3466 * (CWorkspace: need M*M)
3467 * (RWorkspace: need BDSPAC)
3468 *
3469  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3470  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3471  $ rwork( irwork ), info )
3472 *
3473 * Multiply right singular vectors of L in WORK(IU) by
3474 * Q in VT, storing result in A
3475 * (CWorkspace: need M*M)
3476 * (RWorkspace: 0)
3477 *
3478  CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3479  $ ldwrku, vt, ldvt, czero, a, lda )
3480 *
3481 * Copy right singular vectors of A from A to VT
3482 *
3483  CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3484 *
3485  ELSE
3486 *
3487 * Insufficient workspace for a fast algorithm
3488 *
3489  itau = 1
3490  iwork = itau + m
3491 *
3492 * Compute A=L*Q, copying result to VT
3493 * (CWorkspace: need 2*M, prefer M+M*NB)
3494 * (RWorkspace: 0)
3495 *
3496  CALL cgelqf( m, n, a, lda, work( itau ),
3497  $ work( iwork ), lwork-iwork+1, ierr )
3498  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3499 *
3500 * Generate Q in VT
3501 * (CWorkspace: need M+N, prefer M+N*NB)
3502 * (RWorkspace: 0)
3503 *
3504  CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3505  $ work( iwork ), lwork-iwork+1, ierr )
3506 *
3507 * Copy L to U, zeroing out above it
3508 *
3509  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3510  CALL claset( 'U', m-1, m-1, czero, czero,
3511  $ u( 1, 2 ), ldu )
3512  ie = 1
3513  itauq = itau
3514  itaup = itauq + m
3515  iwork = itaup + m
3516 *
3517 * Bidiagonalize L in U
3518 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3519 * (RWorkspace: need M)
3520 *
3521  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3522  $ work( itauq ), work( itaup ),
3523  $ work( iwork ), lwork-iwork+1, ierr )
3524 *
3525 * Multiply right bidiagonalizing vectors in U by Q
3526 * in VT
3527 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3528 * (RWorkspace: 0)
3529 *
3530  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3531  $ work( itaup ), vt, ldvt,
3532  $ work( iwork ), lwork-iwork+1, ierr )
3533 *
3534 * Generate left bidiagonalizing vectors in U
3535 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3536 * (RWorkspace: 0)
3537 *
3538  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3539  $ work( iwork ), lwork-iwork+1, ierr )
3540  irwork = ie + m
3541 *
3542 * Perform bidiagonal QR iteration, computing left
3543 * singular vectors of A in U and computing right
3544 * singular vectors of A in VT
3545 * (CWorkspace: 0)
3546 * (RWorkspace: need BDSPAC)
3547 *
3548  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3549  $ ldvt, u, ldu, cdum, 1,
3550  $ rwork( irwork ), info )
3551 *
3552  END IF
3553 *
3554  END IF
3555 *
3556  END IF
3557 *
3558  ELSE
3559 *
3560 * N .LT. MNTHR
3561 *
3562 * Path 10t(N greater than M, but not much larger)
3563 * Reduce to bidiagonal form without LQ decomposition
3564 *
3565  ie = 1
3566  itauq = 1
3567  itaup = itauq + m
3568  iwork = itaup + m
3569 *
3570 * Bidiagonalize A
3571 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3572 * (RWorkspace: M)
3573 *
3574  CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3575  $ work( itaup ), work( iwork ), lwork-iwork+1,
3576  $ ierr )
3577  IF( wntuas ) THEN
3578 *
3579 * If left singular vectors desired in U, copy result to U
3580 * and generate left bidiagonalizing vectors in U
3581 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3582 * (RWorkspace: 0)
3583 *
3584  CALL clacpy( 'L', m, m, a, lda, u, ldu )
3585  CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3586  $ work( iwork ), lwork-iwork+1, ierr )
3587  END IF
3588  IF( wntvas ) THEN
3589 *
3590 * If right singular vectors desired in VT, copy result to
3591 * VT and generate right bidiagonalizing vectors in VT
3592 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3593 * (RWorkspace: 0)
3594 *
3595  CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3596  IF( wntva )
3597  $ nrvt = n
3598  IF( wntvs )
3599  $ nrvt = m
3600  CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3601  $ work( iwork ), lwork-iwork+1, ierr )
3602  END IF
3603  IF( wntuo ) THEN
3604 *
3605 * If left singular vectors desired in A, generate left
3606 * bidiagonalizing vectors in A
3607 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3608 * (RWorkspace: 0)
3609 *
3610  CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
3611  $ work( iwork ), lwork-iwork+1, ierr )
3612  END IF
3613  IF( wntvo ) THEN
3614 *
3615 * If right singular vectors desired in A, generate right
3616 * bidiagonalizing vectors in A
3617 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3618 * (RWorkspace: 0)
3619 *
3620  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
3621  $ work( iwork ), lwork-iwork+1, ierr )
3622  END IF
3623  irwork = ie + m
3624  IF( wntuas .OR. wntuo )
3625  $ nru = m
3626  IF( wntun )
3627  $ nru = 0
3628  IF( wntvas .OR. wntvo )
3629  $ ncvt = n
3630  IF( wntvn )
3631  $ ncvt = 0
3632  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3633 *
3634 * Perform bidiagonal QR iteration, if desired, computing
3635 * left singular vectors in U and computing right singular
3636 * vectors in VT
3637 * (CWorkspace: 0)
3638 * (RWorkspace: need BDSPAC)
3639 *
3640  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3641  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3642  $ info )
3643  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3644 *
3645 * Perform bidiagonal QR iteration, if desired, computing
3646 * left singular vectors in U and computing right singular
3647 * vectors in A
3648 * (CWorkspace: 0)
3649 * (RWorkspace: need BDSPAC)
3650 *
3651  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3652  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3653  $ info )
3654  ELSE
3655 *
3656 * Perform bidiagonal QR iteration, if desired, computing
3657 * left singular vectors in A and computing right singular
3658 * vectors in VT
3659 * (CWorkspace: 0)
3660 * (RWorkspace: need BDSPAC)
3661 *
3662  CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3663  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3664  $ info )
3665  END IF
3666 *
3667  END IF
3668 *
3669  END IF
3670 *
3671 * Undo scaling if necessary
3672 *
3673  IF( iscl.EQ.1 ) THEN
3674  IF( anrm.GT.bignum )
3675  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3676  $ ierr )
3677  IF( info.NE.0 .AND. anrm.GT.bignum )
3678  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3679  $ rwork( ie ), minmn, ierr )
3680  IF( anrm.LT.smlnum )
3681  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3682  $ ierr )
3683  IF( info.NE.0 .AND. anrm.LT.smlnum )
3684  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3685  $ rwork( ie ), minmn, ierr )
3686  END IF
3687 *
3688 * Return optimal workspace in WORK(1)
3689 *
3690  work( 1 ) = maxwrk
3691 *
3692  RETURN
3693 *
3694 * End of CGESVD
3695 *
3696  END
subroutine claset(UPLO, M, N, ALPHA, BETA, A, LDA)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: claset.f:108
subroutine cunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMBR
Definition: cunmbr.f:199
subroutine cgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGEQRF
Definition: cgeqrf.f:138
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:189
subroutine cgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
CGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: cgesvd.f:216
subroutine cungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGBR
Definition: cungbr.f:159
subroutine cgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
CGEBRD
Definition: cgebrd.f:208
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:105
subroutine cgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
CGELQF
Definition: cgelqf.f:137
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:141
subroutine cunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGLQ
Definition: cunglq.f:129
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:141
subroutine cungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
CUNGQR
Definition: cungqr.f:130
subroutine cbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, RWORK, INFO)
CBDSQR
Definition: cbdsqr.f:224