LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
zgesvd.f
Go to the documentation of this file.
1 *> \brief <b> ZGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION RWORK( * ), S( * )
30 * COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> ZGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX*16 array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is DOUBLE PRECISION array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX*16 array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX*16 array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if ZBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \ingroup complex16GEsing
210 *
211 * =====================================================================
212  SUBROUTINE zgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
213  $ VT, LDVT, WORK, LWORK, RWORK, INFO )
214 *
215 * -- LAPACK driver routine --
216 * -- LAPACK is a software package provided by Univ. of Tennessee, --
217 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218 *
219 * .. Scalar Arguments ..
220  CHARACTER JOBU, JOBVT
221  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222 * ..
223 * .. Array Arguments ..
224  DOUBLE PRECISION RWORK( * ), S( * )
225  COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
226  $ work( * )
227 * ..
228 *
229 * =====================================================================
230 *
231 * .. Parameters ..
232  COMPLEX*16 CZERO, CONE
233  parameter( czero = ( 0.0d0, 0.0d0 ),
234  $ cone = ( 1.0d0, 0.0d0 ) )
235  DOUBLE PRECISION ZERO, ONE
236  parameter( zero = 0.0d0, one = 1.0d0 )
237 * ..
238 * .. Local Scalars ..
239  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
240  $ wntva, wntvas, wntvn, wntvo, wntvs
241  INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
242  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
243  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
244  $ nrvt, wrkbl
245  INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M,
246  $ lwork_zgebrd, lwork_zungbr_p, lwork_zungbr_q,
247  $ lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m
248  DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
249 * ..
250 * .. Local Arrays ..
251  DOUBLE PRECISION DUM( 1 )
252  COMPLEX*16 CDUM( 1 )
253 * ..
254 * .. External Subroutines ..
255  EXTERNAL dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
257  $ zungqr, zunmbr
258 * ..
259 * .. External Functions ..
260  LOGICAL LSAME
261  INTEGER ILAENV
262  DOUBLE PRECISION DLAMCH, ZLANGE
263  EXTERNAL lsame, ilaenv, dlamch, zlange
264 * ..
265 * .. Intrinsic Functions ..
266  INTRINSIC max, min, sqrt
267 * ..
268 * .. Executable Statements ..
269 *
270 * Test the input arguments
271 *
272  info = 0
273  minmn = min( m, n )
274  wntua = lsame( jobu, 'A' )
275  wntus = lsame( jobu, 'S' )
276  wntuas = wntua .OR. wntus
277  wntuo = lsame( jobu, 'O' )
278  wntun = lsame( jobu, 'N' )
279  wntva = lsame( jobvt, 'A' )
280  wntvs = lsame( jobvt, 'S' )
281  wntvas = wntva .OR. wntvs
282  wntvo = lsame( jobvt, 'O' )
283  wntvn = lsame( jobvt, 'N' )
284  lquery = ( lwork.EQ.-1 )
285 *
286  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
287  info = -1
288  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
289  $ ( wntvo .AND. wntuo ) ) THEN
290  info = -2
291  ELSE IF( m.LT.0 ) THEN
292  info = -3
293  ELSE IF( n.LT.0 ) THEN
294  info = -4
295  ELSE IF( lda.LT.max( 1, m ) ) THEN
296  info = -6
297  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
298  info = -9
299  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
300  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
301  info = -11
302  END IF
303 *
304 * Compute workspace
305 * (Note: Comments in the code beginning "Workspace:" describe the
306 * minimal amount of workspace needed at that point in the code,
307 * as well as the preferred amount for good performance.
308 * CWorkspace refers to complex workspace, and RWorkspace to
309 * real workspace. NB refers to the optimal block size for the
310 * immediately following subroutine, as returned by ILAENV.)
311 *
312  IF( info.EQ.0 ) THEN
313  minwrk = 1
314  maxwrk = 1
315  IF( m.GE.n .AND. minmn.GT.0 ) THEN
316 *
317 * Space needed for ZBDSQR is BDSPAC = 5*N
318 *
319  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
320 * Compute space needed for ZGEQRF
321  CALL zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
322  lwork_zgeqrf = int( cdum(1) )
323 * Compute space needed for ZUNGQR
324  CALL zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_zungqr_n = int( cdum(1) )
326  CALL zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
327  lwork_zungqr_m = int( cdum(1) )
328 * Compute space needed for ZGEBRD
329  CALL zgebrd( n, n, a, lda, s, dum(1), cdum(1),
330  $ cdum(1), cdum(1), -1, ierr )
331  lwork_zgebrd = int( cdum(1) )
332 * Compute space needed for ZUNGBR
333  CALL zungbr( 'P', n, n, n, a, lda, cdum(1),
334  $ cdum(1), -1, ierr )
335  lwork_zungbr_p = int( cdum(1) )
336  CALL zungbr( 'Q', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_zungbr_q = int( cdum(1) )
339 *
340  IF( m.GE.mnthr ) THEN
341  IF( wntun ) THEN
342 *
343 * Path 1 (M much larger than N, JOBU='N')
344 *
345  maxwrk = n + lwork_zgeqrf
346  maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
347  IF( wntvo .OR. wntvas )
348  $ maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
349  minwrk = 3*n
350  ELSE IF( wntuo .AND. wntvn ) THEN
351 *
352 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
353 *
354  wrkbl = n + lwork_zgeqrf
355  wrkbl = max( wrkbl, n+lwork_zungqr_n )
356  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
357  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
358  maxwrk = max( n*n+wrkbl, n*n+m*n )
359  minwrk = 2*n + m
360  ELSE IF( wntuo .AND. wntvas ) THEN
361 *
362 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
363 * 'A')
364 *
365  wrkbl = n + lwork_zgeqrf
366  wrkbl = max( wrkbl, n+lwork_zungqr_n )
367  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
368  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
369  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
370  maxwrk = max( n*n+wrkbl, n*n+m*n )
371  minwrk = 2*n + m
372  ELSE IF( wntus .AND. wntvn ) THEN
373 *
374 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
375 *
376  wrkbl = n + lwork_zgeqrf
377  wrkbl = max( wrkbl, n+lwork_zungqr_n )
378  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
379  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
380  maxwrk = n*n + wrkbl
381  minwrk = 2*n + m
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_zgeqrf
387  wrkbl = max( wrkbl, n+lwork_zungqr_n )
388  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
389  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
390  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
391  maxwrk = 2*n*n + wrkbl
392  minwrk = 2*n + m
393  ELSE IF( wntus .AND. wntvas ) THEN
394 *
395 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
396 * 'A')
397 *
398  wrkbl = n + lwork_zgeqrf
399  wrkbl = max( wrkbl, n+lwork_zungqr_n )
400  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
401  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
402  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
403  maxwrk = n*n + wrkbl
404  minwrk = 2*n + m
405  ELSE IF( wntua .AND. wntvn ) THEN
406 *
407 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
408 *
409  wrkbl = n + lwork_zgeqrf
410  wrkbl = max( wrkbl, n+lwork_zungqr_m )
411  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
412  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
413  maxwrk = n*n + wrkbl
414  minwrk = 2*n + m
415  ELSE IF( wntua .AND. wntvo ) THEN
416 *
417 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418 *
419  wrkbl = n + lwork_zgeqrf
420  wrkbl = max( wrkbl, n+lwork_zungqr_m )
421  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
422  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
423  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
424  maxwrk = 2*n*n + wrkbl
425  minwrk = 2*n + m
426  ELSE IF( wntua .AND. wntvas ) THEN
427 *
428 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
429 * 'A')
430 *
431  wrkbl = n + lwork_zgeqrf
432  wrkbl = max( wrkbl, n+lwork_zungqr_m )
433  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
434  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
435  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
436  maxwrk = n*n + wrkbl
437  minwrk = 2*n + m
438  END IF
439  ELSE
440 *
441 * Path 10 (M at least N, but not much larger)
442 *
443  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
444  $ cdum(1), cdum(1), -1, ierr )
445  lwork_zgebrd = int( cdum(1) )
446  maxwrk = 2*n + lwork_zgebrd
447  IF( wntus .OR. wntuo ) THEN
448  CALL zungbr( 'Q', m, n, n, a, lda, cdum(1),
449  $ cdum(1), -1, ierr )
450  lwork_zungbr_q = int( cdum(1) )
451  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
452  END IF
453  IF( wntua ) THEN
454  CALL zungbr( 'Q', m, m, n, a, lda, cdum(1),
455  $ cdum(1), -1, ierr )
456  lwork_zungbr_q = int( cdum(1) )
457  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
458  END IF
459  IF( .NOT.wntvn ) THEN
460  maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
461  END IF
462  minwrk = 2*n + m
463  END IF
464  ELSE IF( minmn.GT.0 ) THEN
465 *
466 * Space needed for ZBDSQR is BDSPAC = 5*M
467 *
468  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
469 * Compute space needed for ZGELQF
470  CALL zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
471  lwork_zgelqf = int( cdum(1) )
472 * Compute space needed for ZUNGLQ
473  CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
474  $ ierr )
475  lwork_zunglq_n = int( cdum(1) )
476  CALL zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
477  lwork_zunglq_m = int( cdum(1) )
478 * Compute space needed for ZGEBRD
479  CALL zgebrd( m, m, a, lda, s, dum(1), cdum(1),
480  $ cdum(1), cdum(1), -1, ierr )
481  lwork_zgebrd = int( cdum(1) )
482 * Compute space needed for ZUNGBR P
483  CALL zungbr( 'P', m, m, m, a, n, cdum(1),
484  $ cdum(1), -1, ierr )
485  lwork_zungbr_p = int( cdum(1) )
486 * Compute space needed for ZUNGBR Q
487  CALL zungbr( 'Q', m, m, m, a, n, cdum(1),
488  $ cdum(1), -1, ierr )
489  lwork_zungbr_q = int( cdum(1) )
490  IF( n.GE.mnthr ) THEN
491  IF( wntvn ) THEN
492 *
493 * Path 1t(N much larger than M, JOBVT='N')
494 *
495  maxwrk = m + lwork_zgelqf
496  maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
497  IF( wntuo .OR. wntuas )
498  $ maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
499  minwrk = 3*m
500  ELSE IF( wntvo .AND. wntun ) THEN
501 *
502 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
503 *
504  wrkbl = m + lwork_zgelqf
505  wrkbl = max( wrkbl, m+lwork_zunglq_m )
506  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
507  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
508  maxwrk = max( m*m+wrkbl, m*m+m*n )
509  minwrk = 2*m + n
510  ELSE IF( wntvo .AND. wntuas ) THEN
511 *
512 * Path 3t(N much larger than M, JOBU='S' or 'A',
513 * JOBVT='O')
514 *
515  wrkbl = m + lwork_zgelqf
516  wrkbl = max( wrkbl, m+lwork_zunglq_m )
517  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
518  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
519  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
520  maxwrk = max( m*m+wrkbl, m*m+m*n )
521  minwrk = 2*m + n
522  ELSE IF( wntvs .AND. wntun ) THEN
523 *
524 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
525 *
526  wrkbl = m + lwork_zgelqf
527  wrkbl = max( wrkbl, m+lwork_zunglq_m )
528  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
529  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
530  maxwrk = m*m + wrkbl
531  minwrk = 2*m + n
532  ELSE IF( wntvs .AND. wntuo ) THEN
533 *
534 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
535 *
536  wrkbl = m + lwork_zgelqf
537  wrkbl = max( wrkbl, m+lwork_zunglq_m )
538  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
539  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
540  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
541  maxwrk = 2*m*m + wrkbl
542  minwrk = 2*m + n
543  ELSE IF( wntvs .AND. wntuas ) THEN
544 *
545 * Path 6t(N much larger than M, JOBU='S' or 'A',
546 * JOBVT='S')
547 *
548  wrkbl = m + lwork_zgelqf
549  wrkbl = max( wrkbl, m+lwork_zunglq_m )
550  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
551  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
552  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
553  maxwrk = m*m + wrkbl
554  minwrk = 2*m + n
555  ELSE IF( wntva .AND. wntun ) THEN
556 *
557 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
558 *
559  wrkbl = m + lwork_zgelqf
560  wrkbl = max( wrkbl, m+lwork_zunglq_n )
561  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
562  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
563  maxwrk = m*m + wrkbl
564  minwrk = 2*m + n
565  ELSE IF( wntva .AND. wntuo ) THEN
566 *
567 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
568 *
569  wrkbl = m + lwork_zgelqf
570  wrkbl = max( wrkbl, m+lwork_zunglq_n )
571  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
572  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
573  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
574  maxwrk = 2*m*m + wrkbl
575  minwrk = 2*m + n
576  ELSE IF( wntva .AND. wntuas ) THEN
577 *
578 * Path 9t(N much larger than M, JOBU='S' or 'A',
579 * JOBVT='A')
580 *
581  wrkbl = m + lwork_zgelqf
582  wrkbl = max( wrkbl, m+lwork_zunglq_n )
583  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
584  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
585  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
586  maxwrk = m*m + wrkbl
587  minwrk = 2*m + n
588  END IF
589  ELSE
590 *
591 * Path 10t(N greater than M, but not much larger)
592 *
593  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
594  $ cdum(1), cdum(1), -1, ierr )
595  lwork_zgebrd = int( cdum(1) )
596  maxwrk = 2*m + lwork_zgebrd
597  IF( wntvs .OR. wntvo ) THEN
598 * Compute space needed for ZUNGBR P
599  CALL zungbr( 'P', m, n, m, a, n, cdum(1),
600  $ cdum(1), -1, ierr )
601  lwork_zungbr_p = int( cdum(1) )
602  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
603  END IF
604  IF( wntva ) THEN
605  CALL zungbr( 'P', n, n, m, a, n, cdum(1),
606  $ cdum(1), -1, ierr )
607  lwork_zungbr_p = int( cdum(1) )
608  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
609  END IF
610  IF( .NOT.wntun ) THEN
611  maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
612  END IF
613  minwrk = 2*m + n
614  END IF
615  END IF
616  maxwrk = max( maxwrk, minwrk )
617  work( 1 ) = maxwrk
618 *
619  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
620  info = -13
621  END IF
622  END IF
623 *
624  IF( info.NE.0 ) THEN
625  CALL xerbla( 'ZGESVD', -info )
626  RETURN
627  ELSE IF( lquery ) THEN
628  RETURN
629  END IF
630 *
631 * Quick return if possible
632 *
633  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
634  RETURN
635  END IF
636 *
637 * Get machine constants
638 *
639  eps = dlamch( 'P' )
640  smlnum = sqrt( dlamch( 'S' ) ) / eps
641  bignum = one / smlnum
642 *
643 * Scale A if max element outside range [SMLNUM,BIGNUM]
644 *
645  anrm = zlange( 'M', m, n, a, lda, dum )
646  iscl = 0
647  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
648  iscl = 1
649  CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
650  ELSE IF( anrm.GT.bignum ) THEN
651  iscl = 1
652  CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
653  END IF
654 *
655  IF( m.GE.n ) THEN
656 *
657 * A has at least as many rows as columns. If A has sufficiently
658 * more rows than columns, first reduce using the QR
659 * decomposition (if sufficient workspace available)
660 *
661  IF( m.GE.mnthr ) THEN
662 *
663  IF( wntun ) THEN
664 *
665 * Path 1 (M much larger than N, JOBU='N')
666 * No left singular vectors to be computed
667 *
668  itau = 1
669  iwork = itau + n
670 *
671 * Compute A=Q*R
672 * (CWorkspace: need 2*N, prefer N+N*NB)
673 * (RWorkspace: need 0)
674 *
675  CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
676  $ lwork-iwork+1, ierr )
677 *
678 * Zero out below R
679 *
680  IF( n .GT. 1 ) THEN
681  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
682  $ lda )
683  END IF
684  ie = 1
685  itauq = 1
686  itaup = itauq + n
687  iwork = itaup + n
688 *
689 * Bidiagonalize R in A
690 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
691 * (RWorkspace: need N)
692 *
693  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
694  $ work( itaup ), work( iwork ), lwork-iwork+1,
695  $ ierr )
696  ncvt = 0
697  IF( wntvo .OR. wntvas ) THEN
698 *
699 * If right singular vectors desired, generate P'.
700 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
701 * (RWorkspace: 0)
702 *
703  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
704  $ work( iwork ), lwork-iwork+1, ierr )
705  ncvt = n
706  END IF
707  irwork = ie + n
708 *
709 * Perform bidiagonal QR iteration, computing right
710 * singular vectors of A in A if desired
711 * (CWorkspace: 0)
712 * (RWorkspace: need BDSPAC)
713 *
714  CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
715  $ cdum, 1, cdum, 1, rwork( irwork ), info )
716 *
717 * If right singular vectors desired in VT, copy them there
718 *
719  IF( wntvas )
720  $ CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
721 *
722  ELSE IF( wntuo .AND. wntvn ) THEN
723 *
724 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
725 * N left singular vectors to be overwritten on A and
726 * no right singular vectors to be computed
727 *
728  IF( lwork.GE.n*n+3*n ) THEN
729 *
730 * Sufficient workspace for a fast algorithm
731 *
732  ir = 1
733  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
734 *
735 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
736 *
737  ldwrku = lda
738  ldwrkr = lda
739  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
740 *
741 * WORK(IU) is LDA by N, WORK(IR) is N by N
742 *
743  ldwrku = lda
744  ldwrkr = n
745  ELSE
746 *
747 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
748 *
749  ldwrku = ( lwork-n*n ) / n
750  ldwrkr = n
751  END IF
752  itau = ir + ldwrkr*n
753  iwork = itau + n
754 *
755 * Compute A=Q*R
756 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
757 * (RWorkspace: 0)
758 *
759  CALL zgeqrf( m, n, a, lda, work( itau ),
760  $ work( iwork ), lwork-iwork+1, ierr )
761 *
762 * Copy R to WORK(IR) and zero out below it
763 *
764  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
765  CALL zlaset( 'L', n-1, n-1, czero, czero,
766  $ work( ir+1 ), ldwrkr )
767 *
768 * Generate Q in A
769 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
770 * (RWorkspace: 0)
771 *
772  CALL zungqr( m, n, n, a, lda, work( itau ),
773  $ work( iwork ), lwork-iwork+1, ierr )
774  ie = 1
775  itauq = itau
776  itaup = itauq + n
777  iwork = itaup + n
778 *
779 * Bidiagonalize R in WORK(IR)
780 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
781 * (RWorkspace: need N)
782 *
783  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
784  $ work( itauq ), work( itaup ),
785  $ work( iwork ), lwork-iwork+1, ierr )
786 *
787 * Generate left vectors bidiagonalizing R
788 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
789 * (RWorkspace: need 0)
790 *
791  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
792  $ work( itauq ), work( iwork ),
793  $ lwork-iwork+1, ierr )
794  irwork = ie + n
795 *
796 * Perform bidiagonal QR iteration, computing left
797 * singular vectors of R in WORK(IR)
798 * (CWorkspace: need N*N)
799 * (RWorkspace: need BDSPAC)
800 *
801  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
802  $ work( ir ), ldwrkr, cdum, 1,
803  $ rwork( irwork ), info )
804  iu = itauq
805 *
806 * Multiply Q in A by left singular vectors of R in
807 * WORK(IR), storing result in WORK(IU) and copying to A
808 * (CWorkspace: need N*N+N, prefer N*N+M*N)
809 * (RWorkspace: 0)
810 *
811  DO 10 i = 1, m, ldwrku
812  chunk = min( m-i+1, ldwrku )
813  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
814  $ lda, work( ir ), ldwrkr, czero,
815  $ work( iu ), ldwrku )
816  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
817  $ a( i, 1 ), lda )
818  10 CONTINUE
819 *
820  ELSE
821 *
822 * Insufficient workspace for a fast algorithm
823 *
824  ie = 1
825  itauq = 1
826  itaup = itauq + n
827  iwork = itaup + n
828 *
829 * Bidiagonalize A
830 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
831 * (RWorkspace: N)
832 *
833  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
834  $ work( itauq ), work( itaup ),
835  $ work( iwork ), lwork-iwork+1, ierr )
836 *
837 * Generate left vectors bidiagonalizing A
838 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
839 * (RWorkspace: 0)
840 *
841  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
842  $ work( iwork ), lwork-iwork+1, ierr )
843  irwork = ie + n
844 *
845 * Perform bidiagonal QR iteration, computing left
846 * singular vectors of A in A
847 * (CWorkspace: need 0)
848 * (RWorkspace: need BDSPAC)
849 *
850  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
851  $ a, lda, cdum, 1, rwork( irwork ), info )
852 *
853  END IF
854 *
855  ELSE IF( wntuo .AND. wntvas ) THEN
856 *
857 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858 * N left singular vectors to be overwritten on A and
859 * N right singular vectors to be computed in VT
860 *
861  IF( lwork.GE.n*n+3*n ) THEN
862 *
863 * Sufficient workspace for a fast algorithm
864 *
865  ir = 1
866  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
867 *
868 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
869 *
870  ldwrku = lda
871  ldwrkr = lda
872  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
873 *
874 * WORK(IU) is LDA by N and WORK(IR) is N by N
875 *
876  ldwrku = lda
877  ldwrkr = n
878  ELSE
879 *
880 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881 *
882  ldwrku = ( lwork-n*n ) / n
883  ldwrkr = n
884  END IF
885  itau = ir + ldwrkr*n
886  iwork = itau + n
887 *
888 * Compute A=Q*R
889 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
890 * (RWorkspace: 0)
891 *
892  CALL zgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
904 * (RWorkspace: 0)
905 *
906  CALL zungqr( m, n, n, a, lda, work( itau ),
907  $ work( iwork ), lwork-iwork+1, ierr )
908  ie = 1
909  itauq = itau
910  itaup = itauq + n
911  iwork = itaup + n
912 *
913 * Bidiagonalize R in VT, copying result to WORK(IR)
914 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
915 * (RWorkspace: need N)
916 *
917  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
918  $ work( itauq ), work( itaup ),
919  $ work( iwork ), lwork-iwork+1, ierr )
920  CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
921 *
922 * Generate left vectors bidiagonalizing R in WORK(IR)
923 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
924 * (RWorkspace: 0)
925 *
926  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
927  $ work( itauq ), work( iwork ),
928  $ lwork-iwork+1, ierr )
929 *
930 * Generate right vectors bidiagonalizing R in VT
931 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
932 * (RWorkspace: 0)
933 *
934  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
935  $ work( iwork ), lwork-iwork+1, ierr )
936  irwork = ie + n
937 *
938 * Perform bidiagonal QR iteration, computing left
939 * singular vectors of R in WORK(IR) and computing right
940 * singular vectors of R in VT
941 * (CWorkspace: need N*N)
942 * (RWorkspace: need BDSPAC)
943 *
944  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
945  $ ldvt, work( ir ), ldwrkr, cdum, 1,
946  $ rwork( irwork ), info )
947  iu = itauq
948 *
949 * Multiply Q in A by left singular vectors of R in
950 * WORK(IR), storing result in WORK(IU) and copying to A
951 * (CWorkspace: need N*N+N, prefer N*N+M*N)
952 * (RWorkspace: 0)
953 *
954  DO 20 i = 1, m, ldwrku
955  chunk = min( m-i+1, ldwrku )
956  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
957  $ lda, work( ir ), ldwrkr, czero,
958  $ work( iu ), ldwrku )
959  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
960  $ a( i, 1 ), lda )
961  20 CONTINUE
962 *
963  ELSE
964 *
965 * Insufficient workspace for a fast algorithm
966 *
967  itau = 1
968  iwork = itau + n
969 *
970 * Compute A=Q*R
971 * (CWorkspace: need 2*N, prefer N+N*NB)
972 * (RWorkspace: 0)
973 *
974  CALL zgeqrf( m, n, a, lda, work( itau ),
975  $ work( iwork ), lwork-iwork+1, ierr )
976 *
977 * Copy R to VT, zeroing out below it
978 *
979  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
980  IF( n.GT.1 )
981  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
982  $ vt( 2, 1 ), ldvt )
983 *
984 * Generate Q in A
985 * (CWorkspace: need 2*N, prefer N+N*NB)
986 * (RWorkspace: 0)
987 *
988  CALL zungqr( m, n, n, a, lda, work( itau ),
989  $ work( iwork ), lwork-iwork+1, ierr )
990  ie = 1
991  itauq = itau
992  itaup = itauq + n
993  iwork = itaup + n
994 *
995 * Bidiagonalize R in VT
996 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
997 * (RWorkspace: N)
998 *
999  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1000  $ work( itauq ), work( itaup ),
1001  $ work( iwork ), lwork-iwork+1, ierr )
1002 *
1003 * Multiply Q in A by left vectors bidiagonalizing R
1004 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1005 * (RWorkspace: 0)
1006 *
1007  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1008  $ work( itauq ), a, lda, work( iwork ),
1009  $ lwork-iwork+1, ierr )
1010 *
1011 * Generate right vectors bidiagonalizing R in VT
1012 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1013 * (RWorkspace: 0)
1014 *
1015  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1016  $ work( iwork ), lwork-iwork+1, ierr )
1017  irwork = ie + n
1018 *
1019 * Perform bidiagonal QR iteration, computing left
1020 * singular vectors of A in A and computing right
1021 * singular vectors of A in VT
1022 * (CWorkspace: 0)
1023 * (RWorkspace: need BDSPAC)
1024 *
1025  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1026  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1027  $ info )
1028 *
1029  END IF
1030 *
1031  ELSE IF( wntus ) THEN
1032 *
1033  IF( wntvn ) THEN
1034 *
1035 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1036 * N left singular vectors to be computed in U and
1037 * no right singular vectors to be computed
1038 *
1039  IF( lwork.GE.n*n+3*n ) THEN
1040 *
1041 * Sufficient workspace for a fast algorithm
1042 *
1043  ir = 1
1044  IF( lwork.GE.wrkbl+lda*n ) THEN
1045 *
1046 * WORK(IR) is LDA by N
1047 *
1048  ldwrkr = lda
1049  ELSE
1050 *
1051 * WORK(IR) is N by N
1052 *
1053  ldwrkr = n
1054  END IF
1055  itau = ir + ldwrkr*n
1056  iwork = itau + n
1057 *
1058 * Compute A=Q*R
1059 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 * (RWorkspace: 0)
1061 *
1062  CALL zgeqrf( m, n, a, lda, work( itau ),
1063  $ work( iwork ), lwork-iwork+1, ierr )
1064 *
1065 * Copy R to WORK(IR), zeroing out below it
1066 *
1067  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1068  $ ldwrkr )
1069  CALL zlaset( 'L', n-1, n-1, czero, czero,
1070  $ work( ir+1 ), ldwrkr )
1071 *
1072 * Generate Q in A
1073 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1074 * (RWorkspace: 0)
1075 *
1076  CALL zungqr( m, n, n, a, lda, work( itau ),
1077  $ work( iwork ), lwork-iwork+1, ierr )
1078  ie = 1
1079  itauq = itau
1080  itaup = itauq + n
1081  iwork = itaup + n
1082 *
1083 * Bidiagonalize R in WORK(IR)
1084 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1085 * (RWorkspace: need N)
1086 *
1087  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1088  $ rwork( ie ), work( itauq ),
1089  $ work( itaup ), work( iwork ),
1090  $ lwork-iwork+1, ierr )
1091 *
1092 * Generate left vectors bidiagonalizing R in WORK(IR)
1093 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1094 * (RWorkspace: 0)
1095 *
1096  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1097  $ work( itauq ), work( iwork ),
1098  $ lwork-iwork+1, ierr )
1099  irwork = ie + n
1100 *
1101 * Perform bidiagonal QR iteration, computing left
1102 * singular vectors of R in WORK(IR)
1103 * (CWorkspace: need N*N)
1104 * (RWorkspace: need BDSPAC)
1105 *
1106  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1107  $ 1, work( ir ), ldwrkr, cdum, 1,
1108  $ rwork( irwork ), info )
1109 *
1110 * Multiply Q in A by left singular vectors of R in
1111 * WORK(IR), storing result in U
1112 * (CWorkspace: need N*N)
1113 * (RWorkspace: 0)
1114 *
1115  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1116  $ work( ir ), ldwrkr, czero, u, ldu )
1117 *
1118  ELSE
1119 *
1120 * Insufficient workspace for a fast algorithm
1121 *
1122  itau = 1
1123  iwork = itau + n
1124 *
1125 * Compute A=Q*R, copying result to U
1126 * (CWorkspace: need 2*N, prefer N+N*NB)
1127 * (RWorkspace: 0)
1128 *
1129  CALL zgeqrf( m, n, a, lda, work( itau ),
1130  $ work( iwork ), lwork-iwork+1, ierr )
1131  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1132 *
1133 * Generate Q in U
1134 * (CWorkspace: need 2*N, prefer N+N*NB)
1135 * (RWorkspace: 0)
1136 *
1137  CALL zungqr( m, n, n, u, ldu, work( itau ),
1138  $ work( iwork ), lwork-iwork+1, ierr )
1139  ie = 1
1140  itauq = itau
1141  itaup = itauq + n
1142  iwork = itaup + n
1143 *
1144 * Zero out below R in A
1145 *
1146  IF( n .GT. 1 ) THEN
1147  CALL zlaset( 'L', n-1, n-1, czero, czero,
1148  $ a( 2, 1 ), lda )
1149  END IF
1150 *
1151 * Bidiagonalize R in A
1152 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1153 * (RWorkspace: need N)
1154 *
1155  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1156  $ work( itauq ), work( itaup ),
1157  $ work( iwork ), lwork-iwork+1, ierr )
1158 *
1159 * Multiply Q in U by left vectors bidiagonalizing R
1160 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1161 * (RWorkspace: 0)
1162 *
1163  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1164  $ work( itauq ), u, ldu, work( iwork ),
1165  $ lwork-iwork+1, ierr )
1166  irwork = ie + n
1167 *
1168 * Perform bidiagonal QR iteration, computing left
1169 * singular vectors of A in U
1170 * (CWorkspace: 0)
1171 * (RWorkspace: need BDSPAC)
1172 *
1173  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1174  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1175  $ info )
1176 *
1177  END IF
1178 *
1179  ELSE IF( wntvo ) THEN
1180 *
1181 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1182 * N left singular vectors to be computed in U and
1183 * N right singular vectors to be overwritten on A
1184 *
1185  IF( lwork.GE.2*n*n+3*n ) THEN
1186 *
1187 * Sufficient workspace for a fast algorithm
1188 *
1189  iu = 1
1190  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1191 *
1192 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1193 *
1194  ldwrku = lda
1195  ir = iu + ldwrku*n
1196  ldwrkr = lda
1197  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1198 *
1199 * WORK(IU) is LDA by N and WORK(IR) is N by N
1200 *
1201  ldwrku = lda
1202  ir = iu + ldwrku*n
1203  ldwrkr = n
1204  ELSE
1205 *
1206 * WORK(IU) is N by N and WORK(IR) is N by N
1207 *
1208  ldwrku = n
1209  ir = iu + ldwrku*n
1210  ldwrkr = n
1211  END IF
1212  itau = ir + ldwrkr*n
1213  iwork = itau + n
1214 *
1215 * Compute A=Q*R
1216 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1217 * (RWorkspace: 0)
1218 *
1219  CALL zgeqrf( m, n, a, lda, work( itau ),
1220  $ work( iwork ), lwork-iwork+1, ierr )
1221 *
1222 * Copy R to WORK(IU), zeroing out below it
1223 *
1224  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1225  $ ldwrku )
1226  CALL zlaset( 'L', n-1, n-1, czero, czero,
1227  $ work( iu+1 ), ldwrku )
1228 *
1229 * Generate Q in A
1230 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1231 * (RWorkspace: 0)
1232 *
1233  CALL zungqr( m, n, n, a, lda, work( itau ),
1234  $ work( iwork ), lwork-iwork+1, ierr )
1235  ie = 1
1236  itauq = itau
1237  itaup = itauq + n
1238  iwork = itaup + n
1239 *
1240 * Bidiagonalize R in WORK(IU), copying result to
1241 * WORK(IR)
1242 * (CWorkspace: need 2*N*N+3*N,
1243 * prefer 2*N*N+2*N+2*N*NB)
1244 * (RWorkspace: need N)
1245 *
1246  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1247  $ rwork( ie ), work( itauq ),
1248  $ work( itaup ), work( iwork ),
1249  $ lwork-iwork+1, ierr )
1250  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1251  $ work( ir ), ldwrkr )
1252 *
1253 * Generate left bidiagonalizing vectors in WORK(IU)
1254 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1255 * (RWorkspace: 0)
1256 *
1257  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1258  $ work( itauq ), work( iwork ),
1259  $ lwork-iwork+1, ierr )
1260 *
1261 * Generate right bidiagonalizing vectors in WORK(IR)
1262 * (CWorkspace: need 2*N*N+3*N-1,
1263 * prefer 2*N*N+2*N+(N-1)*NB)
1264 * (RWorkspace: 0)
1265 *
1266  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1267  $ work( itaup ), work( iwork ),
1268  $ lwork-iwork+1, ierr )
1269  irwork = ie + n
1270 *
1271 * Perform bidiagonal QR iteration, computing left
1272 * singular vectors of R in WORK(IU) and computing
1273 * right singular vectors of R in WORK(IR)
1274 * (CWorkspace: need 2*N*N)
1275 * (RWorkspace: need BDSPAC)
1276 *
1277  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1278  $ work( ir ), ldwrkr, work( iu ),
1279  $ ldwrku, cdum, 1, rwork( irwork ),
1280  $ info )
1281 *
1282 * Multiply Q in A by left singular vectors of R in
1283 * WORK(IU), storing result in U
1284 * (CWorkspace: need N*N)
1285 * (RWorkspace: 0)
1286 *
1287  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1288  $ work( iu ), ldwrku, czero, u, ldu )
1289 *
1290 * Copy right singular vectors of R to A
1291 * (CWorkspace: need N*N)
1292 * (RWorkspace: 0)
1293 *
1294  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1295  $ lda )
1296 *
1297  ELSE
1298 *
1299 * Insufficient workspace for a fast algorithm
1300 *
1301  itau = 1
1302  iwork = itau + n
1303 *
1304 * Compute A=Q*R, copying result to U
1305 * (CWorkspace: need 2*N, prefer N+N*NB)
1306 * (RWorkspace: 0)
1307 *
1308  CALL zgeqrf( m, n, a, lda, work( itau ),
1309  $ work( iwork ), lwork-iwork+1, ierr )
1310  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1311 *
1312 * Generate Q in U
1313 * (CWorkspace: need 2*N, prefer N+N*NB)
1314 * (RWorkspace: 0)
1315 *
1316  CALL zungqr( m, n, n, u, ldu, work( itau ),
1317  $ work( iwork ), lwork-iwork+1, ierr )
1318  ie = 1
1319  itauq = itau
1320  itaup = itauq + n
1321  iwork = itaup + n
1322 *
1323 * Zero out below R in A
1324 *
1325  IF( n .GT. 1 ) THEN
1326  CALL zlaset( 'L', n-1, n-1, czero, czero,
1327  $ a( 2, 1 ), lda )
1328  END IF
1329 *
1330 * Bidiagonalize R in A
1331 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1332 * (RWorkspace: need N)
1333 *
1334  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1335  $ work( itauq ), work( itaup ),
1336  $ work( iwork ), lwork-iwork+1, ierr )
1337 *
1338 * Multiply Q in U by left vectors bidiagonalizing R
1339 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1340 * (RWorkspace: 0)
1341 *
1342  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1343  $ work( itauq ), u, ldu, work( iwork ),
1344  $ lwork-iwork+1, ierr )
1345 *
1346 * Generate right vectors bidiagonalizing R in A
1347 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1348 * (RWorkspace: 0)
1349 *
1350  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1351  $ work( iwork ), lwork-iwork+1, ierr )
1352  irwork = ie + n
1353 *
1354 * Perform bidiagonal QR iteration, computing left
1355 * singular vectors of A in U and computing right
1356 * singular vectors of A in A
1357 * (CWorkspace: 0)
1358 * (RWorkspace: need BDSPAC)
1359 *
1360  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1361  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1362  $ info )
1363 *
1364  END IF
1365 *
1366  ELSE IF( wntvas ) THEN
1367 *
1368 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1369 * or 'A')
1370 * N left singular vectors to be computed in U and
1371 * N right singular vectors to be computed in VT
1372 *
1373  IF( lwork.GE.n*n+3*n ) THEN
1374 *
1375 * Sufficient workspace for a fast algorithm
1376 *
1377  iu = 1
1378  IF( lwork.GE.wrkbl+lda*n ) THEN
1379 *
1380 * WORK(IU) is LDA by N
1381 *
1382  ldwrku = lda
1383  ELSE
1384 *
1385 * WORK(IU) is N by N
1386 *
1387  ldwrku = n
1388  END IF
1389  itau = iu + ldwrku*n
1390  iwork = itau + n
1391 *
1392 * Compute A=Q*R
1393 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1394 * (RWorkspace: 0)
1395 *
1396  CALL zgeqrf( m, n, a, lda, work( itau ),
1397  $ work( iwork ), lwork-iwork+1, ierr )
1398 *
1399 * Copy R to WORK(IU), zeroing out below it
1400 *
1401  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1402  $ ldwrku )
1403  CALL zlaset( 'L', n-1, n-1, czero, czero,
1404  $ work( iu+1 ), ldwrku )
1405 *
1406 * Generate Q in A
1407 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1408 * (RWorkspace: 0)
1409 *
1410  CALL zungqr( m, n, n, a, lda, work( itau ),
1411  $ work( iwork ), lwork-iwork+1, ierr )
1412  ie = 1
1413  itauq = itau
1414  itaup = itauq + n
1415  iwork = itaup + n
1416 *
1417 * Bidiagonalize R in WORK(IU), copying result to VT
1418 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1419 * (RWorkspace: need N)
1420 *
1421  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1422  $ rwork( ie ), work( itauq ),
1423  $ work( itaup ), work( iwork ),
1424  $ lwork-iwork+1, ierr )
1425  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1426  $ ldvt )
1427 *
1428 * Generate left bidiagonalizing vectors in WORK(IU)
1429 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1430 * (RWorkspace: 0)
1431 *
1432  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1433  $ work( itauq ), work( iwork ),
1434  $ lwork-iwork+1, ierr )
1435 *
1436 * Generate right bidiagonalizing vectors in VT
1437 * (CWorkspace: need N*N+3*N-1,
1438 * prefer N*N+2*N+(N-1)*NB)
1439 * (RWorkspace: 0)
1440 *
1441  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1442  $ work( iwork ), lwork-iwork+1, ierr )
1443  irwork = ie + n
1444 *
1445 * Perform bidiagonal QR iteration, computing left
1446 * singular vectors of R in WORK(IU) and computing
1447 * right singular vectors of R in VT
1448 * (CWorkspace: need N*N)
1449 * (RWorkspace: need BDSPAC)
1450 *
1451  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1452  $ ldvt, work( iu ), ldwrku, cdum, 1,
1453  $ rwork( irwork ), info )
1454 *
1455 * Multiply Q in A by left singular vectors of R in
1456 * WORK(IU), storing result in U
1457 * (CWorkspace: need N*N)
1458 * (RWorkspace: 0)
1459 *
1460  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1461  $ work( iu ), ldwrku, czero, u, ldu )
1462 *
1463  ELSE
1464 *
1465 * Insufficient workspace for a fast algorithm
1466 *
1467  itau = 1
1468  iwork = itau + n
1469 *
1470 * Compute A=Q*R, copying result to U
1471 * (CWorkspace: need 2*N, prefer N+N*NB)
1472 * (RWorkspace: 0)
1473 *
1474  CALL zgeqrf( m, n, a, lda, work( itau ),
1475  $ work( iwork ), lwork-iwork+1, ierr )
1476  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1477 *
1478 * Generate Q in U
1479 * (CWorkspace: need 2*N, prefer N+N*NB)
1480 * (RWorkspace: 0)
1481 *
1482  CALL zungqr( m, n, n, u, ldu, work( itau ),
1483  $ work( iwork ), lwork-iwork+1, ierr )
1484 *
1485 * Copy R to VT, zeroing out below it
1486 *
1487  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1488  IF( n.GT.1 )
1489  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
1490  $ vt( 2, 1 ), ldvt )
1491  ie = 1
1492  itauq = itau
1493  itaup = itauq + n
1494  iwork = itaup + n
1495 *
1496 * Bidiagonalize R in VT
1497 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1498 * (RWorkspace: need N)
1499 *
1500  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1501  $ work( itauq ), work( itaup ),
1502  $ work( iwork ), lwork-iwork+1, ierr )
1503 *
1504 * Multiply Q in U by left bidiagonalizing vectors
1505 * in VT
1506 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1507 * (RWorkspace: 0)
1508 *
1509  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1510  $ work( itauq ), u, ldu, work( iwork ),
1511  $ lwork-iwork+1, ierr )
1512 *
1513 * Generate right bidiagonalizing vectors in VT
1514 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1515 * (RWorkspace: 0)
1516 *
1517  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1518  $ work( iwork ), lwork-iwork+1, ierr )
1519  irwork = ie + n
1520 *
1521 * Perform bidiagonal QR iteration, computing left
1522 * singular vectors of A in U and computing right
1523 * singular vectors of A in VT
1524 * (CWorkspace: 0)
1525 * (RWorkspace: need BDSPAC)
1526 *
1527  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1528  $ ldvt, u, ldu, cdum, 1,
1529  $ rwork( irwork ), info )
1530 *
1531  END IF
1532 *
1533  END IF
1534 *
1535  ELSE IF( wntua ) THEN
1536 *
1537  IF( wntvn ) THEN
1538 *
1539 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1540 * M left singular vectors to be computed in U and
1541 * no right singular vectors to be computed
1542 *
1543  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1544 *
1545 * Sufficient workspace for a fast algorithm
1546 *
1547  ir = 1
1548  IF( lwork.GE.wrkbl+lda*n ) THEN
1549 *
1550 * WORK(IR) is LDA by N
1551 *
1552  ldwrkr = lda
1553  ELSE
1554 *
1555 * WORK(IR) is N by N
1556 *
1557  ldwrkr = n
1558  END IF
1559  itau = ir + ldwrkr*n
1560  iwork = itau + n
1561 *
1562 * Compute A=Q*R, copying result to U
1563 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1564 * (RWorkspace: 0)
1565 *
1566  CALL zgeqrf( m, n, a, lda, work( itau ),
1567  $ work( iwork ), lwork-iwork+1, ierr )
1568  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1569 *
1570 * Copy R to WORK(IR), zeroing out below it
1571 *
1572  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1573  $ ldwrkr )
1574  CALL zlaset( 'L', n-1, n-1, czero, czero,
1575  $ work( ir+1 ), ldwrkr )
1576 *
1577 * Generate Q in U
1578 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1579 * (RWorkspace: 0)
1580 *
1581  CALL zungqr( m, m, n, u, ldu, work( itau ),
1582  $ work( iwork ), lwork-iwork+1, ierr )
1583  ie = 1
1584  itauq = itau
1585  itaup = itauq + n
1586  iwork = itaup + n
1587 *
1588 * Bidiagonalize R in WORK(IR)
1589 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1590 * (RWorkspace: need N)
1591 *
1592  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1593  $ rwork( ie ), work( itauq ),
1594  $ work( itaup ), work( iwork ),
1595  $ lwork-iwork+1, ierr )
1596 *
1597 * Generate left bidiagonalizing vectors in WORK(IR)
1598 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1599 * (RWorkspace: 0)
1600 *
1601  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1602  $ work( itauq ), work( iwork ),
1603  $ lwork-iwork+1, ierr )
1604  irwork = ie + n
1605 *
1606 * Perform bidiagonal QR iteration, computing left
1607 * singular vectors of R in WORK(IR)
1608 * (CWorkspace: need N*N)
1609 * (RWorkspace: need BDSPAC)
1610 *
1611  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1612  $ 1, work( ir ), ldwrkr, cdum, 1,
1613  $ rwork( irwork ), info )
1614 *
1615 * Multiply Q in U by left singular vectors of R in
1616 * WORK(IR), storing result in A
1617 * (CWorkspace: need N*N)
1618 * (RWorkspace: 0)
1619 *
1620  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1621  $ work( ir ), ldwrkr, czero, a, lda )
1622 *
1623 * Copy left singular vectors of A from A to U
1624 *
1625  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1626 *
1627  ELSE
1628 *
1629 * Insufficient workspace for a fast algorithm
1630 *
1631  itau = 1
1632  iwork = itau + n
1633 *
1634 * Compute A=Q*R, copying result to U
1635 * (CWorkspace: need 2*N, prefer N+N*NB)
1636 * (RWorkspace: 0)
1637 *
1638  CALL zgeqrf( m, n, a, lda, work( itau ),
1639  $ work( iwork ), lwork-iwork+1, ierr )
1640  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1641 *
1642 * Generate Q in U
1643 * (CWorkspace: need N+M, prefer N+M*NB)
1644 * (RWorkspace: 0)
1645 *
1646  CALL zungqr( m, m, n, u, ldu, work( itau ),
1647  $ work( iwork ), lwork-iwork+1, ierr )
1648  ie = 1
1649  itauq = itau
1650  itaup = itauq + n
1651  iwork = itaup + n
1652 *
1653 * Zero out below R in A
1654 *
1655  IF( n .GT. 1 ) THEN
1656  CALL zlaset( 'L', n-1, n-1, czero, czero,
1657  $ a( 2, 1 ), lda )
1658  END IF
1659 *
1660 * Bidiagonalize R in A
1661 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1662 * (RWorkspace: need N)
1663 *
1664  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1665  $ work( itauq ), work( itaup ),
1666  $ work( iwork ), lwork-iwork+1, ierr )
1667 *
1668 * Multiply Q in U by left bidiagonalizing vectors
1669 * in A
1670 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1671 * (RWorkspace: 0)
1672 *
1673  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1674  $ work( itauq ), u, ldu, work( iwork ),
1675  $ lwork-iwork+1, ierr )
1676  irwork = ie + n
1677 *
1678 * Perform bidiagonal QR iteration, computing left
1679 * singular vectors of A in U
1680 * (CWorkspace: 0)
1681 * (RWorkspace: need BDSPAC)
1682 *
1683  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1684  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1685  $ info )
1686 *
1687  END IF
1688 *
1689  ELSE IF( wntvo ) THEN
1690 *
1691 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1692 * M left singular vectors to be computed in U and
1693 * N right singular vectors to be overwritten on A
1694 *
1695  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1696 *
1697 * Sufficient workspace for a fast algorithm
1698 *
1699  iu = 1
1700  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1701 *
1702 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1703 *
1704  ldwrku = lda
1705  ir = iu + ldwrku*n
1706  ldwrkr = lda
1707  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1708 *
1709 * WORK(IU) is LDA by N and WORK(IR) is N by N
1710 *
1711  ldwrku = lda
1712  ir = iu + ldwrku*n
1713  ldwrkr = n
1714  ELSE
1715 *
1716 * WORK(IU) is N by N and WORK(IR) is N by N
1717 *
1718  ldwrku = n
1719  ir = iu + ldwrku*n
1720  ldwrkr = n
1721  END IF
1722  itau = ir + ldwrkr*n
1723  iwork = itau + n
1724 *
1725 * Compute A=Q*R, copying result to U
1726 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1727 * (RWorkspace: 0)
1728 *
1729  CALL zgeqrf( m, n, a, lda, work( itau ),
1730  $ work( iwork ), lwork-iwork+1, ierr )
1731  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1732 *
1733 * Generate Q in U
1734 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1735 * (RWorkspace: 0)
1736 *
1737  CALL zungqr( m, m, n, u, ldu, work( itau ),
1738  $ work( iwork ), lwork-iwork+1, ierr )
1739 *
1740 * Copy R to WORK(IU), zeroing out below it
1741 *
1742  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1743  $ ldwrku )
1744  CALL zlaset( 'L', n-1, n-1, czero, czero,
1745  $ work( iu+1 ), ldwrku )
1746  ie = 1
1747  itauq = itau
1748  itaup = itauq + n
1749  iwork = itaup + n
1750 *
1751 * Bidiagonalize R in WORK(IU), copying result to
1752 * WORK(IR)
1753 * (CWorkspace: need 2*N*N+3*N,
1754 * prefer 2*N*N+2*N+2*N*NB)
1755 * (RWorkspace: need N)
1756 *
1757  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1758  $ rwork( ie ), work( itauq ),
1759  $ work( itaup ), work( iwork ),
1760  $ lwork-iwork+1, ierr )
1761  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1762  $ work( ir ), ldwrkr )
1763 *
1764 * Generate left bidiagonalizing vectors in WORK(IU)
1765 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1766 * (RWorkspace: 0)
1767 *
1768  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1769  $ work( itauq ), work( iwork ),
1770  $ lwork-iwork+1, ierr )
1771 *
1772 * Generate right bidiagonalizing vectors in WORK(IR)
1773 * (CWorkspace: need 2*N*N+3*N-1,
1774 * prefer 2*N*N+2*N+(N-1)*NB)
1775 * (RWorkspace: 0)
1776 *
1777  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1778  $ work( itaup ), work( iwork ),
1779  $ lwork-iwork+1, ierr )
1780  irwork = ie + n
1781 *
1782 * Perform bidiagonal QR iteration, computing left
1783 * singular vectors of R in WORK(IU) and computing
1784 * right singular vectors of R in WORK(IR)
1785 * (CWorkspace: need 2*N*N)
1786 * (RWorkspace: need BDSPAC)
1787 *
1788  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1789  $ work( ir ), ldwrkr, work( iu ),
1790  $ ldwrku, cdum, 1, rwork( irwork ),
1791  $ info )
1792 *
1793 * Multiply Q in U by left singular vectors of R in
1794 * WORK(IU), storing result in A
1795 * (CWorkspace: need N*N)
1796 * (RWorkspace: 0)
1797 *
1798  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1799  $ work( iu ), ldwrku, czero, a, lda )
1800 *
1801 * Copy left singular vectors of A from A to U
1802 *
1803  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1804 *
1805 * Copy right singular vectors of R from WORK(IR) to A
1806 *
1807  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1808  $ lda )
1809 *
1810  ELSE
1811 *
1812 * Insufficient workspace for a fast algorithm
1813 *
1814  itau = 1
1815  iwork = itau + n
1816 *
1817 * Compute A=Q*R, copying result to U
1818 * (CWorkspace: need 2*N, prefer N+N*NB)
1819 * (RWorkspace: 0)
1820 *
1821  CALL zgeqrf( m, n, a, lda, work( itau ),
1822  $ work( iwork ), lwork-iwork+1, ierr )
1823  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1824 *
1825 * Generate Q in U
1826 * (CWorkspace: need N+M, prefer N+M*NB)
1827 * (RWorkspace: 0)
1828 *
1829  CALL zungqr( m, m, n, u, ldu, work( itau ),
1830  $ work( iwork ), lwork-iwork+1, ierr )
1831  ie = 1
1832  itauq = itau
1833  itaup = itauq + n
1834  iwork = itaup + n
1835 *
1836 * Zero out below R in A
1837 *
1838  IF( n .GT. 1 ) THEN
1839  CALL zlaset( 'L', n-1, n-1, czero, czero,
1840  $ a( 2, 1 ), lda )
1841  END IF
1842 *
1843 * Bidiagonalize R in A
1844 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1845 * (RWorkspace: need N)
1846 *
1847  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1848  $ work( itauq ), work( itaup ),
1849  $ work( iwork ), lwork-iwork+1, ierr )
1850 *
1851 * Multiply Q in U by left bidiagonalizing vectors
1852 * in A
1853 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1854 * (RWorkspace: 0)
1855 *
1856  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1857  $ work( itauq ), u, ldu, work( iwork ),
1858  $ lwork-iwork+1, ierr )
1859 *
1860 * Generate right bidiagonalizing vectors in A
1861 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1862 * (RWorkspace: 0)
1863 *
1864  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1865  $ work( iwork ), lwork-iwork+1, ierr )
1866  irwork = ie + n
1867 *
1868 * Perform bidiagonal QR iteration, computing left
1869 * singular vectors of A in U and computing right
1870 * singular vectors of A in A
1871 * (CWorkspace: 0)
1872 * (RWorkspace: need BDSPAC)
1873 *
1874  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1875  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1876  $ info )
1877 *
1878  END IF
1879 *
1880  ELSE IF( wntvas ) THEN
1881 *
1882 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1883 * or 'A')
1884 * M left singular vectors to be computed in U and
1885 * N right singular vectors to be computed in VT
1886 *
1887  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1888 *
1889 * Sufficient workspace for a fast algorithm
1890 *
1891  iu = 1
1892  IF( lwork.GE.wrkbl+lda*n ) THEN
1893 *
1894 * WORK(IU) is LDA by N
1895 *
1896  ldwrku = lda
1897  ELSE
1898 *
1899 * WORK(IU) is N by N
1900 *
1901  ldwrku = n
1902  END IF
1903  itau = iu + ldwrku*n
1904  iwork = itau + n
1905 *
1906 * Compute A=Q*R, copying result to U
1907 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1908 * (RWorkspace: 0)
1909 *
1910  CALL zgeqrf( m, n, a, lda, work( itau ),
1911  $ work( iwork ), lwork-iwork+1, ierr )
1912  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1913 *
1914 * Generate Q in U
1915 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1916 * (RWorkspace: 0)
1917 *
1918  CALL zungqr( m, m, n, u, ldu, work( itau ),
1919  $ work( iwork ), lwork-iwork+1, ierr )
1920 *
1921 * Copy R to WORK(IU), zeroing out below it
1922 *
1923  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1924  $ ldwrku )
1925  CALL zlaset( 'L', n-1, n-1, czero, czero,
1926  $ work( iu+1 ), ldwrku )
1927  ie = 1
1928  itauq = itau
1929  itaup = itauq + n
1930  iwork = itaup + n
1931 *
1932 * Bidiagonalize R in WORK(IU), copying result to VT
1933 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1934 * (RWorkspace: need N)
1935 *
1936  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1937  $ rwork( ie ), work( itauq ),
1938  $ work( itaup ), work( iwork ),
1939  $ lwork-iwork+1, ierr )
1940  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1941  $ ldvt )
1942 *
1943 * Generate left bidiagonalizing vectors in WORK(IU)
1944 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1945 * (RWorkspace: 0)
1946 *
1947  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1948  $ work( itauq ), work( iwork ),
1949  $ lwork-iwork+1, ierr )
1950 *
1951 * Generate right bidiagonalizing vectors in VT
1952 * (CWorkspace: need N*N+3*N-1,
1953 * prefer N*N+2*N+(N-1)*NB)
1954 * (RWorkspace: need 0)
1955 *
1956  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1957  $ work( iwork ), lwork-iwork+1, ierr )
1958  irwork = ie + n
1959 *
1960 * Perform bidiagonal QR iteration, computing left
1961 * singular vectors of R in WORK(IU) and computing
1962 * right singular vectors of R in VT
1963 * (CWorkspace: need N*N)
1964 * (RWorkspace: need BDSPAC)
1965 *
1966  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1967  $ ldvt, work( iu ), ldwrku, cdum, 1,
1968  $ rwork( irwork ), info )
1969 *
1970 * Multiply Q in U by left singular vectors of R in
1971 * WORK(IU), storing result in A
1972 * (CWorkspace: need N*N)
1973 * (RWorkspace: 0)
1974 *
1975  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1976  $ work( iu ), ldwrku, czero, a, lda )
1977 *
1978 * Copy left singular vectors of A from A to U
1979 *
1980  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1981 *
1982  ELSE
1983 *
1984 * Insufficient workspace for a fast algorithm
1985 *
1986  itau = 1
1987  iwork = itau + n
1988 *
1989 * Compute A=Q*R, copying result to U
1990 * (CWorkspace: need 2*N, prefer N+N*NB)
1991 * (RWorkspace: 0)
1992 *
1993  CALL zgeqrf( m, n, a, lda, work( itau ),
1994  $ work( iwork ), lwork-iwork+1, ierr )
1995  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1996 *
1997 * Generate Q in U
1998 * (CWorkspace: need N+M, prefer N+M*NB)
1999 * (RWorkspace: 0)
2000 *
2001  CALL zungqr( m, m, n, u, ldu, work( itau ),
2002  $ work( iwork ), lwork-iwork+1, ierr )
2003 *
2004 * Copy R from A to VT, zeroing out below it
2005 *
2006  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2007  IF( n.GT.1 )
2008  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
2009  $ vt( 2, 1 ), ldvt )
2010  ie = 1
2011  itauq = itau
2012  itaup = itauq + n
2013  iwork = itaup + n
2014 *
2015 * Bidiagonalize R in VT
2016 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2017 * (RWorkspace: need N)
2018 *
2019  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
2020  $ work( itauq ), work( itaup ),
2021  $ work( iwork ), lwork-iwork+1, ierr )
2022 *
2023 * Multiply Q in U by left bidiagonalizing vectors
2024 * in VT
2025 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2026 * (RWorkspace: 0)
2027 *
2028  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2029  $ work( itauq ), u, ldu, work( iwork ),
2030  $ lwork-iwork+1, ierr )
2031 *
2032 * Generate right bidiagonalizing vectors in VT
2033 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2034 * (RWorkspace: 0)
2035 *
2036  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2037  $ work( iwork ), lwork-iwork+1, ierr )
2038  irwork = ie + n
2039 *
2040 * Perform bidiagonal QR iteration, computing left
2041 * singular vectors of A in U and computing right
2042 * singular vectors of A in VT
2043 * (CWorkspace: 0)
2044 * (RWorkspace: need BDSPAC)
2045 *
2046  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2047  $ ldvt, u, ldu, cdum, 1,
2048  $ rwork( irwork ), info )
2049 *
2050  END IF
2051 *
2052  END IF
2053 *
2054  END IF
2055 *
2056  ELSE
2057 *
2058 * M .LT. MNTHR
2059 *
2060 * Path 10 (M at least N, but not much larger)
2061 * Reduce to bidiagonal form without QR decomposition
2062 *
2063  ie = 1
2064  itauq = 1
2065  itaup = itauq + n
2066  iwork = itaup + n
2067 *
2068 * Bidiagonalize A
2069 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2070 * (RWorkspace: need N)
2071 *
2072  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2073  $ work( itaup ), work( iwork ), lwork-iwork+1,
2074  $ ierr )
2075  IF( wntuas ) THEN
2076 *
2077 * If left singular vectors desired in U, copy result to U
2078 * and generate left bidiagonalizing vectors in U
2079 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2080 * (RWorkspace: 0)
2081 *
2082  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
2083  IF( wntus )
2084  $ ncu = n
2085  IF( wntua )
2086  $ ncu = m
2087  CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2088  $ work( iwork ), lwork-iwork+1, ierr )
2089  END IF
2090  IF( wntvas ) THEN
2091 *
2092 * If right singular vectors desired in VT, copy result to
2093 * VT and generate right bidiagonalizing vectors in VT
2094 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2095 * (RWorkspace: 0)
2096 *
2097  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2098  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2099  $ work( iwork ), lwork-iwork+1, ierr )
2100  END IF
2101  IF( wntuo ) THEN
2102 *
2103 * If left singular vectors desired in A, generate left
2104 * bidiagonalizing vectors in A
2105 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2106 * (RWorkspace: 0)
2107 *
2108  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
2109  $ work( iwork ), lwork-iwork+1, ierr )
2110  END IF
2111  IF( wntvo ) THEN
2112 *
2113 * If right singular vectors desired in A, generate right
2114 * bidiagonalizing vectors in A
2115 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2116 * (RWorkspace: 0)
2117 *
2118  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
2119  $ work( iwork ), lwork-iwork+1, ierr )
2120  END IF
2121  irwork = ie + n
2122  IF( wntuas .OR. wntuo )
2123  $ nru = m
2124  IF( wntun )
2125  $ nru = 0
2126  IF( wntvas .OR. wntvo )
2127  $ ncvt = n
2128  IF( wntvn )
2129  $ ncvt = 0
2130  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2131 *
2132 * Perform bidiagonal QR iteration, if desired, computing
2133 * left singular vectors in U and computing right singular
2134 * vectors in VT
2135 * (CWorkspace: 0)
2136 * (RWorkspace: need BDSPAC)
2137 *
2138  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2139  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2140  $ info )
2141  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2142 *
2143 * Perform bidiagonal QR iteration, if desired, computing
2144 * left singular vectors in U and computing right singular
2145 * vectors in A
2146 * (CWorkspace: 0)
2147 * (RWorkspace: need BDSPAC)
2148 *
2149  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2150  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2151  $ info )
2152  ELSE
2153 *
2154 * Perform bidiagonal QR iteration, if desired, computing
2155 * left singular vectors in A and computing right singular
2156 * vectors in VT
2157 * (CWorkspace: 0)
2158 * (RWorkspace: need BDSPAC)
2159 *
2160  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2161  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2162  $ info )
2163  END IF
2164 *
2165  END IF
2166 *
2167  ELSE
2168 *
2169 * A has more columns than rows. If A has sufficiently more
2170 * columns than rows, first reduce using the LQ decomposition (if
2171 * sufficient workspace available)
2172 *
2173  IF( n.GE.mnthr ) THEN
2174 *
2175  IF( wntvn ) THEN
2176 *
2177 * Path 1t(N much larger than M, JOBVT='N')
2178 * No right singular vectors to be computed
2179 *
2180  itau = 1
2181  iwork = itau + m
2182 *
2183 * Compute A=L*Q
2184 * (CWorkspace: need 2*M, prefer M+M*NB)
2185 * (RWorkspace: 0)
2186 *
2187  CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
2188  $ lwork-iwork+1, ierr )
2189 *
2190 * Zero out above L
2191 *
2192  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2193  $ lda )
2194  ie = 1
2195  itauq = 1
2196  itaup = itauq + m
2197  iwork = itaup + m
2198 *
2199 * Bidiagonalize L in A
2200 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2201 * (RWorkspace: need M)
2202 *
2203  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2204  $ work( itaup ), work( iwork ), lwork-iwork+1,
2205  $ ierr )
2206  IF( wntuo .OR. wntuas ) THEN
2207 *
2208 * If left singular vectors desired, generate Q
2209 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2210 * (RWorkspace: 0)
2211 *
2212  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2213  $ work( iwork ), lwork-iwork+1, ierr )
2214  END IF
2215  irwork = ie + m
2216  nru = 0
2217  IF( wntuo .OR. wntuas )
2218  $ nru = m
2219 *
2220 * Perform bidiagonal QR iteration, computing left singular
2221 * vectors of A in A if desired
2222 * (CWorkspace: 0)
2223 * (RWorkspace: need BDSPAC)
2224 *
2225  CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2226  $ a, lda, cdum, 1, rwork( irwork ), info )
2227 *
2228 * If left singular vectors desired in U, copy them there
2229 *
2230  IF( wntuas )
2231  $ CALL zlacpy( 'F', m, m, a, lda, u, ldu )
2232 *
2233  ELSE IF( wntvo .AND. wntun ) THEN
2234 *
2235 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2236 * M right singular vectors to be overwritten on A and
2237 * no left singular vectors to be computed
2238 *
2239  IF( lwork.GE.m*m+3*m ) THEN
2240 *
2241 * Sufficient workspace for a fast algorithm
2242 *
2243  ir = 1
2244  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2245 *
2246 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2247 *
2248  ldwrku = lda
2249  chunk = n
2250  ldwrkr = lda
2251  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2252 *
2253 * WORK(IU) is LDA by N and WORK(IR) is M by M
2254 *
2255  ldwrku = lda
2256  chunk = n
2257  ldwrkr = m
2258  ELSE
2259 *
2260 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2261 *
2262  ldwrku = m
2263  chunk = ( lwork-m*m ) / m
2264  ldwrkr = m
2265  END IF
2266  itau = ir + ldwrkr*m
2267  iwork = itau + m
2268 *
2269 * Compute A=L*Q
2270 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2271 * (RWorkspace: 0)
2272 *
2273  CALL zgelqf( m, n, a, lda, work( itau ),
2274  $ work( iwork ), lwork-iwork+1, ierr )
2275 *
2276 * Copy L to WORK(IR) and zero out above it
2277 *
2278  CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2279  CALL zlaset( 'U', m-1, m-1, czero, czero,
2280  $ work( ir+ldwrkr ), ldwrkr )
2281 *
2282 * Generate Q in A
2283 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2284 * (RWorkspace: 0)
2285 *
2286  CALL zunglq( m, n, m, a, lda, work( itau ),
2287  $ work( iwork ), lwork-iwork+1, ierr )
2288  ie = 1
2289  itauq = itau
2290  itaup = itauq + m
2291  iwork = itaup + m
2292 *
2293 * Bidiagonalize L in WORK(IR)
2294 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2295 * (RWorkspace: need M)
2296 *
2297  CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2298  $ work( itauq ), work( itaup ),
2299  $ work( iwork ), lwork-iwork+1, ierr )
2300 *
2301 * Generate right vectors bidiagonalizing L
2302 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2303 * (RWorkspace: 0)
2304 *
2305  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2306  $ work( itaup ), work( iwork ),
2307  $ lwork-iwork+1, ierr )
2308  irwork = ie + m
2309 *
2310 * Perform bidiagonal QR iteration, computing right
2311 * singular vectors of L in WORK(IR)
2312 * (CWorkspace: need M*M)
2313 * (RWorkspace: need BDSPAC)
2314 *
2315  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2316  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2317  $ rwork( irwork ), info )
2318  iu = itauq
2319 *
2320 * Multiply right singular vectors of L in WORK(IR) by Q
2321 * in A, storing result in WORK(IU) and copying to A
2322 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2323 * (RWorkspace: 0)
2324 *
2325  DO 30 i = 1, n, chunk
2326  blk = min( n-i+1, chunk )
2327  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2328  $ ldwrkr, a( 1, i ), lda, czero,
2329  $ work( iu ), ldwrku )
2330  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2331  $ a( 1, i ), lda )
2332  30 CONTINUE
2333 *
2334  ELSE
2335 *
2336 * Insufficient workspace for a fast algorithm
2337 *
2338  ie = 1
2339  itauq = 1
2340  itaup = itauq + m
2341  iwork = itaup + m
2342 *
2343 * Bidiagonalize A
2344 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2345 * (RWorkspace: need M)
2346 *
2347  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
2348  $ work( itauq ), work( itaup ),
2349  $ work( iwork ), lwork-iwork+1, ierr )
2350 *
2351 * Generate right vectors bidiagonalizing A
2352 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2353 * (RWorkspace: 0)
2354 *
2355  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2356  $ work( iwork ), lwork-iwork+1, ierr )
2357  irwork = ie + m
2358 *
2359 * Perform bidiagonal QR iteration, computing right
2360 * singular vectors of A in A
2361 * (CWorkspace: 0)
2362 * (RWorkspace: need BDSPAC)
2363 *
2364  CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2365  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2366 *
2367  END IF
2368 *
2369  ELSE IF( wntvo .AND. wntuas ) THEN
2370 *
2371 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2372 * M right singular vectors to be overwritten on A and
2373 * M left singular vectors to be computed in U
2374 *
2375  IF( lwork.GE.m*m+3*m ) THEN
2376 *
2377 * Sufficient workspace for a fast algorithm
2378 *
2379  ir = 1
2380  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2381 *
2382 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2383 *
2384  ldwrku = lda
2385  chunk = n
2386  ldwrkr = lda
2387  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2388 *
2389 * WORK(IU) is LDA by N and WORK(IR) is M by M
2390 *
2391  ldwrku = lda
2392  chunk = n
2393  ldwrkr = m
2394  ELSE
2395 *
2396 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2397 *
2398  ldwrku = m
2399  chunk = ( lwork-m*m ) / m
2400  ldwrkr = m
2401  END IF
2402  itau = ir + ldwrkr*m
2403  iwork = itau + m
2404 *
2405 * Compute A=L*Q
2406 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2407 * (RWorkspace: 0)
2408 *
2409  CALL zgelqf( m, n, a, lda, work( itau ),
2410  $ work( iwork ), lwork-iwork+1, ierr )
2411 *
2412 * Copy L to U, zeroing about above it
2413 *
2414  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2415  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2416  $ ldu )
2417 *
2418 * Generate Q in A
2419 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2420 * (RWorkspace: 0)
2421 *
2422  CALL zunglq( m, n, m, a, lda, work( itau ),
2423  $ work( iwork ), lwork-iwork+1, ierr )
2424  ie = 1
2425  itauq = itau
2426  itaup = itauq + m
2427  iwork = itaup + m
2428 *
2429 * Bidiagonalize L in U, copying result to WORK(IR)
2430 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2431 * (RWorkspace: need M)
2432 *
2433  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2434  $ work( itauq ), work( itaup ),
2435  $ work( iwork ), lwork-iwork+1, ierr )
2436  CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2437 *
2438 * Generate right vectors bidiagonalizing L in WORK(IR)
2439 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2440 * (RWorkspace: 0)
2441 *
2442  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2443  $ work( itaup ), work( iwork ),
2444  $ lwork-iwork+1, ierr )
2445 *
2446 * Generate left vectors bidiagonalizing L in U
2447 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2448 * (RWorkspace: 0)
2449 *
2450  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2451  $ work( iwork ), lwork-iwork+1, ierr )
2452  irwork = ie + m
2453 *
2454 * Perform bidiagonal QR iteration, computing left
2455 * singular vectors of L in U, and computing right
2456 * singular vectors of L in WORK(IR)
2457 * (CWorkspace: need M*M)
2458 * (RWorkspace: need BDSPAC)
2459 *
2460  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2461  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2462  $ rwork( irwork ), info )
2463  iu = itauq
2464 *
2465 * Multiply right singular vectors of L in WORK(IR) by Q
2466 * in A, storing result in WORK(IU) and copying to A
2467 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2468 * (RWorkspace: 0)
2469 *
2470  DO 40 i = 1, n, chunk
2471  blk = min( n-i+1, chunk )
2472  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2473  $ ldwrkr, a( 1, i ), lda, czero,
2474  $ work( iu ), ldwrku )
2475  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2476  $ a( 1, i ), lda )
2477  40 CONTINUE
2478 *
2479  ELSE
2480 *
2481 * Insufficient workspace for a fast algorithm
2482 *
2483  itau = 1
2484  iwork = itau + m
2485 *
2486 * Compute A=L*Q
2487 * (CWorkspace: need 2*M, prefer M+M*NB)
2488 * (RWorkspace: 0)
2489 *
2490  CALL zgelqf( m, n, a, lda, work( itau ),
2491  $ work( iwork ), lwork-iwork+1, ierr )
2492 *
2493 * Copy L to U, zeroing out above it
2494 *
2495  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2496  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2497  $ ldu )
2498 *
2499 * Generate Q in A
2500 * (CWorkspace: need 2*M, prefer M+M*NB)
2501 * (RWorkspace: 0)
2502 *
2503  CALL zunglq( m, n, m, a, lda, work( itau ),
2504  $ work( iwork ), lwork-iwork+1, ierr )
2505  ie = 1
2506  itauq = itau
2507  itaup = itauq + m
2508  iwork = itaup + m
2509 *
2510 * Bidiagonalize L in U
2511 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2512 * (RWorkspace: need M)
2513 *
2514  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2515  $ work( itauq ), work( itaup ),
2516  $ work( iwork ), lwork-iwork+1, ierr )
2517 *
2518 * Multiply right vectors bidiagonalizing L by Q in A
2519 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2520 * (RWorkspace: 0)
2521 *
2522  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2523  $ work( itaup ), a, lda, work( iwork ),
2524  $ lwork-iwork+1, ierr )
2525 *
2526 * Generate left vectors bidiagonalizing L in U
2527 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2528 * (RWorkspace: 0)
2529 *
2530  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2531  $ work( iwork ), lwork-iwork+1, ierr )
2532  irwork = ie + m
2533 *
2534 * Perform bidiagonal QR iteration, computing left
2535 * singular vectors of A in U and computing right
2536 * singular vectors of A in A
2537 * (CWorkspace: 0)
2538 * (RWorkspace: need BDSPAC)
2539 *
2540  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2541  $ u, ldu, cdum, 1, rwork( irwork ), info )
2542 *
2543  END IF
2544 *
2545  ELSE IF( wntvs ) THEN
2546 *
2547  IF( wntun ) THEN
2548 *
2549 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2550 * M right singular vectors to be computed in VT and
2551 * no left singular vectors to be computed
2552 *
2553  IF( lwork.GE.m*m+3*m ) THEN
2554 *
2555 * Sufficient workspace for a fast algorithm
2556 *
2557  ir = 1
2558  IF( lwork.GE.wrkbl+lda*m ) THEN
2559 *
2560 * WORK(IR) is LDA by M
2561 *
2562  ldwrkr = lda
2563  ELSE
2564 *
2565 * WORK(IR) is M by M
2566 *
2567  ldwrkr = m
2568  END IF
2569  itau = ir + ldwrkr*m
2570  iwork = itau + m
2571 *
2572 * Compute A=L*Q
2573 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2574 * (RWorkspace: 0)
2575 *
2576  CALL zgelqf( m, n, a, lda, work( itau ),
2577  $ work( iwork ), lwork-iwork+1, ierr )
2578 *
2579 * Copy L to WORK(IR), zeroing out above it
2580 *
2581  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
2582  $ ldwrkr )
2583  CALL zlaset( 'U', m-1, m-1, czero, czero,
2584  $ work( ir+ldwrkr ), ldwrkr )
2585 *
2586 * Generate Q in A
2587 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2588 * (RWorkspace: 0)
2589 *
2590  CALL zunglq( m, n, m, a, lda, work( itau ),
2591  $ work( iwork ), lwork-iwork+1, ierr )
2592  ie = 1
2593  itauq = itau
2594  itaup = itauq + m
2595  iwork = itaup + m
2596 *
2597 * Bidiagonalize L in WORK(IR)
2598 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2599 * (RWorkspace: need M)
2600 *
2601  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
2602  $ rwork( ie ), work( itauq ),
2603  $ work( itaup ), work( iwork ),
2604  $ lwork-iwork+1, ierr )
2605 *
2606 * Generate right vectors bidiagonalizing L in
2607 * WORK(IR)
2608 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2609 * (RWorkspace: 0)
2610 *
2611  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2612  $ work( itaup ), work( iwork ),
2613  $ lwork-iwork+1, ierr )
2614  irwork = ie + m
2615 *
2616 * Perform bidiagonal QR iteration, computing right
2617 * singular vectors of L in WORK(IR)
2618 * (CWorkspace: need M*M)
2619 * (RWorkspace: need BDSPAC)
2620 *
2621  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2622  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2623  $ rwork( irwork ), info )
2624 *
2625 * Multiply right singular vectors of L in WORK(IR) by
2626 * Q in A, storing result in VT
2627 * (CWorkspace: need M*M)
2628 * (RWorkspace: 0)
2629 *
2630  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
2631  $ ldwrkr, a, lda, czero, vt, ldvt )
2632 *
2633  ELSE
2634 *
2635 * Insufficient workspace for a fast algorithm
2636 *
2637  itau = 1
2638  iwork = itau + m
2639 *
2640 * Compute A=L*Q
2641 * (CWorkspace: need 2*M, prefer M+M*NB)
2642 * (RWorkspace: 0)
2643 *
2644  CALL zgelqf( m, n, a, lda, work( itau ),
2645  $ work( iwork ), lwork-iwork+1, ierr )
2646 *
2647 * Copy result to VT
2648 *
2649  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2650 *
2651 * Generate Q in VT
2652 * (CWorkspace: need 2*M, prefer M+M*NB)
2653 * (RWorkspace: 0)
2654 *
2655  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2656  $ work( iwork ), lwork-iwork+1, ierr )
2657  ie = 1
2658  itauq = itau
2659  itaup = itauq + m
2660  iwork = itaup + m
2661 *
2662 * Zero out above L in A
2663 *
2664  CALL zlaset( 'U', m-1, m-1, czero, czero,
2665  $ a( 1, 2 ), lda )
2666 *
2667 * Bidiagonalize L in A
2668 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2669 * (RWorkspace: need M)
2670 *
2671  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2672  $ work( itauq ), work( itaup ),
2673  $ work( iwork ), lwork-iwork+1, ierr )
2674 *
2675 * Multiply right vectors bidiagonalizing L by Q in VT
2676 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2677 * (RWorkspace: 0)
2678 *
2679  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2680  $ work( itaup ), vt, ldvt,
2681  $ work( iwork ), lwork-iwork+1, ierr )
2682  irwork = ie + m
2683 *
2684 * Perform bidiagonal QR iteration, computing right
2685 * singular vectors of A in VT
2686 * (CWorkspace: 0)
2687 * (RWorkspace: need BDSPAC)
2688 *
2689  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2690  $ ldvt, cdum, 1, cdum, 1,
2691  $ rwork( irwork ), info )
2692 *
2693  END IF
2694 *
2695  ELSE IF( wntuo ) THEN
2696 *
2697 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2698 * M right singular vectors to be computed in VT and
2699 * M left singular vectors to be overwritten on A
2700 *
2701  IF( lwork.GE.2*m*m+3*m ) THEN
2702 *
2703 * Sufficient workspace for a fast algorithm
2704 *
2705  iu = 1
2706  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2707 *
2708 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2709 *
2710  ldwrku = lda
2711  ir = iu + ldwrku*m
2712  ldwrkr = lda
2713  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2714 *
2715 * WORK(IU) is LDA by M and WORK(IR) is M by M
2716 *
2717  ldwrku = lda
2718  ir = iu + ldwrku*m
2719  ldwrkr = m
2720  ELSE
2721 *
2722 * WORK(IU) is M by M and WORK(IR) is M by M
2723 *
2724  ldwrku = m
2725  ir = iu + ldwrku*m
2726  ldwrkr = m
2727  END IF
2728  itau = ir + ldwrkr*m
2729  iwork = itau + m
2730 *
2731 * Compute A=L*Q
2732 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2733 * (RWorkspace: 0)
2734 *
2735  CALL zgelqf( m, n, a, lda, work( itau ),
2736  $ work( iwork ), lwork-iwork+1, ierr )
2737 *
2738 * Copy L to WORK(IU), zeroing out below it
2739 *
2740  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2741  $ ldwrku )
2742  CALL zlaset( 'U', m-1, m-1, czero, czero,
2743  $ work( iu+ldwrku ), ldwrku )
2744 *
2745 * Generate Q in A
2746 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2747 * (RWorkspace: 0)
2748 *
2749  CALL zunglq( m, n, m, a, lda, work( itau ),
2750  $ work( iwork ), lwork-iwork+1, ierr )
2751  ie = 1
2752  itauq = itau
2753  itaup = itauq + m
2754  iwork = itaup + m
2755 *
2756 * Bidiagonalize L in WORK(IU), copying result to
2757 * WORK(IR)
2758 * (CWorkspace: need 2*M*M+3*M,
2759 * prefer 2*M*M+2*M+2*M*NB)
2760 * (RWorkspace: need M)
2761 *
2762  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2763  $ rwork( ie ), work( itauq ),
2764  $ work( itaup ), work( iwork ),
2765  $ lwork-iwork+1, ierr )
2766  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
2767  $ work( ir ), ldwrkr )
2768 *
2769 * Generate right bidiagonalizing vectors in WORK(IU)
2770 * (CWorkspace: need 2*M*M+3*M-1,
2771 * prefer 2*M*M+2*M+(M-1)*NB)
2772 * (RWorkspace: 0)
2773 *
2774  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2775  $ work( itaup ), work( iwork ),
2776  $ lwork-iwork+1, ierr )
2777 *
2778 * Generate left bidiagonalizing vectors in WORK(IR)
2779 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2780 * (RWorkspace: 0)
2781 *
2782  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2783  $ work( itauq ), work( iwork ),
2784  $ lwork-iwork+1, ierr )
2785  irwork = ie + m
2786 *
2787 * Perform bidiagonal QR iteration, computing left
2788 * singular vectors of L in WORK(IR) and computing
2789 * right singular vectors of L in WORK(IU)
2790 * (CWorkspace: need 2*M*M)
2791 * (RWorkspace: need BDSPAC)
2792 *
2793  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2794  $ work( iu ), ldwrku, work( ir ),
2795  $ ldwrkr, cdum, 1, rwork( irwork ),
2796  $ info )
2797 *
2798 * Multiply right singular vectors of L in WORK(IU) by
2799 * Q in A, storing result in VT
2800 * (CWorkspace: need M*M)
2801 * (RWorkspace: 0)
2802 *
2803  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2804  $ ldwrku, a, lda, czero, vt, ldvt )
2805 *
2806 * Copy left singular vectors of L to A
2807 * (CWorkspace: need M*M)
2808 * (RWorkspace: 0)
2809 *
2810  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2811  $ lda )
2812 *
2813  ELSE
2814 *
2815 * Insufficient workspace for a fast algorithm
2816 *
2817  itau = 1
2818  iwork = itau + m
2819 *
2820 * Compute A=L*Q, copying result to VT
2821 * (CWorkspace: need 2*M, prefer M+M*NB)
2822 * (RWorkspace: 0)
2823 *
2824  CALL zgelqf( m, n, a, lda, work( itau ),
2825  $ work( iwork ), lwork-iwork+1, ierr )
2826  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2827 *
2828 * Generate Q in VT
2829 * (CWorkspace: need 2*M, prefer M+M*NB)
2830 * (RWorkspace: 0)
2831 *
2832  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2833  $ work( iwork ), lwork-iwork+1, ierr )
2834  ie = 1
2835  itauq = itau
2836  itaup = itauq + m
2837  iwork = itaup + m
2838 *
2839 * Zero out above L in A
2840 *
2841  CALL zlaset( 'U', m-1, m-1, czero, czero,
2842  $ a( 1, 2 ), lda )
2843 *
2844 * Bidiagonalize L in A
2845 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2846 * (RWorkspace: need M)
2847 *
2848  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2849  $ work( itauq ), work( itaup ),
2850  $ work( iwork ), lwork-iwork+1, ierr )
2851 *
2852 * Multiply right vectors bidiagonalizing L by Q in VT
2853 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2854 * (RWorkspace: 0)
2855 *
2856  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2857  $ work( itaup ), vt, ldvt,
2858  $ work( iwork ), lwork-iwork+1, ierr )
2859 *
2860 * Generate left bidiagonalizing vectors of L in A
2861 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2862 * (RWorkspace: 0)
2863 *
2864  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2865  $ work( iwork ), lwork-iwork+1, ierr )
2866  irwork = ie + m
2867 *
2868 * Perform bidiagonal QR iteration, computing left
2869 * singular vectors of A in A and computing right
2870 * singular vectors of A in VT
2871 * (CWorkspace: 0)
2872 * (RWorkspace: need BDSPAC)
2873 *
2874  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2875  $ ldvt, a, lda, cdum, 1,
2876  $ rwork( irwork ), info )
2877 *
2878  END IF
2879 *
2880  ELSE IF( wntuas ) THEN
2881 *
2882 * Path 6t(N much larger than M, JOBU='S' or 'A',
2883 * JOBVT='S')
2884 * M right singular vectors to be computed in VT and
2885 * M left singular vectors to be computed in U
2886 *
2887  IF( lwork.GE.m*m+3*m ) THEN
2888 *
2889 * Sufficient workspace for a fast algorithm
2890 *
2891  iu = 1
2892  IF( lwork.GE.wrkbl+lda*m ) THEN
2893 *
2894 * WORK(IU) is LDA by N
2895 *
2896  ldwrku = lda
2897  ELSE
2898 *
2899 * WORK(IU) is LDA by M
2900 *
2901  ldwrku = m
2902  END IF
2903  itau = iu + ldwrku*m
2904  iwork = itau + m
2905 *
2906 * Compute A=L*Q
2907 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2908 * (RWorkspace: 0)
2909 *
2910  CALL zgelqf( m, n, a, lda, work( itau ),
2911  $ work( iwork ), lwork-iwork+1, ierr )
2912 *
2913 * Copy L to WORK(IU), zeroing out above it
2914 *
2915  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2916  $ ldwrku )
2917  CALL zlaset( 'U', m-1, m-1, czero, czero,
2918  $ work( iu+ldwrku ), ldwrku )
2919 *
2920 * Generate Q in A
2921 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2922 * (RWorkspace: 0)
2923 *
2924  CALL zunglq( m, n, m, a, lda, work( itau ),
2925  $ work( iwork ), lwork-iwork+1, ierr )
2926  ie = 1
2927  itauq = itau
2928  itaup = itauq + m
2929  iwork = itaup + m
2930 *
2931 * Bidiagonalize L in WORK(IU), copying result to U
2932 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2933 * (RWorkspace: need M)
2934 *
2935  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2936  $ rwork( ie ), work( itauq ),
2937  $ work( itaup ), work( iwork ),
2938  $ lwork-iwork+1, ierr )
2939  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
2940  $ ldu )
2941 *
2942 * Generate right bidiagonalizing vectors in WORK(IU)
2943 * (CWorkspace: need M*M+3*M-1,
2944 * prefer M*M+2*M+(M-1)*NB)
2945 * (RWorkspace: 0)
2946 *
2947  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2948  $ work( itaup ), work( iwork ),
2949  $ lwork-iwork+1, ierr )
2950 *
2951 * Generate left bidiagonalizing vectors in U
2952 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2953 * (RWorkspace: 0)
2954 *
2955  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2956  $ work( iwork ), lwork-iwork+1, ierr )
2957  irwork = ie + m
2958 *
2959 * Perform bidiagonal QR iteration, computing left
2960 * singular vectors of L in U and computing right
2961 * singular vectors of L in WORK(IU)
2962 * (CWorkspace: need M*M)
2963 * (RWorkspace: need BDSPAC)
2964 *
2965  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2966  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2967  $ rwork( irwork ), info )
2968 *
2969 * Multiply right singular vectors of L in WORK(IU) by
2970 * Q in A, storing result in VT
2971 * (CWorkspace: need M*M)
2972 * (RWorkspace: 0)
2973 *
2974  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2975  $ ldwrku, a, lda, czero, vt, ldvt )
2976 *
2977  ELSE
2978 *
2979 * Insufficient workspace for a fast algorithm
2980 *
2981  itau = 1
2982  iwork = itau + m
2983 *
2984 * Compute A=L*Q, copying result to VT
2985 * (CWorkspace: need 2*M, prefer M+M*NB)
2986 * (RWorkspace: 0)
2987 *
2988  CALL zgelqf( m, n, a, lda, work( itau ),
2989  $ work( iwork ), lwork-iwork+1, ierr )
2990  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2991 *
2992 * Generate Q in VT
2993 * (CWorkspace: need 2*M, prefer M+M*NB)
2994 * (RWorkspace: 0)
2995 *
2996  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2997  $ work( iwork ), lwork-iwork+1, ierr )
2998 *
2999 * Copy L to U, zeroing out above it
3000 *
3001  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3002  CALL zlaset( 'U', m-1, m-1, czero, czero,
3003  $ u( 1, 2 ), ldu )
3004  ie = 1
3005  itauq = itau
3006  itaup = itauq + m
3007  iwork = itaup + m
3008 *
3009 * Bidiagonalize L in U
3010 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3011 * (RWorkspace: need M)
3012 *
3013  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3014  $ work( itauq ), work( itaup ),
3015  $ work( iwork ), lwork-iwork+1, ierr )
3016 *
3017 * Multiply right bidiagonalizing vectors in U by Q
3018 * in VT
3019 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3020 * (RWorkspace: 0)
3021 *
3022  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3023  $ work( itaup ), vt, ldvt,
3024  $ work( iwork ), lwork-iwork+1, ierr )
3025 *
3026 * Generate left bidiagonalizing vectors in U
3027 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3028 * (RWorkspace: 0)
3029 *
3030  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3031  $ work( iwork ), lwork-iwork+1, ierr )
3032  irwork = ie + m
3033 *
3034 * Perform bidiagonal QR iteration, computing left
3035 * singular vectors of A in U and computing right
3036 * singular vectors of A in VT
3037 * (CWorkspace: 0)
3038 * (RWorkspace: need BDSPAC)
3039 *
3040  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3041  $ ldvt, u, ldu, cdum, 1,
3042  $ rwork( irwork ), info )
3043 *
3044  END IF
3045 *
3046  END IF
3047 *
3048  ELSE IF( wntva ) THEN
3049 *
3050  IF( wntun ) THEN
3051 *
3052 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3053 * N right singular vectors to be computed in VT and
3054 * no left singular vectors to be computed
3055 *
3056  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3057 *
3058 * Sufficient workspace for a fast algorithm
3059 *
3060  ir = 1
3061  IF( lwork.GE.wrkbl+lda*m ) THEN
3062 *
3063 * WORK(IR) is LDA by M
3064 *
3065  ldwrkr = lda
3066  ELSE
3067 *
3068 * WORK(IR) is M by M
3069 *
3070  ldwrkr = m
3071  END IF
3072  itau = ir + ldwrkr*m
3073  iwork = itau + m
3074 *
3075 * Compute A=L*Q, copying result to VT
3076 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3077 * (RWorkspace: 0)
3078 *
3079  CALL zgelqf( m, n, a, lda, work( itau ),
3080  $ work( iwork ), lwork-iwork+1, ierr )
3081  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3082 *
3083 * Copy L to WORK(IR), zeroing out above it
3084 *
3085  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
3086  $ ldwrkr )
3087  CALL zlaset( 'U', m-1, m-1, czero, czero,
3088  $ work( ir+ldwrkr ), ldwrkr )
3089 *
3090 * Generate Q in VT
3091 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3092 * (RWorkspace: 0)
3093 *
3094  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3095  $ work( iwork ), lwork-iwork+1, ierr )
3096  ie = 1
3097  itauq = itau
3098  itaup = itauq + m
3099  iwork = itaup + m
3100 *
3101 * Bidiagonalize L in WORK(IR)
3102 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3103 * (RWorkspace: need M)
3104 *
3105  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
3106  $ rwork( ie ), work( itauq ),
3107  $ work( itaup ), work( iwork ),
3108  $ lwork-iwork+1, ierr )
3109 *
3110 * Generate right bidiagonalizing vectors in WORK(IR)
3111 * (CWorkspace: need M*M+3*M-1,
3112 * prefer M*M+2*M+(M-1)*NB)
3113 * (RWorkspace: 0)
3114 *
3115  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
3116  $ work( itaup ), work( iwork ),
3117  $ lwork-iwork+1, ierr )
3118  irwork = ie + m
3119 *
3120 * Perform bidiagonal QR iteration, computing right
3121 * singular vectors of L in WORK(IR)
3122 * (CWorkspace: need M*M)
3123 * (RWorkspace: need BDSPAC)
3124 *
3125  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3126  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3127  $ rwork( irwork ), info )
3128 *
3129 * Multiply right singular vectors of L in WORK(IR) by
3130 * Q in VT, storing result in A
3131 * (CWorkspace: need M*M)
3132 * (RWorkspace: 0)
3133 *
3134  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
3135  $ ldwrkr, vt, ldvt, czero, a, lda )
3136 *
3137 * Copy right singular vectors of A from A to VT
3138 *
3139  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3140 *
3141  ELSE
3142 *
3143 * Insufficient workspace for a fast algorithm
3144 *
3145  itau = 1
3146  iwork = itau + m
3147 *
3148 * Compute A=L*Q, copying result to VT
3149 * (CWorkspace: need 2*M, prefer M+M*NB)
3150 * (RWorkspace: 0)
3151 *
3152  CALL zgelqf( m, n, a, lda, work( itau ),
3153  $ work( iwork ), lwork-iwork+1, ierr )
3154  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3155 *
3156 * Generate Q in VT
3157 * (CWorkspace: need M+N, prefer M+N*NB)
3158 * (RWorkspace: 0)
3159 *
3160  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3161  $ work( iwork ), lwork-iwork+1, ierr )
3162  ie = 1
3163  itauq = itau
3164  itaup = itauq + m
3165  iwork = itaup + m
3166 *
3167 * Zero out above L in A
3168 *
3169  CALL zlaset( 'U', m-1, m-1, czero, czero,
3170  $ a( 1, 2 ), lda )
3171 *
3172 * Bidiagonalize L in A
3173 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3174 * (RWorkspace: need M)
3175 *
3176  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3177  $ work( itauq ), work( itaup ),
3178  $ work( iwork ), lwork-iwork+1, ierr )
3179 *
3180 * Multiply right bidiagonalizing vectors in A by Q
3181 * in VT
3182 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3183 * (RWorkspace: 0)
3184 *
3185  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3186  $ work( itaup ), vt, ldvt,
3187  $ work( iwork ), lwork-iwork+1, ierr )
3188  irwork = ie + m
3189 *
3190 * Perform bidiagonal QR iteration, computing right
3191 * singular vectors of A in VT
3192 * (CWorkspace: 0)
3193 * (RWorkspace: need BDSPAC)
3194 *
3195  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3196  $ ldvt, cdum, 1, cdum, 1,
3197  $ rwork( irwork ), info )
3198 *
3199  END IF
3200 *
3201  ELSE IF( wntuo ) THEN
3202 *
3203 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3204 * N right singular vectors to be computed in VT and
3205 * M left singular vectors to be overwritten on A
3206 *
3207  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3208 *
3209 * Sufficient workspace for a fast algorithm
3210 *
3211  iu = 1
3212  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3213 *
3214 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3215 *
3216  ldwrku = lda
3217  ir = iu + ldwrku*m
3218  ldwrkr = lda
3219  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3220 *
3221 * WORK(IU) is LDA by M and WORK(IR) is M by M
3222 *
3223  ldwrku = lda
3224  ir = iu + ldwrku*m
3225  ldwrkr = m
3226  ELSE
3227 *
3228 * WORK(IU) is M by M and WORK(IR) is M by M
3229 *
3230  ldwrku = m
3231  ir = iu + ldwrku*m
3232  ldwrkr = m
3233  END IF
3234  itau = ir + ldwrkr*m
3235  iwork = itau + m
3236 *
3237 * Compute A=L*Q, copying result to VT
3238 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3239 * (RWorkspace: 0)
3240 *
3241  CALL zgelqf( m, n, a, lda, work( itau ),
3242  $ work( iwork ), lwork-iwork+1, ierr )
3243  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3244 *
3245 * Generate Q in VT
3246 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3247 * (RWorkspace: 0)
3248 *
3249  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3250  $ work( iwork ), lwork-iwork+1, ierr )
3251 *
3252 * Copy L to WORK(IU), zeroing out above it
3253 *
3254  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3255  $ ldwrku )
3256  CALL zlaset( 'U', m-1, m-1, czero, czero,
3257  $ work( iu+ldwrku ), ldwrku )
3258  ie = 1
3259  itauq = itau
3260  itaup = itauq + m
3261  iwork = itaup + m
3262 *
3263 * Bidiagonalize L in WORK(IU), copying result to
3264 * WORK(IR)
3265 * (CWorkspace: need 2*M*M+3*M,
3266 * prefer 2*M*M+2*M+2*M*NB)
3267 * (RWorkspace: need M)
3268 *
3269  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3270  $ rwork( ie ), work( itauq ),
3271  $ work( itaup ), work( iwork ),
3272  $ lwork-iwork+1, ierr )
3273  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
3274  $ work( ir ), ldwrkr )
3275 *
3276 * Generate right bidiagonalizing vectors in WORK(IU)
3277 * (CWorkspace: need 2*M*M+3*M-1,
3278 * prefer 2*M*M+2*M+(M-1)*NB)
3279 * (RWorkspace: 0)
3280 *
3281  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3282  $ work( itaup ), work( iwork ),
3283  $ lwork-iwork+1, ierr )
3284 *
3285 * Generate left bidiagonalizing vectors in WORK(IR)
3286 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3287 * (RWorkspace: 0)
3288 *
3289  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3290  $ work( itauq ), work( iwork ),
3291  $ lwork-iwork+1, ierr )
3292  irwork = ie + m
3293 *
3294 * Perform bidiagonal QR iteration, computing left
3295 * singular vectors of L in WORK(IR) and computing
3296 * right singular vectors of L in WORK(IU)
3297 * (CWorkspace: need 2*M*M)
3298 * (RWorkspace: need BDSPAC)
3299 *
3300  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3301  $ work( iu ), ldwrku, work( ir ),
3302  $ ldwrkr, cdum, 1, rwork( irwork ),
3303  $ info )
3304 *
3305 * Multiply right singular vectors of L in WORK(IU) by
3306 * Q in VT, storing result in A
3307 * (CWorkspace: need M*M)
3308 * (RWorkspace: 0)
3309 *
3310  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3311  $ ldwrku, vt, ldvt, czero, a, lda )
3312 *
3313 * Copy right singular vectors of A from A to VT
3314 *
3315  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3316 *
3317 * Copy left singular vectors of A from WORK(IR) to A
3318 *
3319  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3320  $ lda )
3321 *
3322  ELSE
3323 *
3324 * Insufficient workspace for a fast algorithm
3325 *
3326  itau = 1
3327  iwork = itau + m
3328 *
3329 * Compute A=L*Q, copying result to VT
3330 * (CWorkspace: need 2*M, prefer M+M*NB)
3331 * (RWorkspace: 0)
3332 *
3333  CALL zgelqf( m, n, a, lda, work( itau ),
3334  $ work( iwork ), lwork-iwork+1, ierr )
3335  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3336 *
3337 * Generate Q in VT
3338 * (CWorkspace: need M+N, prefer M+N*NB)
3339 * (RWorkspace: 0)
3340 *
3341  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3342  $ work( iwork ), lwork-iwork+1, ierr )
3343  ie = 1
3344  itauq = itau
3345  itaup = itauq + m
3346  iwork = itaup + m
3347 *
3348 * Zero out above L in A
3349 *
3350  CALL zlaset( 'U', m-1, m-1, czero, czero,
3351  $ a( 1, 2 ), lda )
3352 *
3353 * Bidiagonalize L in A
3354 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3355 * (RWorkspace: need M)
3356 *
3357  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3358  $ work( itauq ), work( itaup ),
3359  $ work( iwork ), lwork-iwork+1, ierr )
3360 *
3361 * Multiply right bidiagonalizing vectors in A by Q
3362 * in VT
3363 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3364 * (RWorkspace: 0)
3365 *
3366  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3367  $ work( itaup ), vt, ldvt,
3368  $ work( iwork ), lwork-iwork+1, ierr )
3369 *
3370 * Generate left bidiagonalizing vectors in A
3371 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3372 * (RWorkspace: 0)
3373 *
3374  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
3375  $ work( iwork ), lwork-iwork+1, ierr )
3376  irwork = ie + m
3377 *
3378 * Perform bidiagonal QR iteration, computing left
3379 * singular vectors of A in A and computing right
3380 * singular vectors of A in VT
3381 * (CWorkspace: 0)
3382 * (RWorkspace: need BDSPAC)
3383 *
3384  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3385  $ ldvt, a, lda, cdum, 1,
3386  $ rwork( irwork ), info )
3387 *
3388  END IF
3389 *
3390  ELSE IF( wntuas ) THEN
3391 *
3392 * Path 9t(N much larger than M, JOBU='S' or 'A',
3393 * JOBVT='A')
3394 * N right singular vectors to be computed in VT and
3395 * M left singular vectors to be computed in U
3396 *
3397  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3398 *
3399 * Sufficient workspace for a fast algorithm
3400 *
3401  iu = 1
3402  IF( lwork.GE.wrkbl+lda*m ) THEN
3403 *
3404 * WORK(IU) is LDA by M
3405 *
3406  ldwrku = lda
3407  ELSE
3408 *
3409 * WORK(IU) is M by M
3410 *
3411  ldwrku = m
3412  END IF
3413  itau = iu + ldwrku*m
3414  iwork = itau + m
3415 *
3416 * Compute A=L*Q, copying result to VT
3417 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3418 * (RWorkspace: 0)
3419 *
3420  CALL zgelqf( m, n, a, lda, work( itau ),
3421  $ work( iwork ), lwork-iwork+1, ierr )
3422  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3423 *
3424 * Generate Q in VT
3425 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3426 * (RWorkspace: 0)
3427 *
3428  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3429  $ work( iwork ), lwork-iwork+1, ierr )
3430 *
3431 * Copy L to WORK(IU), zeroing out above it
3432 *
3433  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3434  $ ldwrku )
3435  CALL zlaset( 'U', m-1, m-1, czero, czero,
3436  $ work( iu+ldwrku ), ldwrku )
3437  ie = 1
3438  itauq = itau
3439  itaup = itauq + m
3440  iwork = itaup + m
3441 *
3442 * Bidiagonalize L in WORK(IU), copying result to U
3443 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3444 * (RWorkspace: need M)
3445 *
3446  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3447  $ rwork( ie ), work( itauq ),
3448  $ work( itaup ), work( iwork ),
3449  $ lwork-iwork+1, ierr )
3450  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
3451  $ ldu )
3452 *
3453 * Generate right bidiagonalizing vectors in WORK(IU)
3454 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3455 * (RWorkspace: 0)
3456 *
3457  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3458  $ work( itaup ), work( iwork ),
3459  $ lwork-iwork+1, ierr )
3460 *
3461 * Generate left bidiagonalizing vectors in U
3462 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3463 * (RWorkspace: 0)
3464 *
3465  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3466  $ work( iwork ), lwork-iwork+1, ierr )
3467  irwork = ie + m
3468 *
3469 * Perform bidiagonal QR iteration, computing left
3470 * singular vectors of L in U and computing right
3471 * singular vectors of L in WORK(IU)
3472 * (CWorkspace: need M*M)
3473 * (RWorkspace: need BDSPAC)
3474 *
3475  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3476  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3477  $ rwork( irwork ), info )
3478 *
3479 * Multiply right singular vectors of L in WORK(IU) by
3480 * Q in VT, storing result in A
3481 * (CWorkspace: need M*M)
3482 * (RWorkspace: 0)
3483 *
3484  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3485  $ ldwrku, vt, ldvt, czero, a, lda )
3486 *
3487 * Copy right singular vectors of A from A to VT
3488 *
3489  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3490 *
3491  ELSE
3492 *
3493 * Insufficient workspace for a fast algorithm
3494 *
3495  itau = 1
3496  iwork = itau + m
3497 *
3498 * Compute A=L*Q, copying result to VT
3499 * (CWorkspace: need 2*M, prefer M+M*NB)
3500 * (RWorkspace: 0)
3501 *
3502  CALL zgelqf( m, n, a, lda, work( itau ),
3503  $ work( iwork ), lwork-iwork+1, ierr )
3504  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3505 *
3506 * Generate Q in VT
3507 * (CWorkspace: need M+N, prefer M+N*NB)
3508 * (RWorkspace: 0)
3509 *
3510  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3511  $ work( iwork ), lwork-iwork+1, ierr )
3512 *
3513 * Copy L to U, zeroing out above it
3514 *
3515  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3516  CALL zlaset( 'U', m-1, m-1, czero, czero,
3517  $ u( 1, 2 ), ldu )
3518  ie = 1
3519  itauq = itau
3520  itaup = itauq + m
3521  iwork = itaup + m
3522 *
3523 * Bidiagonalize L in U
3524 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3525 * (RWorkspace: need M)
3526 *
3527  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3528  $ work( itauq ), work( itaup ),
3529  $ work( iwork ), lwork-iwork+1, ierr )
3530 *
3531 * Multiply right bidiagonalizing vectors in U by Q
3532 * in VT
3533 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3534 * (RWorkspace: 0)
3535 *
3536  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3537  $ work( itaup ), vt, ldvt,
3538  $ work( iwork ), lwork-iwork+1, ierr )
3539 *
3540 * Generate left bidiagonalizing vectors in U
3541 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3542 * (RWorkspace: 0)
3543 *
3544  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3545  $ work( iwork ), lwork-iwork+1, ierr )
3546  irwork = ie + m
3547 *
3548 * Perform bidiagonal QR iteration, computing left
3549 * singular vectors of A in U and computing right
3550 * singular vectors of A in VT
3551 * (CWorkspace: 0)
3552 * (RWorkspace: need BDSPAC)
3553 *
3554  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3555  $ ldvt, u, ldu, cdum, 1,
3556  $ rwork( irwork ), info )
3557 *
3558  END IF
3559 *
3560  END IF
3561 *
3562  END IF
3563 *
3564  ELSE
3565 *
3566 * N .LT. MNTHR
3567 *
3568 * Path 10t(N greater than M, but not much larger)
3569 * Reduce to bidiagonal form without LQ decomposition
3570 *
3571  ie = 1
3572  itauq = 1
3573  itaup = itauq + m
3574  iwork = itaup + m
3575 *
3576 * Bidiagonalize A
3577 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3578 * (RWorkspace: M)
3579 *
3580  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3581  $ work( itaup ), work( iwork ), lwork-iwork+1,
3582  $ ierr )
3583  IF( wntuas ) THEN
3584 *
3585 * If left singular vectors desired in U, copy result to U
3586 * and generate left bidiagonalizing vectors in U
3587 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3588 * (RWorkspace: 0)
3589 *
3590  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3591  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3592  $ work( iwork ), lwork-iwork+1, ierr )
3593  END IF
3594  IF( wntvas ) THEN
3595 *
3596 * If right singular vectors desired in VT, copy result to
3597 * VT and generate right bidiagonalizing vectors in VT
3598 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3599 * (RWorkspace: 0)
3600 *
3601  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3602  IF( wntva )
3603  $ nrvt = n
3604  IF( wntvs )
3605  $ nrvt = m
3606  CALL zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3607  $ work( iwork ), lwork-iwork+1, ierr )
3608  END IF
3609  IF( wntuo ) THEN
3610 *
3611 * If left singular vectors desired in A, generate left
3612 * bidiagonalizing vectors in A
3613 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3614 * (RWorkspace: 0)
3615 *
3616  CALL zungbr( 'Q', m, m, n, a, lda, work( itauq ),
3617  $ work( iwork ), lwork-iwork+1, ierr )
3618  END IF
3619  IF( wntvo ) THEN
3620 *
3621 * If right singular vectors desired in A, generate right
3622 * bidiagonalizing vectors in A
3623 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3624 * (RWorkspace: 0)
3625 *
3626  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
3627  $ work( iwork ), lwork-iwork+1, ierr )
3628  END IF
3629  irwork = ie + m
3630  IF( wntuas .OR. wntuo )
3631  $ nru = m
3632  IF( wntun )
3633  $ nru = 0
3634  IF( wntvas .OR. wntvo )
3635  $ ncvt = n
3636  IF( wntvn )
3637  $ ncvt = 0
3638  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3639 *
3640 * Perform bidiagonal QR iteration, if desired, computing
3641 * left singular vectors in U and computing right singular
3642 * vectors in VT
3643 * (CWorkspace: 0)
3644 * (RWorkspace: need BDSPAC)
3645 *
3646  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3647  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3648  $ info )
3649  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3650 *
3651 * Perform bidiagonal QR iteration, if desired, computing
3652 * left singular vectors in U and computing right singular
3653 * vectors in A
3654 * (CWorkspace: 0)
3655 * (RWorkspace: need BDSPAC)
3656 *
3657  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3658  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3659  $ info )
3660  ELSE
3661 *
3662 * Perform bidiagonal QR iteration, if desired, computing
3663 * left singular vectors in A and computing right singular
3664 * vectors in VT
3665 * (CWorkspace: 0)
3666 * (RWorkspace: need BDSPAC)
3667 *
3668  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3669  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3670  $ info )
3671  END IF
3672 *
3673  END IF
3674 *
3675  END IF
3676 *
3677 * Undo scaling if necessary
3678 *
3679  IF( iscl.EQ.1 ) THEN
3680  IF( anrm.GT.bignum )
3681  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3682  $ ierr )
3683  IF( info.NE.0 .AND. anrm.GT.bignum )
3684  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3685  $ rwork( ie ), minmn, ierr )
3686  IF( anrm.LT.smlnum )
3687  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3688  $ ierr )
3689  IF( info.NE.0 .AND. anrm.LT.smlnum )
3690  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3691  $ rwork( ie ), minmn, ierr )
3692  END IF
3693 *
3694 * Return optimal workspace in WORK(1)
3695 *
3696  work( 1 ) = maxwrk
3697 *
3698  RETURN
3699 *
3700 * End of ZGESVD
3701 *
3702  END
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:143
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:187
subroutine zungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGBR
Definition: zungbr.f:157
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:143
subroutine zgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
ZGEBRD
Definition: zgebrd.f:205
subroutine zgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
ZGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: zgesvd.f:214
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:143
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:103
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: zlaset.f:106
subroutine zunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGLQ
Definition: zunglq.f:127
subroutine zungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGQR
Definition: zungqr.f:128
subroutine zbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, RWORK, INFO)
ZBDSQR
Definition: zbdsqr.f:222
subroutine zunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMBR
Definition: zunmbr.f:196
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151