LAPACK  3.7.1
LAPACK: Linear Algebra PACKage
zgesvd.f
Go to the documentation of this file.
1 *> \brief <b> ZGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION RWORK( * ), S( * )
30 * COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> ZGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX*16 array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is DOUBLE PRECISION array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX*16 array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX*16 array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if ZBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \date April 2012
210 *
211 *> \ingroup complex16GEsing
212 *
213 * =====================================================================
214  SUBROUTINE zgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
215  $ VT, LDVT, WORK, LWORK, RWORK, INFO )
216 *
217 * -- LAPACK driver routine (version 3.7.0) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER JOBU, JOBVT
224  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
225 * ..
226 * .. Array Arguments ..
227  DOUBLE PRECISION RWORK( * ), S( * )
228  COMPLEX*16 A( lda, * ), U( ldu, * ), VT( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX*16 CZERO, CONE
236  parameter( czero = ( 0.0d0, 0.0d0 ),
237  $ cone = ( 1.0d0, 0.0d0 ) )
238  DOUBLE PRECISION ZERO, ONE
239  parameter( zero = 0.0d0, one = 1.0d0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M,
249  $ lwork_zgebrd, lwork_zungbr_p, lwork_zungbr_q,
250  $ lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m
251  DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
252 * ..
253 * .. Local Arrays ..
254  DOUBLE PRECISION DUM( 1 )
255  COMPLEX*16 CDUM( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
260  $ zungqr, zunmbr
261 * ..
262 * .. External Functions ..
263  LOGICAL LSAME
264  INTEGER ILAENV
265  DOUBLE PRECISION DLAMCH, ZLANGE
266  EXTERNAL lsame, ilaenv, dlamch, zlange
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for ZGEQRF
324  CALL zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
325  lwork_zgeqrf = int( cdum(1) )
326 * Compute space needed for ZUNGQR
327  CALL zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
328  lwork_zungqr_n = int( cdum(1) )
329  CALL zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
330  lwork_zungqr_m = int( cdum(1) )
331 * Compute space needed for ZGEBRD
332  CALL zgebrd( n, n, a, lda, s, dum(1), cdum(1),
333  $ cdum(1), cdum(1), -1, ierr )
334  lwork_zgebrd = int( cdum(1) )
335 * Compute space needed for ZUNGBR
336  CALL zungbr( 'P', n, n, n, a, lda, cdum(1),
337  $ cdum(1), -1, ierr )
338  lwork_zungbr_p = int( cdum(1) )
339  CALL zungbr( 'Q', n, n, n, a, lda, cdum(1),
340  $ cdum(1), -1, ierr )
341  lwork_zungbr_q = int( cdum(1) )
342 *
343  IF( m.GE.mnthr ) THEN
344  IF( wntun ) THEN
345 *
346 * Path 1 (M much larger than N, JOBU='N')
347 *
348  maxwrk = n + lwork_zgeqrf
349  maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
350  IF( wntvo .OR. wntvas )
351  $ maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
352  minwrk = 3*n
353  ELSE IF( wntuo .AND. wntvn ) THEN
354 *
355 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
356 *
357  wrkbl = n + lwork_zgeqrf
358  wrkbl = max( wrkbl, n+lwork_zungqr_n )
359  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
360  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
361  maxwrk = max( n*n+wrkbl, n*n+m*n )
362  minwrk = 2*n + m
363  ELSE IF( wntuo .AND. wntvas ) THEN
364 *
365 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
366 * 'A')
367 *
368  wrkbl = n + lwork_zgeqrf
369  wrkbl = max( wrkbl, n+lwork_zungqr_n )
370  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
371  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
372  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
373  maxwrk = max( n*n+wrkbl, n*n+m*n )
374  minwrk = 2*n + m
375  ELSE IF( wntus .AND. wntvn ) THEN
376 *
377 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
378 *
379  wrkbl = n + lwork_zgeqrf
380  wrkbl = max( wrkbl, n+lwork_zungqr_n )
381  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
382  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
383  maxwrk = n*n + wrkbl
384  minwrk = 2*n + m
385  ELSE IF( wntus .AND. wntvo ) THEN
386 *
387 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
388 *
389  wrkbl = n + lwork_zgeqrf
390  wrkbl = max( wrkbl, n+lwork_zungqr_n )
391  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
392  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
393  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
394  maxwrk = 2*n*n + wrkbl
395  minwrk = 2*n + m
396  ELSE IF( wntus .AND. wntvas ) THEN
397 *
398 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
399 * 'A')
400 *
401  wrkbl = n + lwork_zgeqrf
402  wrkbl = max( wrkbl, n+lwork_zungqr_n )
403  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
404  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
405  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
406  maxwrk = n*n + wrkbl
407  minwrk = 2*n + m
408  ELSE IF( wntua .AND. wntvn ) THEN
409 *
410 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
411 *
412  wrkbl = n + lwork_zgeqrf
413  wrkbl = max( wrkbl, n+lwork_zungqr_m )
414  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
415  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
416  maxwrk = n*n + wrkbl
417  minwrk = 2*n + m
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_zgeqrf
423  wrkbl = max( wrkbl, n+lwork_zungqr_m )
424  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
425  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
426  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
427  maxwrk = 2*n*n + wrkbl
428  minwrk = 2*n + m
429  ELSE IF( wntua .AND. wntvas ) THEN
430 *
431 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
432 * 'A')
433 *
434  wrkbl = n + lwork_zgeqrf
435  wrkbl = max( wrkbl, n+lwork_zungqr_m )
436  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
437  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
438  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
439  maxwrk = n*n + wrkbl
440  minwrk = 2*n + m
441  END IF
442  ELSE
443 *
444 * Path 10 (M at least N, but not much larger)
445 *
446  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
447  $ cdum(1), cdum(1), -1, ierr )
448  lwork_zgebrd = int( cdum(1) )
449  maxwrk = 2*n + lwork_zgebrd
450  IF( wntus .OR. wntuo ) THEN
451  CALL zungbr( 'Q', m, n, n, a, lda, cdum(1),
452  $ cdum(1), -1, ierr )
453  lwork_zungbr_q = int( cdum(1) )
454  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
455  END IF
456  IF( wntua ) THEN
457  CALL zungbr( 'Q', m, m, n, a, lda, cdum(1),
458  $ cdum(1), -1, ierr )
459  lwork_zungbr_q = int( cdum(1) )
460  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
461  END IF
462  IF( .NOT.wntvn ) THEN
463  maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
464  END IF
465  minwrk = 2*n + m
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Space needed for ZBDSQR is BDSPAC = 5*M
470 *
471  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
472 * Compute space needed for ZGELQF
473  CALL zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
474  lwork_zgelqf = int( cdum(1) )
475 * Compute space needed for ZUNGLQ
476  CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
477  $ ierr )
478  lwork_zunglq_n = int( cdum(1) )
479  CALL zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
480  lwork_zunglq_m = int( cdum(1) )
481 * Compute space needed for ZGEBRD
482  CALL zgebrd( m, m, a, lda, s, dum(1), cdum(1),
483  $ cdum(1), cdum(1), -1, ierr )
484  lwork_zgebrd = int( cdum(1) )
485 * Compute space needed for ZUNGBR P
486  CALL zungbr( 'P', m, m, m, a, n, cdum(1),
487  $ cdum(1), -1, ierr )
488  lwork_zungbr_p = int( cdum(1) )
489 * Compute space needed for ZUNGBR Q
490  CALL zungbr( 'Q', m, m, m, a, n, cdum(1),
491  $ cdum(1), -1, ierr )
492  lwork_zungbr_q = int( cdum(1) )
493  IF( n.GE.mnthr ) THEN
494  IF( wntvn ) THEN
495 *
496 * Path 1t(N much larger than M, JOBVT='N')
497 *
498  maxwrk = m + lwork_zgelqf
499  maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
500  IF( wntuo .OR. wntuas )
501  $ maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
502  minwrk = 3*m
503  ELSE IF( wntvo .AND. wntun ) THEN
504 *
505 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
506 *
507  wrkbl = m + lwork_zgelqf
508  wrkbl = max( wrkbl, m+lwork_zunglq_m )
509  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
510  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
511  maxwrk = max( m*m+wrkbl, m*m+m*n )
512  minwrk = 2*m + n
513  ELSE IF( wntvo .AND. wntuas ) THEN
514 *
515 * Path 3t(N much larger than M, JOBU='S' or 'A',
516 * JOBVT='O')
517 *
518  wrkbl = m + lwork_zgelqf
519  wrkbl = max( wrkbl, m+lwork_zunglq_m )
520  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
521  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
522  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
523  maxwrk = max( m*m+wrkbl, m*m+m*n )
524  minwrk = 2*m + n
525  ELSE IF( wntvs .AND. wntun ) THEN
526 *
527 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
528 *
529  wrkbl = m + lwork_zgelqf
530  wrkbl = max( wrkbl, m+lwork_zunglq_m )
531  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
532  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
533  maxwrk = m*m + wrkbl
534  minwrk = 2*m + n
535  ELSE IF( wntvs .AND. wntuo ) THEN
536 *
537 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
538 *
539  wrkbl = m + lwork_zgelqf
540  wrkbl = max( wrkbl, m+lwork_zunglq_m )
541  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
542  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
543  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
544  maxwrk = 2*m*m + wrkbl
545  minwrk = 2*m + n
546  ELSE IF( wntvs .AND. wntuas ) THEN
547 *
548 * Path 6t(N much larger than M, JOBU='S' or 'A',
549 * JOBVT='S')
550 *
551  wrkbl = m + lwork_zgelqf
552  wrkbl = max( wrkbl, m+lwork_zunglq_m )
553  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
554  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
555  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
556  maxwrk = m*m + wrkbl
557  minwrk = 2*m + n
558  ELSE IF( wntva .AND. wntun ) THEN
559 *
560 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
561 *
562  wrkbl = m + lwork_zgelqf
563  wrkbl = max( wrkbl, m+lwork_zunglq_n )
564  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
565  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
566  maxwrk = m*m + wrkbl
567  minwrk = 2*m + n
568  ELSE IF( wntva .AND. wntuo ) THEN
569 *
570 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
571 *
572  wrkbl = m + lwork_zgelqf
573  wrkbl = max( wrkbl, m+lwork_zunglq_n )
574  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
575  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
576  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
577  maxwrk = 2*m*m + wrkbl
578  minwrk = 2*m + n
579  ELSE IF( wntva .AND. wntuas ) THEN
580 *
581 * Path 9t(N much larger than M, JOBU='S' or 'A',
582 * JOBVT='A')
583 *
584  wrkbl = m + lwork_zgelqf
585  wrkbl = max( wrkbl, m+lwork_zunglq_n )
586  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
587  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
588  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
589  maxwrk = m*m + wrkbl
590  minwrk = 2*m + n
591  END IF
592  ELSE
593 *
594 * Path 10t(N greater than M, but not much larger)
595 *
596  CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
597  $ cdum(1), cdum(1), -1, ierr )
598  lwork_zgebrd = int( cdum(1) )
599  maxwrk = 2*m + lwork_zgebrd
600  IF( wntvs .OR. wntvo ) THEN
601 * Compute space needed for ZUNGBR P
602  CALL zungbr( 'P', m, n, m, a, n, cdum(1),
603  $ cdum(1), -1, ierr )
604  lwork_zungbr_p = int( cdum(1) )
605  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
606  END IF
607  IF( wntva ) THEN
608  CALL zungbr( 'P', n, n, m, a, n, cdum(1),
609  $ cdum(1), -1, ierr )
610  lwork_zungbr_p = int( cdum(1) )
611  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
612  END IF
613  IF( .NOT.wntun ) THEN
614  maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
615  END IF
616  minwrk = 2*m + n
617  END IF
618  END IF
619  maxwrk = max( maxwrk, minwrk )
620  work( 1 ) = maxwrk
621 *
622  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
623  info = -13
624  END IF
625  END IF
626 *
627  IF( info.NE.0 ) THEN
628  CALL xerbla( 'ZGESVD', -info )
629  RETURN
630  ELSE IF( lquery ) THEN
631  RETURN
632  END IF
633 *
634 * Quick return if possible
635 *
636  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
637  RETURN
638  END IF
639 *
640 * Get machine constants
641 *
642  eps = dlamch( 'P' )
643  smlnum = sqrt( dlamch( 'S' ) ) / eps
644  bignum = one / smlnum
645 *
646 * Scale A if max element outside range [SMLNUM,BIGNUM]
647 *
648  anrm = zlange( 'M', m, n, a, lda, dum )
649  iscl = 0
650  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
651  iscl = 1
652  CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
653  ELSE IF( anrm.GT.bignum ) THEN
654  iscl = 1
655  CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
656  END IF
657 *
658  IF( m.GE.n ) THEN
659 *
660 * A has at least as many rows as columns. If A has sufficiently
661 * more rows than columns, first reduce using the QR
662 * decomposition (if sufficient workspace available)
663 *
664  IF( m.GE.mnthr ) THEN
665 *
666  IF( wntun ) THEN
667 *
668 * Path 1 (M much larger than N, JOBU='N')
669 * No left singular vectors to be computed
670 *
671  itau = 1
672  iwork = itau + n
673 *
674 * Compute A=Q*R
675 * (CWorkspace: need 2*N, prefer N+N*NB)
676 * (RWorkspace: need 0)
677 *
678  CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
679  $ lwork-iwork+1, ierr )
680 *
681 * Zero out below R
682 *
683  IF( n .GT. 1 ) THEN
684  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
685  $ lda )
686  END IF
687  ie = 1
688  itauq = 1
689  itaup = itauq + n
690  iwork = itaup + n
691 *
692 * Bidiagonalize R in A
693 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
694 * (RWorkspace: need N)
695 *
696  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
697  $ work( itaup ), work( iwork ), lwork-iwork+1,
698  $ ierr )
699  ncvt = 0
700  IF( wntvo .OR. wntvas ) THEN
701 *
702 * If right singular vectors desired, generate P'.
703 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
704 * (RWorkspace: 0)
705 *
706  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
707  $ work( iwork ), lwork-iwork+1, ierr )
708  ncvt = n
709  END IF
710  irwork = ie + n
711 *
712 * Perform bidiagonal QR iteration, computing right
713 * singular vectors of A in A if desired
714 * (CWorkspace: 0)
715 * (RWorkspace: need BDSPAC)
716 *
717  CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
718  $ cdum, 1, cdum, 1, rwork( irwork ), info )
719 *
720 * If right singular vectors desired in VT, copy them there
721 *
722  IF( wntvas )
723  $ CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
724 *
725  ELSE IF( wntuo .AND. wntvn ) THEN
726 *
727 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
728 * N left singular vectors to be overwritten on A and
729 * no right singular vectors to be computed
730 *
731  IF( lwork.GE.n*n+3*n ) THEN
732 *
733 * Sufficient workspace for a fast algorithm
734 *
735  ir = 1
736  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
737 *
738 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
739 *
740  ldwrku = lda
741  ldwrkr = lda
742  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
743 *
744 * WORK(IU) is LDA by N, WORK(IR) is N by N
745 *
746  ldwrku = lda
747  ldwrkr = n
748  ELSE
749 *
750 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
751 *
752  ldwrku = ( lwork-n*n ) / n
753  ldwrkr = n
754  END IF
755  itau = ir + ldwrkr*n
756  iwork = itau + n
757 *
758 * Compute A=Q*R
759 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
760 * (RWorkspace: 0)
761 *
762  CALL zgeqrf( m, n, a, lda, work( itau ),
763  $ work( iwork ), lwork-iwork+1, ierr )
764 *
765 * Copy R to WORK(IR) and zero out below it
766 *
767  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
768  CALL zlaset( 'L', n-1, n-1, czero, czero,
769  $ work( ir+1 ), ldwrkr )
770 *
771 * Generate Q in A
772 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
773 * (RWorkspace: 0)
774 *
775  CALL zungqr( m, n, n, a, lda, work( itau ),
776  $ work( iwork ), lwork-iwork+1, ierr )
777  ie = 1
778  itauq = itau
779  itaup = itauq + n
780  iwork = itaup + n
781 *
782 * Bidiagonalize R in WORK(IR)
783 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
784 * (RWorkspace: need N)
785 *
786  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
787  $ work( itauq ), work( itaup ),
788  $ work( iwork ), lwork-iwork+1, ierr )
789 *
790 * Generate left vectors bidiagonalizing R
791 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
792 * (RWorkspace: need 0)
793 *
794  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
795  $ work( itauq ), work( iwork ),
796  $ lwork-iwork+1, ierr )
797  irwork = ie + n
798 *
799 * Perform bidiagonal QR iteration, computing left
800 * singular vectors of R in WORK(IR)
801 * (CWorkspace: need N*N)
802 * (RWorkspace: need BDSPAC)
803 *
804  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
805  $ work( ir ), ldwrkr, cdum, 1,
806  $ rwork( irwork ), info )
807  iu = itauq
808 *
809 * Multiply Q in A by left singular vectors of R in
810 * WORK(IR), storing result in WORK(IU) and copying to A
811 * (CWorkspace: need N*N+N, prefer N*N+M*N)
812 * (RWorkspace: 0)
813 *
814  DO 10 i = 1, m, ldwrku
815  chunk = min( m-i+1, ldwrku )
816  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
817  $ lda, work( ir ), ldwrkr, czero,
818  $ work( iu ), ldwrku )
819  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
820  $ a( i, 1 ), lda )
821  10 CONTINUE
822 *
823  ELSE
824 *
825 * Insufficient workspace for a fast algorithm
826 *
827  ie = 1
828  itauq = 1
829  itaup = itauq + n
830  iwork = itaup + n
831 *
832 * Bidiagonalize A
833 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
834 * (RWorkspace: N)
835 *
836  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
837  $ work( itauq ), work( itaup ),
838  $ work( iwork ), lwork-iwork+1, ierr )
839 *
840 * Generate left vectors bidiagonalizing A
841 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
842 * (RWorkspace: 0)
843 *
844  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
845  $ work( iwork ), lwork-iwork+1, ierr )
846  irwork = ie + n
847 *
848 * Perform bidiagonal QR iteration, computing left
849 * singular vectors of A in A
850 * (CWorkspace: need 0)
851 * (RWorkspace: need BDSPAC)
852 *
853  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
854  $ a, lda, cdum, 1, rwork( irwork ), info )
855 *
856  END IF
857 *
858  ELSE IF( wntuo .AND. wntvas ) THEN
859 *
860 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
861 * N left singular vectors to be overwritten on A and
862 * N right singular vectors to be computed in VT
863 *
864  IF( lwork.GE.n*n+3*n ) THEN
865 *
866 * Sufficient workspace for a fast algorithm
867 *
868  ir = 1
869  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
870 *
871 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
872 *
873  ldwrku = lda
874  ldwrkr = lda
875  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
876 *
877 * WORK(IU) is LDA by N and WORK(IR) is N by N
878 *
879  ldwrku = lda
880  ldwrkr = n
881  ELSE
882 *
883 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
884 *
885  ldwrku = ( lwork-n*n ) / n
886  ldwrkr = n
887  END IF
888  itau = ir + ldwrkr*n
889  iwork = itau + n
890 *
891 * Compute A=Q*R
892 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
893 * (RWorkspace: 0)
894 *
895  CALL zgeqrf( m, n, a, lda, work( itau ),
896  $ work( iwork ), lwork-iwork+1, ierr )
897 *
898 * Copy R to VT, zeroing out below it
899 *
900  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
901  IF( n.GT.1 )
902  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
903  $ vt( 2, 1 ), ldvt )
904 *
905 * Generate Q in A
906 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
907 * (RWorkspace: 0)
908 *
909  CALL zungqr( m, n, n, a, lda, work( itau ),
910  $ work( iwork ), lwork-iwork+1, ierr )
911  ie = 1
912  itauq = itau
913  itaup = itauq + n
914  iwork = itaup + n
915 *
916 * Bidiagonalize R in VT, copying result to WORK(IR)
917 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
918 * (RWorkspace: need N)
919 *
920  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
921  $ work( itauq ), work( itaup ),
922  $ work( iwork ), lwork-iwork+1, ierr )
923  CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
924 *
925 * Generate left vectors bidiagonalizing R in WORK(IR)
926 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
927 * (RWorkspace: 0)
928 *
929  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
930  $ work( itauq ), work( iwork ),
931  $ lwork-iwork+1, ierr )
932 *
933 * Generate right vectors bidiagonalizing R in VT
934 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
935 * (RWorkspace: 0)
936 *
937  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
938  $ work( iwork ), lwork-iwork+1, ierr )
939  irwork = ie + n
940 *
941 * Perform bidiagonal QR iteration, computing left
942 * singular vectors of R in WORK(IR) and computing right
943 * singular vectors of R in VT
944 * (CWorkspace: need N*N)
945 * (RWorkspace: need BDSPAC)
946 *
947  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
948  $ ldvt, work( ir ), ldwrkr, cdum, 1,
949  $ rwork( irwork ), info )
950  iu = itauq
951 *
952 * Multiply Q in A by left singular vectors of R in
953 * WORK(IR), storing result in WORK(IU) and copying to A
954 * (CWorkspace: need N*N+N, prefer N*N+M*N)
955 * (RWorkspace: 0)
956 *
957  DO 20 i = 1, m, ldwrku
958  chunk = min( m-i+1, ldwrku )
959  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
960  $ lda, work( ir ), ldwrkr, czero,
961  $ work( iu ), ldwrku )
962  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
963  $ a( i, 1 ), lda )
964  20 CONTINUE
965 *
966  ELSE
967 *
968 * Insufficient workspace for a fast algorithm
969 *
970  itau = 1
971  iwork = itau + n
972 *
973 * Compute A=Q*R
974 * (CWorkspace: need 2*N, prefer N+N*NB)
975 * (RWorkspace: 0)
976 *
977  CALL zgeqrf( m, n, a, lda, work( itau ),
978  $ work( iwork ), lwork-iwork+1, ierr )
979 *
980 * Copy R to VT, zeroing out below it
981 *
982  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
983  IF( n.GT.1 )
984  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
985  $ vt( 2, 1 ), ldvt )
986 *
987 * Generate Q in A
988 * (CWorkspace: need 2*N, prefer N+N*NB)
989 * (RWorkspace: 0)
990 *
991  CALL zungqr( m, n, n, a, lda, work( itau ),
992  $ work( iwork ), lwork-iwork+1, ierr )
993  ie = 1
994  itauq = itau
995  itaup = itauq + n
996  iwork = itaup + n
997 *
998 * Bidiagonalize R in VT
999 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1000 * (RWorkspace: N)
1001 *
1002  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1003  $ work( itauq ), work( itaup ),
1004  $ work( iwork ), lwork-iwork+1, ierr )
1005 *
1006 * Multiply Q in A by left vectors bidiagonalizing R
1007 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1008 * (RWorkspace: 0)
1009 *
1010  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1011  $ work( itauq ), a, lda, work( iwork ),
1012  $ lwork-iwork+1, ierr )
1013 *
1014 * Generate right vectors bidiagonalizing R in VT
1015 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1016 * (RWorkspace: 0)
1017 *
1018  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1019  $ work( iwork ), lwork-iwork+1, ierr )
1020  irwork = ie + n
1021 *
1022 * Perform bidiagonal QR iteration, computing left
1023 * singular vectors of A in A and computing right
1024 * singular vectors of A in VT
1025 * (CWorkspace: 0)
1026 * (RWorkspace: need BDSPAC)
1027 *
1028  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1029  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1030  $ info )
1031 *
1032  END IF
1033 *
1034  ELSE IF( wntus ) THEN
1035 *
1036  IF( wntvn ) THEN
1037 *
1038 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1039 * N left singular vectors to be computed in U and
1040 * no right singular vectors to be computed
1041 *
1042  IF( lwork.GE.n*n+3*n ) THEN
1043 *
1044 * Sufficient workspace for a fast algorithm
1045 *
1046  ir = 1
1047  IF( lwork.GE.wrkbl+lda*n ) THEN
1048 *
1049 * WORK(IR) is LDA by N
1050 *
1051  ldwrkr = lda
1052  ELSE
1053 *
1054 * WORK(IR) is N by N
1055 *
1056  ldwrkr = n
1057  END IF
1058  itau = ir + ldwrkr*n
1059  iwork = itau + n
1060 *
1061 * Compute A=Q*R
1062 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1063 * (RWorkspace: 0)
1064 *
1065  CALL zgeqrf( m, n, a, lda, work( itau ),
1066  $ work( iwork ), lwork-iwork+1, ierr )
1067 *
1068 * Copy R to WORK(IR), zeroing out below it
1069 *
1070  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1071  $ ldwrkr )
1072  CALL zlaset( 'L', n-1, n-1, czero, czero,
1073  $ work( ir+1 ), ldwrkr )
1074 *
1075 * Generate Q in A
1076 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1077 * (RWorkspace: 0)
1078 *
1079  CALL zungqr( m, n, n, a, lda, work( itau ),
1080  $ work( iwork ), lwork-iwork+1, ierr )
1081  ie = 1
1082  itauq = itau
1083  itaup = itauq + n
1084  iwork = itaup + n
1085 *
1086 * Bidiagonalize R in WORK(IR)
1087 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1088 * (RWorkspace: need N)
1089 *
1090  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1091  $ rwork( ie ), work( itauq ),
1092  $ work( itaup ), work( iwork ),
1093  $ lwork-iwork+1, ierr )
1094 *
1095 * Generate left vectors bidiagonalizing R in WORK(IR)
1096 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1097 * (RWorkspace: 0)
1098 *
1099  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1100  $ work( itauq ), work( iwork ),
1101  $ lwork-iwork+1, ierr )
1102  irwork = ie + n
1103 *
1104 * Perform bidiagonal QR iteration, computing left
1105 * singular vectors of R in WORK(IR)
1106 * (CWorkspace: need N*N)
1107 * (RWorkspace: need BDSPAC)
1108 *
1109  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1110  $ 1, work( ir ), ldwrkr, cdum, 1,
1111  $ rwork( irwork ), info )
1112 *
1113 * Multiply Q in A by left singular vectors of R in
1114 * WORK(IR), storing result in U
1115 * (CWorkspace: need N*N)
1116 * (RWorkspace: 0)
1117 *
1118  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1119  $ work( ir ), ldwrkr, czero, u, ldu )
1120 *
1121  ELSE
1122 *
1123 * Insufficient workspace for a fast algorithm
1124 *
1125  itau = 1
1126  iwork = itau + n
1127 *
1128 * Compute A=Q*R, copying result to U
1129 * (CWorkspace: need 2*N, prefer N+N*NB)
1130 * (RWorkspace: 0)
1131 *
1132  CALL zgeqrf( m, n, a, lda, work( itau ),
1133  $ work( iwork ), lwork-iwork+1, ierr )
1134  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1135 *
1136 * Generate Q in U
1137 * (CWorkspace: need 2*N, prefer N+N*NB)
1138 * (RWorkspace: 0)
1139 *
1140  CALL zungqr( m, n, n, u, ldu, work( itau ),
1141  $ work( iwork ), lwork-iwork+1, ierr )
1142  ie = 1
1143  itauq = itau
1144  itaup = itauq + n
1145  iwork = itaup + n
1146 *
1147 * Zero out below R in A
1148 *
1149  IF( n .GT. 1 ) THEN
1150  CALL zlaset( 'L', n-1, n-1, czero, czero,
1151  $ a( 2, 1 ), lda )
1152  END IF
1153 *
1154 * Bidiagonalize R in A
1155 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1156 * (RWorkspace: need N)
1157 *
1158  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1159  $ work( itauq ), work( itaup ),
1160  $ work( iwork ), lwork-iwork+1, ierr )
1161 *
1162 * Multiply Q in U by left vectors bidiagonalizing R
1163 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1164 * (RWorkspace: 0)
1165 *
1166  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1167  $ work( itauq ), u, ldu, work( iwork ),
1168  $ lwork-iwork+1, ierr )
1169  irwork = ie + n
1170 *
1171 * Perform bidiagonal QR iteration, computing left
1172 * singular vectors of A in U
1173 * (CWorkspace: 0)
1174 * (RWorkspace: need BDSPAC)
1175 *
1176  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1177  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1178  $ info )
1179 *
1180  END IF
1181 *
1182  ELSE IF( wntvo ) THEN
1183 *
1184 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1185 * N left singular vectors to be computed in U and
1186 * N right singular vectors to be overwritten on A
1187 *
1188  IF( lwork.GE.2*n*n+3*n ) THEN
1189 *
1190 * Sufficient workspace for a fast algorithm
1191 *
1192  iu = 1
1193  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1194 *
1195 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1196 *
1197  ldwrku = lda
1198  ir = iu + ldwrku*n
1199  ldwrkr = lda
1200  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1201 *
1202 * WORK(IU) is LDA by N and WORK(IR) is N by N
1203 *
1204  ldwrku = lda
1205  ir = iu + ldwrku*n
1206  ldwrkr = n
1207  ELSE
1208 *
1209 * WORK(IU) is N by N and WORK(IR) is N by N
1210 *
1211  ldwrku = n
1212  ir = iu + ldwrku*n
1213  ldwrkr = n
1214  END IF
1215  itau = ir + ldwrkr*n
1216  iwork = itau + n
1217 *
1218 * Compute A=Q*R
1219 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1220 * (RWorkspace: 0)
1221 *
1222  CALL zgeqrf( m, n, a, lda, work( itau ),
1223  $ work( iwork ), lwork-iwork+1, ierr )
1224 *
1225 * Copy R to WORK(IU), zeroing out below it
1226 *
1227  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1228  $ ldwrku )
1229  CALL zlaset( 'L', n-1, n-1, czero, czero,
1230  $ work( iu+1 ), ldwrku )
1231 *
1232 * Generate Q in A
1233 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1234 * (RWorkspace: 0)
1235 *
1236  CALL zungqr( m, n, n, a, lda, work( itau ),
1237  $ work( iwork ), lwork-iwork+1, ierr )
1238  ie = 1
1239  itauq = itau
1240  itaup = itauq + n
1241  iwork = itaup + n
1242 *
1243 * Bidiagonalize R in WORK(IU), copying result to
1244 * WORK(IR)
1245 * (CWorkspace: need 2*N*N+3*N,
1246 * prefer 2*N*N+2*N+2*N*NB)
1247 * (RWorkspace: need N)
1248 *
1249  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1250  $ rwork( ie ), work( itauq ),
1251  $ work( itaup ), work( iwork ),
1252  $ lwork-iwork+1, ierr )
1253  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1254  $ work( ir ), ldwrkr )
1255 *
1256 * Generate left bidiagonalizing vectors in WORK(IU)
1257 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1258 * (RWorkspace: 0)
1259 *
1260  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1261  $ work( itauq ), work( iwork ),
1262  $ lwork-iwork+1, ierr )
1263 *
1264 * Generate right bidiagonalizing vectors in WORK(IR)
1265 * (CWorkspace: need 2*N*N+3*N-1,
1266 * prefer 2*N*N+2*N+(N-1)*NB)
1267 * (RWorkspace: 0)
1268 *
1269  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1270  $ work( itaup ), work( iwork ),
1271  $ lwork-iwork+1, ierr )
1272  irwork = ie + n
1273 *
1274 * Perform bidiagonal QR iteration, computing left
1275 * singular vectors of R in WORK(IU) and computing
1276 * right singular vectors of R in WORK(IR)
1277 * (CWorkspace: need 2*N*N)
1278 * (RWorkspace: need BDSPAC)
1279 *
1280  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1281  $ work( ir ), ldwrkr, work( iu ),
1282  $ ldwrku, cdum, 1, rwork( irwork ),
1283  $ info )
1284 *
1285 * Multiply Q in A by left singular vectors of R in
1286 * WORK(IU), storing result in U
1287 * (CWorkspace: need N*N)
1288 * (RWorkspace: 0)
1289 *
1290  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1291  $ work( iu ), ldwrku, czero, u, ldu )
1292 *
1293 * Copy right singular vectors of R to A
1294 * (CWorkspace: need N*N)
1295 * (RWorkspace: 0)
1296 *
1297  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1298  $ lda )
1299 *
1300  ELSE
1301 *
1302 * Insufficient workspace for a fast algorithm
1303 *
1304  itau = 1
1305  iwork = itau + n
1306 *
1307 * Compute A=Q*R, copying result to U
1308 * (CWorkspace: need 2*N, prefer N+N*NB)
1309 * (RWorkspace: 0)
1310 *
1311  CALL zgeqrf( m, n, a, lda, work( itau ),
1312  $ work( iwork ), lwork-iwork+1, ierr )
1313  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1314 *
1315 * Generate Q in U
1316 * (CWorkspace: need 2*N, prefer N+N*NB)
1317 * (RWorkspace: 0)
1318 *
1319  CALL zungqr( m, n, n, u, ldu, work( itau ),
1320  $ work( iwork ), lwork-iwork+1, ierr )
1321  ie = 1
1322  itauq = itau
1323  itaup = itauq + n
1324  iwork = itaup + n
1325 *
1326 * Zero out below R in A
1327 *
1328  IF( n .GT. 1 ) THEN
1329  CALL zlaset( 'L', n-1, n-1, czero, czero,
1330  $ a( 2, 1 ), lda )
1331  END IF
1332 *
1333 * Bidiagonalize R in A
1334 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1335 * (RWorkspace: need N)
1336 *
1337  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1338  $ work( itauq ), work( itaup ),
1339  $ work( iwork ), lwork-iwork+1, ierr )
1340 *
1341 * Multiply Q in U by left vectors bidiagonalizing R
1342 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1343 * (RWorkspace: 0)
1344 *
1345  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1346  $ work( itauq ), u, ldu, work( iwork ),
1347  $ lwork-iwork+1, ierr )
1348 *
1349 * Generate right vectors bidiagonalizing R in A
1350 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1351 * (RWorkspace: 0)
1352 *
1353  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1354  $ work( iwork ), lwork-iwork+1, ierr )
1355  irwork = ie + n
1356 *
1357 * Perform bidiagonal QR iteration, computing left
1358 * singular vectors of A in U and computing right
1359 * singular vectors of A in A
1360 * (CWorkspace: 0)
1361 * (RWorkspace: need BDSPAC)
1362 *
1363  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1364  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1365  $ info )
1366 *
1367  END IF
1368 *
1369  ELSE IF( wntvas ) THEN
1370 *
1371 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1372 * or 'A')
1373 * N left singular vectors to be computed in U and
1374 * N right singular vectors to be computed in VT
1375 *
1376  IF( lwork.GE.n*n+3*n ) THEN
1377 *
1378 * Sufficient workspace for a fast algorithm
1379 *
1380  iu = 1
1381  IF( lwork.GE.wrkbl+lda*n ) THEN
1382 *
1383 * WORK(IU) is LDA by N
1384 *
1385  ldwrku = lda
1386  ELSE
1387 *
1388 * WORK(IU) is N by N
1389 *
1390  ldwrku = n
1391  END IF
1392  itau = iu + ldwrku*n
1393  iwork = itau + n
1394 *
1395 * Compute A=Q*R
1396 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1397 * (RWorkspace: 0)
1398 *
1399  CALL zgeqrf( m, n, a, lda, work( itau ),
1400  $ work( iwork ), lwork-iwork+1, ierr )
1401 *
1402 * Copy R to WORK(IU), zeroing out below it
1403 *
1404  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1405  $ ldwrku )
1406  CALL zlaset( 'L', n-1, n-1, czero, czero,
1407  $ work( iu+1 ), ldwrku )
1408 *
1409 * Generate Q in A
1410 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1411 * (RWorkspace: 0)
1412 *
1413  CALL zungqr( m, n, n, a, lda, work( itau ),
1414  $ work( iwork ), lwork-iwork+1, ierr )
1415  ie = 1
1416  itauq = itau
1417  itaup = itauq + n
1418  iwork = itaup + n
1419 *
1420 * Bidiagonalize R in WORK(IU), copying result to VT
1421 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1422 * (RWorkspace: need N)
1423 *
1424  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1425  $ rwork( ie ), work( itauq ),
1426  $ work( itaup ), work( iwork ),
1427  $ lwork-iwork+1, ierr )
1428  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1429  $ ldvt )
1430 *
1431 * Generate left bidiagonalizing vectors in WORK(IU)
1432 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1433 * (RWorkspace: 0)
1434 *
1435  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1436  $ work( itauq ), work( iwork ),
1437  $ lwork-iwork+1, ierr )
1438 *
1439 * Generate right bidiagonalizing vectors in VT
1440 * (CWorkspace: need N*N+3*N-1,
1441 * prefer N*N+2*N+(N-1)*NB)
1442 * (RWorkspace: 0)
1443 *
1444  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1445  $ work( iwork ), lwork-iwork+1, ierr )
1446  irwork = ie + n
1447 *
1448 * Perform bidiagonal QR iteration, computing left
1449 * singular vectors of R in WORK(IU) and computing
1450 * right singular vectors of R in VT
1451 * (CWorkspace: need N*N)
1452 * (RWorkspace: need BDSPAC)
1453 *
1454  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1455  $ ldvt, work( iu ), ldwrku, cdum, 1,
1456  $ rwork( irwork ), info )
1457 *
1458 * Multiply Q in A by left singular vectors of R in
1459 * WORK(IU), storing result in U
1460 * (CWorkspace: need N*N)
1461 * (RWorkspace: 0)
1462 *
1463  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1464  $ work( iu ), ldwrku, czero, u, ldu )
1465 *
1466  ELSE
1467 *
1468 * Insufficient workspace for a fast algorithm
1469 *
1470  itau = 1
1471  iwork = itau + n
1472 *
1473 * Compute A=Q*R, copying result to U
1474 * (CWorkspace: need 2*N, prefer N+N*NB)
1475 * (RWorkspace: 0)
1476 *
1477  CALL zgeqrf( m, n, a, lda, work( itau ),
1478  $ work( iwork ), lwork-iwork+1, ierr )
1479  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1480 *
1481 * Generate Q in U
1482 * (CWorkspace: need 2*N, prefer N+N*NB)
1483 * (RWorkspace: 0)
1484 *
1485  CALL zungqr( m, n, n, u, ldu, work( itau ),
1486  $ work( iwork ), lwork-iwork+1, ierr )
1487 *
1488 * Copy R to VT, zeroing out below it
1489 *
1490  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1491  IF( n.GT.1 )
1492  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
1493  $ vt( 2, 1 ), ldvt )
1494  ie = 1
1495  itauq = itau
1496  itaup = itauq + n
1497  iwork = itaup + n
1498 *
1499 * Bidiagonalize R in VT
1500 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1501 * (RWorkspace: need N)
1502 *
1503  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1504  $ work( itauq ), work( itaup ),
1505  $ work( iwork ), lwork-iwork+1, ierr )
1506 *
1507 * Multiply Q in U by left bidiagonalizing vectors
1508 * in VT
1509 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1510 * (RWorkspace: 0)
1511 *
1512  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1513  $ work( itauq ), u, ldu, work( iwork ),
1514  $ lwork-iwork+1, ierr )
1515 *
1516 * Generate right bidiagonalizing vectors in VT
1517 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1518 * (RWorkspace: 0)
1519 *
1520  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1521  $ work( iwork ), lwork-iwork+1, ierr )
1522  irwork = ie + n
1523 *
1524 * Perform bidiagonal QR iteration, computing left
1525 * singular vectors of A in U and computing right
1526 * singular vectors of A in VT
1527 * (CWorkspace: 0)
1528 * (RWorkspace: need BDSPAC)
1529 *
1530  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1531  $ ldvt, u, ldu, cdum, 1,
1532  $ rwork( irwork ), info )
1533 *
1534  END IF
1535 *
1536  END IF
1537 *
1538  ELSE IF( wntua ) THEN
1539 *
1540  IF( wntvn ) THEN
1541 *
1542 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1543 * M left singular vectors to be computed in U and
1544 * no right singular vectors to be computed
1545 *
1546  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1547 *
1548 * Sufficient workspace for a fast algorithm
1549 *
1550  ir = 1
1551  IF( lwork.GE.wrkbl+lda*n ) THEN
1552 *
1553 * WORK(IR) is LDA by N
1554 *
1555  ldwrkr = lda
1556  ELSE
1557 *
1558 * WORK(IR) is N by N
1559 *
1560  ldwrkr = n
1561  END IF
1562  itau = ir + ldwrkr*n
1563  iwork = itau + n
1564 *
1565 * Compute A=Q*R, copying result to U
1566 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1567 * (RWorkspace: 0)
1568 *
1569  CALL zgeqrf( m, n, a, lda, work( itau ),
1570  $ work( iwork ), lwork-iwork+1, ierr )
1571  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1572 *
1573 * Copy R to WORK(IR), zeroing out below it
1574 *
1575  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1576  $ ldwrkr )
1577  CALL zlaset( 'L', n-1, n-1, czero, czero,
1578  $ work( ir+1 ), ldwrkr )
1579 *
1580 * Generate Q in U
1581 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1582 * (RWorkspace: 0)
1583 *
1584  CALL zungqr( m, m, n, u, ldu, work( itau ),
1585  $ work( iwork ), lwork-iwork+1, ierr )
1586  ie = 1
1587  itauq = itau
1588  itaup = itauq + n
1589  iwork = itaup + n
1590 *
1591 * Bidiagonalize R in WORK(IR)
1592 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1593 * (RWorkspace: need N)
1594 *
1595  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1596  $ rwork( ie ), work( itauq ),
1597  $ work( itaup ), work( iwork ),
1598  $ lwork-iwork+1, ierr )
1599 *
1600 * Generate left bidiagonalizing vectors in WORK(IR)
1601 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1602 * (RWorkspace: 0)
1603 *
1604  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1605  $ work( itauq ), work( iwork ),
1606  $ lwork-iwork+1, ierr )
1607  irwork = ie + n
1608 *
1609 * Perform bidiagonal QR iteration, computing left
1610 * singular vectors of R in WORK(IR)
1611 * (CWorkspace: need N*N)
1612 * (RWorkspace: need BDSPAC)
1613 *
1614  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1615  $ 1, work( ir ), ldwrkr, cdum, 1,
1616  $ rwork( irwork ), info )
1617 *
1618 * Multiply Q in U by left singular vectors of R in
1619 * WORK(IR), storing result in A
1620 * (CWorkspace: need N*N)
1621 * (RWorkspace: 0)
1622 *
1623  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1624  $ work( ir ), ldwrkr, czero, a, lda )
1625 *
1626 * Copy left singular vectors of A from A to U
1627 *
1628  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1629 *
1630  ELSE
1631 *
1632 * Insufficient workspace for a fast algorithm
1633 *
1634  itau = 1
1635  iwork = itau + n
1636 *
1637 * Compute A=Q*R, copying result to U
1638 * (CWorkspace: need 2*N, prefer N+N*NB)
1639 * (RWorkspace: 0)
1640 *
1641  CALL zgeqrf( m, n, a, lda, work( itau ),
1642  $ work( iwork ), lwork-iwork+1, ierr )
1643  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1644 *
1645 * Generate Q in U
1646 * (CWorkspace: need N+M, prefer N+M*NB)
1647 * (RWorkspace: 0)
1648 *
1649  CALL zungqr( m, m, n, u, ldu, work( itau ),
1650  $ work( iwork ), lwork-iwork+1, ierr )
1651  ie = 1
1652  itauq = itau
1653  itaup = itauq + n
1654  iwork = itaup + n
1655 *
1656 * Zero out below R in A
1657 *
1658  IF( n .GT. 1 ) THEN
1659  CALL zlaset( 'L', n-1, n-1, czero, czero,
1660  $ a( 2, 1 ), lda )
1661  END IF
1662 *
1663 * Bidiagonalize R in A
1664 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1665 * (RWorkspace: need N)
1666 *
1667  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1668  $ work( itauq ), work( itaup ),
1669  $ work( iwork ), lwork-iwork+1, ierr )
1670 *
1671 * Multiply Q in U by left bidiagonalizing vectors
1672 * in A
1673 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1674 * (RWorkspace: 0)
1675 *
1676  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1677  $ work( itauq ), u, ldu, work( iwork ),
1678  $ lwork-iwork+1, ierr )
1679  irwork = ie + n
1680 *
1681 * Perform bidiagonal QR iteration, computing left
1682 * singular vectors of A in U
1683 * (CWorkspace: 0)
1684 * (RWorkspace: need BDSPAC)
1685 *
1686  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1687  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1688  $ info )
1689 *
1690  END IF
1691 *
1692  ELSE IF( wntvo ) THEN
1693 *
1694 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1695 * M left singular vectors to be computed in U and
1696 * N right singular vectors to be overwritten on A
1697 *
1698  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1699 *
1700 * Sufficient workspace for a fast algorithm
1701 *
1702  iu = 1
1703  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1704 *
1705 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1706 *
1707  ldwrku = lda
1708  ir = iu + ldwrku*n
1709  ldwrkr = lda
1710  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1711 *
1712 * WORK(IU) is LDA by N and WORK(IR) is N by N
1713 *
1714  ldwrku = lda
1715  ir = iu + ldwrku*n
1716  ldwrkr = n
1717  ELSE
1718 *
1719 * WORK(IU) is N by N and WORK(IR) is N by N
1720 *
1721  ldwrku = n
1722  ir = iu + ldwrku*n
1723  ldwrkr = n
1724  END IF
1725  itau = ir + ldwrkr*n
1726  iwork = itau + n
1727 *
1728 * Compute A=Q*R, copying result to U
1729 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1730 * (RWorkspace: 0)
1731 *
1732  CALL zgeqrf( m, n, a, lda, work( itau ),
1733  $ work( iwork ), lwork-iwork+1, ierr )
1734  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1735 *
1736 * Generate Q in U
1737 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1738 * (RWorkspace: 0)
1739 *
1740  CALL zungqr( m, m, n, u, ldu, work( itau ),
1741  $ work( iwork ), lwork-iwork+1, ierr )
1742 *
1743 * Copy R to WORK(IU), zeroing out below it
1744 *
1745  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1746  $ ldwrku )
1747  CALL zlaset( 'L', n-1, n-1, czero, czero,
1748  $ work( iu+1 ), ldwrku )
1749  ie = 1
1750  itauq = itau
1751  itaup = itauq + n
1752  iwork = itaup + n
1753 *
1754 * Bidiagonalize R in WORK(IU), copying result to
1755 * WORK(IR)
1756 * (CWorkspace: need 2*N*N+3*N,
1757 * prefer 2*N*N+2*N+2*N*NB)
1758 * (RWorkspace: need N)
1759 *
1760  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1761  $ rwork( ie ), work( itauq ),
1762  $ work( itaup ), work( iwork ),
1763  $ lwork-iwork+1, ierr )
1764  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1765  $ work( ir ), ldwrkr )
1766 *
1767 * Generate left bidiagonalizing vectors in WORK(IU)
1768 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1769 * (RWorkspace: 0)
1770 *
1771  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1772  $ work( itauq ), work( iwork ),
1773  $ lwork-iwork+1, ierr )
1774 *
1775 * Generate right bidiagonalizing vectors in WORK(IR)
1776 * (CWorkspace: need 2*N*N+3*N-1,
1777 * prefer 2*N*N+2*N+(N-1)*NB)
1778 * (RWorkspace: 0)
1779 *
1780  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1781  $ work( itaup ), work( iwork ),
1782  $ lwork-iwork+1, ierr )
1783  irwork = ie + n
1784 *
1785 * Perform bidiagonal QR iteration, computing left
1786 * singular vectors of R in WORK(IU) and computing
1787 * right singular vectors of R in WORK(IR)
1788 * (CWorkspace: need 2*N*N)
1789 * (RWorkspace: need BDSPAC)
1790 *
1791  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1792  $ work( ir ), ldwrkr, work( iu ),
1793  $ ldwrku, cdum, 1, rwork( irwork ),
1794  $ info )
1795 *
1796 * Multiply Q in U by left singular vectors of R in
1797 * WORK(IU), storing result in A
1798 * (CWorkspace: need N*N)
1799 * (RWorkspace: 0)
1800 *
1801  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1802  $ work( iu ), ldwrku, czero, a, lda )
1803 *
1804 * Copy left singular vectors of A from A to U
1805 *
1806  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1807 *
1808 * Copy right singular vectors of R from WORK(IR) to A
1809 *
1810  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1811  $ lda )
1812 *
1813  ELSE
1814 *
1815 * Insufficient workspace for a fast algorithm
1816 *
1817  itau = 1
1818  iwork = itau + n
1819 *
1820 * Compute A=Q*R, copying result to U
1821 * (CWorkspace: need 2*N, prefer N+N*NB)
1822 * (RWorkspace: 0)
1823 *
1824  CALL zgeqrf( m, n, a, lda, work( itau ),
1825  $ work( iwork ), lwork-iwork+1, ierr )
1826  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1827 *
1828 * Generate Q in U
1829 * (CWorkspace: need N+M, prefer N+M*NB)
1830 * (RWorkspace: 0)
1831 *
1832  CALL zungqr( m, m, n, u, ldu, work( itau ),
1833  $ work( iwork ), lwork-iwork+1, ierr )
1834  ie = 1
1835  itauq = itau
1836  itaup = itauq + n
1837  iwork = itaup + n
1838 *
1839 * Zero out below R in A
1840 *
1841  IF( n .GT. 1 ) THEN
1842  CALL zlaset( 'L', n-1, n-1, czero, czero,
1843  $ a( 2, 1 ), lda )
1844  END IF
1845 *
1846 * Bidiagonalize R in A
1847 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1848 * (RWorkspace: need N)
1849 *
1850  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1851  $ work( itauq ), work( itaup ),
1852  $ work( iwork ), lwork-iwork+1, ierr )
1853 *
1854 * Multiply Q in U by left bidiagonalizing vectors
1855 * in A
1856 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1857 * (RWorkspace: 0)
1858 *
1859  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1860  $ work( itauq ), u, ldu, work( iwork ),
1861  $ lwork-iwork+1, ierr )
1862 *
1863 * Generate right bidiagonalizing vectors in A
1864 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1865 * (RWorkspace: 0)
1866 *
1867  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1868  $ work( iwork ), lwork-iwork+1, ierr )
1869  irwork = ie + n
1870 *
1871 * Perform bidiagonal QR iteration, computing left
1872 * singular vectors of A in U and computing right
1873 * singular vectors of A in A
1874 * (CWorkspace: 0)
1875 * (RWorkspace: need BDSPAC)
1876 *
1877  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1878  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1879  $ info )
1880 *
1881  END IF
1882 *
1883  ELSE IF( wntvas ) THEN
1884 *
1885 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1886 * or 'A')
1887 * M left singular vectors to be computed in U and
1888 * N right singular vectors to be computed in VT
1889 *
1890  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1891 *
1892 * Sufficient workspace for a fast algorithm
1893 *
1894  iu = 1
1895  IF( lwork.GE.wrkbl+lda*n ) THEN
1896 *
1897 * WORK(IU) is LDA by N
1898 *
1899  ldwrku = lda
1900  ELSE
1901 *
1902 * WORK(IU) is N by N
1903 *
1904  ldwrku = n
1905  END IF
1906  itau = iu + ldwrku*n
1907  iwork = itau + n
1908 *
1909 * Compute A=Q*R, copying result to U
1910 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1911 * (RWorkspace: 0)
1912 *
1913  CALL zgeqrf( m, n, a, lda, work( itau ),
1914  $ work( iwork ), lwork-iwork+1, ierr )
1915  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1916 *
1917 * Generate Q in U
1918 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1919 * (RWorkspace: 0)
1920 *
1921  CALL zungqr( m, m, n, u, ldu, work( itau ),
1922  $ work( iwork ), lwork-iwork+1, ierr )
1923 *
1924 * Copy R to WORK(IU), zeroing out below it
1925 *
1926  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1927  $ ldwrku )
1928  CALL zlaset( 'L', n-1, n-1, czero, czero,
1929  $ work( iu+1 ), ldwrku )
1930  ie = 1
1931  itauq = itau
1932  itaup = itauq + n
1933  iwork = itaup + n
1934 *
1935 * Bidiagonalize R in WORK(IU), copying result to VT
1936 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1937 * (RWorkspace: need N)
1938 *
1939  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1940  $ rwork( ie ), work( itauq ),
1941  $ work( itaup ), work( iwork ),
1942  $ lwork-iwork+1, ierr )
1943  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1944  $ ldvt )
1945 *
1946 * Generate left bidiagonalizing vectors in WORK(IU)
1947 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1948 * (RWorkspace: 0)
1949 *
1950  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1951  $ work( itauq ), work( iwork ),
1952  $ lwork-iwork+1, ierr )
1953 *
1954 * Generate right bidiagonalizing vectors in VT
1955 * (CWorkspace: need N*N+3*N-1,
1956 * prefer N*N+2*N+(N-1)*NB)
1957 * (RWorkspace: need 0)
1958 *
1959  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1960  $ work( iwork ), lwork-iwork+1, ierr )
1961  irwork = ie + n
1962 *
1963 * Perform bidiagonal QR iteration, computing left
1964 * singular vectors of R in WORK(IU) and computing
1965 * right singular vectors of R in VT
1966 * (CWorkspace: need N*N)
1967 * (RWorkspace: need BDSPAC)
1968 *
1969  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1970  $ ldvt, work( iu ), ldwrku, cdum, 1,
1971  $ rwork( irwork ), info )
1972 *
1973 * Multiply Q in U by left singular vectors of R in
1974 * WORK(IU), storing result in A
1975 * (CWorkspace: need N*N)
1976 * (RWorkspace: 0)
1977 *
1978  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1979  $ work( iu ), ldwrku, czero, a, lda )
1980 *
1981 * Copy left singular vectors of A from A to U
1982 *
1983  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1984 *
1985  ELSE
1986 *
1987 * Insufficient workspace for a fast algorithm
1988 *
1989  itau = 1
1990  iwork = itau + n
1991 *
1992 * Compute A=Q*R, copying result to U
1993 * (CWorkspace: need 2*N, prefer N+N*NB)
1994 * (RWorkspace: 0)
1995 *
1996  CALL zgeqrf( m, n, a, lda, work( itau ),
1997  $ work( iwork ), lwork-iwork+1, ierr )
1998  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1999 *
2000 * Generate Q in U
2001 * (CWorkspace: need N+M, prefer N+M*NB)
2002 * (RWorkspace: 0)
2003 *
2004  CALL zungqr( m, m, n, u, ldu, work( itau ),
2005  $ work( iwork ), lwork-iwork+1, ierr )
2006 *
2007 * Copy R from A to VT, zeroing out below it
2008 *
2009  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2010  IF( n.GT.1 )
2011  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
2012  $ vt( 2, 1 ), ldvt )
2013  ie = 1
2014  itauq = itau
2015  itaup = itauq + n
2016  iwork = itaup + n
2017 *
2018 * Bidiagonalize R in VT
2019 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2020 * (RWorkspace: need N)
2021 *
2022  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
2023  $ work( itauq ), work( itaup ),
2024  $ work( iwork ), lwork-iwork+1, ierr )
2025 *
2026 * Multiply Q in U by left bidiagonalizing vectors
2027 * in VT
2028 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2029 * (RWorkspace: 0)
2030 *
2031  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2032  $ work( itauq ), u, ldu, work( iwork ),
2033  $ lwork-iwork+1, ierr )
2034 *
2035 * Generate right bidiagonalizing vectors in VT
2036 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2037 * (RWorkspace: 0)
2038 *
2039  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2040  $ work( iwork ), lwork-iwork+1, ierr )
2041  irwork = ie + n
2042 *
2043 * Perform bidiagonal QR iteration, computing left
2044 * singular vectors of A in U and computing right
2045 * singular vectors of A in VT
2046 * (CWorkspace: 0)
2047 * (RWorkspace: need BDSPAC)
2048 *
2049  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2050  $ ldvt, u, ldu, cdum, 1,
2051  $ rwork( irwork ), info )
2052 *
2053  END IF
2054 *
2055  END IF
2056 *
2057  END IF
2058 *
2059  ELSE
2060 *
2061 * M .LT. MNTHR
2062 *
2063 * Path 10 (M at least N, but not much larger)
2064 * Reduce to bidiagonal form without QR decomposition
2065 *
2066  ie = 1
2067  itauq = 1
2068  itaup = itauq + n
2069  iwork = itaup + n
2070 *
2071 * Bidiagonalize A
2072 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2073 * (RWorkspace: need N)
2074 *
2075  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2076  $ work( itaup ), work( iwork ), lwork-iwork+1,
2077  $ ierr )
2078  IF( wntuas ) THEN
2079 *
2080 * If left singular vectors desired in U, copy result to U
2081 * and generate left bidiagonalizing vectors in U
2082 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2083 * (RWorkspace: 0)
2084 *
2085  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
2086  IF( wntus )
2087  $ ncu = n
2088  IF( wntua )
2089  $ ncu = m
2090  CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2091  $ work( iwork ), lwork-iwork+1, ierr )
2092  END IF
2093  IF( wntvas ) THEN
2094 *
2095 * If right singular vectors desired in VT, copy result to
2096 * VT and generate right bidiagonalizing vectors in VT
2097 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2098 * (RWorkspace: 0)
2099 *
2100  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2101  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2102  $ work( iwork ), lwork-iwork+1, ierr )
2103  END IF
2104  IF( wntuo ) THEN
2105 *
2106 * If left singular vectors desired in A, generate left
2107 * bidiagonalizing vectors in A
2108 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2109 * (RWorkspace: 0)
2110 *
2111  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
2112  $ work( iwork ), lwork-iwork+1, ierr )
2113  END IF
2114  IF( wntvo ) THEN
2115 *
2116 * If right singular vectors desired in A, generate right
2117 * bidiagonalizing vectors in A
2118 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2119 * (RWorkspace: 0)
2120 *
2121  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
2122  $ work( iwork ), lwork-iwork+1, ierr )
2123  END IF
2124  irwork = ie + n
2125  IF( wntuas .OR. wntuo )
2126  $ nru = m
2127  IF( wntun )
2128  $ nru = 0
2129  IF( wntvas .OR. wntvo )
2130  $ ncvt = n
2131  IF( wntvn )
2132  $ ncvt = 0
2133  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2134 *
2135 * Perform bidiagonal QR iteration, if desired, computing
2136 * left singular vectors in U and computing right singular
2137 * vectors in VT
2138 * (CWorkspace: 0)
2139 * (RWorkspace: need BDSPAC)
2140 *
2141  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2142  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2143  $ info )
2144  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2145 *
2146 * Perform bidiagonal QR iteration, if desired, computing
2147 * left singular vectors in U and computing right singular
2148 * vectors in A
2149 * (CWorkspace: 0)
2150 * (RWorkspace: need BDSPAC)
2151 *
2152  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2153  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2154  $ info )
2155  ELSE
2156 *
2157 * Perform bidiagonal QR iteration, if desired, computing
2158 * left singular vectors in A and computing right singular
2159 * vectors in VT
2160 * (CWorkspace: 0)
2161 * (RWorkspace: need BDSPAC)
2162 *
2163  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2164  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2165  $ info )
2166  END IF
2167 *
2168  END IF
2169 *
2170  ELSE
2171 *
2172 * A has more columns than rows. If A has sufficiently more
2173 * columns than rows, first reduce using the LQ decomposition (if
2174 * sufficient workspace available)
2175 *
2176  IF( n.GE.mnthr ) THEN
2177 *
2178  IF( wntvn ) THEN
2179 *
2180 * Path 1t(N much larger than M, JOBVT='N')
2181 * No right singular vectors to be computed
2182 *
2183  itau = 1
2184  iwork = itau + m
2185 *
2186 * Compute A=L*Q
2187 * (CWorkspace: need 2*M, prefer M+M*NB)
2188 * (RWorkspace: 0)
2189 *
2190  CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
2191  $ lwork-iwork+1, ierr )
2192 *
2193 * Zero out above L
2194 *
2195  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2196  $ lda )
2197  ie = 1
2198  itauq = 1
2199  itaup = itauq + m
2200  iwork = itaup + m
2201 *
2202 * Bidiagonalize L in A
2203 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2204 * (RWorkspace: need M)
2205 *
2206  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2207  $ work( itaup ), work( iwork ), lwork-iwork+1,
2208  $ ierr )
2209  IF( wntuo .OR. wntuas ) THEN
2210 *
2211 * If left singular vectors desired, generate Q
2212 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2213 * (RWorkspace: 0)
2214 *
2215  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2216  $ work( iwork ), lwork-iwork+1, ierr )
2217  END IF
2218  irwork = ie + m
2219  nru = 0
2220  IF( wntuo .OR. wntuas )
2221  $ nru = m
2222 *
2223 * Perform bidiagonal QR iteration, computing left singular
2224 * vectors of A in A if desired
2225 * (CWorkspace: 0)
2226 * (RWorkspace: need BDSPAC)
2227 *
2228  CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2229  $ a, lda, cdum, 1, rwork( irwork ), info )
2230 *
2231 * If left singular vectors desired in U, copy them there
2232 *
2233  IF( wntuas )
2234  $ CALL zlacpy( 'F', m, m, a, lda, u, ldu )
2235 *
2236  ELSE IF( wntvo .AND. wntun ) THEN
2237 *
2238 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2239 * M right singular vectors to be overwritten on A and
2240 * no left singular vectors to be computed
2241 *
2242  IF( lwork.GE.m*m+3*m ) THEN
2243 *
2244 * Sufficient workspace for a fast algorithm
2245 *
2246  ir = 1
2247  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2248 *
2249 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2250 *
2251  ldwrku = lda
2252  chunk = n
2253  ldwrkr = lda
2254  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2255 *
2256 * WORK(IU) is LDA by N and WORK(IR) is M by M
2257 *
2258  ldwrku = lda
2259  chunk = n
2260  ldwrkr = m
2261  ELSE
2262 *
2263 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2264 *
2265  ldwrku = m
2266  chunk = ( lwork-m*m ) / m
2267  ldwrkr = m
2268  END IF
2269  itau = ir + ldwrkr*m
2270  iwork = itau + m
2271 *
2272 * Compute A=L*Q
2273 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2274 * (RWorkspace: 0)
2275 *
2276  CALL zgelqf( m, n, a, lda, work( itau ),
2277  $ work( iwork ), lwork-iwork+1, ierr )
2278 *
2279 * Copy L to WORK(IR) and zero out above it
2280 *
2281  CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2282  CALL zlaset( 'U', m-1, m-1, czero, czero,
2283  $ work( ir+ldwrkr ), ldwrkr )
2284 *
2285 * Generate Q in A
2286 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2287 * (RWorkspace: 0)
2288 *
2289  CALL zunglq( m, n, m, a, lda, work( itau ),
2290  $ work( iwork ), lwork-iwork+1, ierr )
2291  ie = 1
2292  itauq = itau
2293  itaup = itauq + m
2294  iwork = itaup + m
2295 *
2296 * Bidiagonalize L in WORK(IR)
2297 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2298 * (RWorkspace: need M)
2299 *
2300  CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2301  $ work( itauq ), work( itaup ),
2302  $ work( iwork ), lwork-iwork+1, ierr )
2303 *
2304 * Generate right vectors bidiagonalizing L
2305 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2306 * (RWorkspace: 0)
2307 *
2308  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2309  $ work( itaup ), work( iwork ),
2310  $ lwork-iwork+1, ierr )
2311  irwork = ie + m
2312 *
2313 * Perform bidiagonal QR iteration, computing right
2314 * singular vectors of L in WORK(IR)
2315 * (CWorkspace: need M*M)
2316 * (RWorkspace: need BDSPAC)
2317 *
2318  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2319  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2320  $ rwork( irwork ), info )
2321  iu = itauq
2322 *
2323 * Multiply right singular vectors of L in WORK(IR) by Q
2324 * in A, storing result in WORK(IU) and copying to A
2325 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2326 * (RWorkspace: 0)
2327 *
2328  DO 30 i = 1, n, chunk
2329  blk = min( n-i+1, chunk )
2330  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2331  $ ldwrkr, a( 1, i ), lda, czero,
2332  $ work( iu ), ldwrku )
2333  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2334  $ a( 1, i ), lda )
2335  30 CONTINUE
2336 *
2337  ELSE
2338 *
2339 * Insufficient workspace for a fast algorithm
2340 *
2341  ie = 1
2342  itauq = 1
2343  itaup = itauq + m
2344  iwork = itaup + m
2345 *
2346 * Bidiagonalize A
2347 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2348 * (RWorkspace: need M)
2349 *
2350  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
2351  $ work( itauq ), work( itaup ),
2352  $ work( iwork ), lwork-iwork+1, ierr )
2353 *
2354 * Generate right vectors bidiagonalizing A
2355 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2356 * (RWorkspace: 0)
2357 *
2358  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2359  $ work( iwork ), lwork-iwork+1, ierr )
2360  irwork = ie + m
2361 *
2362 * Perform bidiagonal QR iteration, computing right
2363 * singular vectors of A in A
2364 * (CWorkspace: 0)
2365 * (RWorkspace: need BDSPAC)
2366 *
2367  CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2368  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2369 *
2370  END IF
2371 *
2372  ELSE IF( wntvo .AND. wntuas ) THEN
2373 *
2374 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2375 * M right singular vectors to be overwritten on A and
2376 * M left singular vectors to be computed in U
2377 *
2378  IF( lwork.GE.m*m+3*m ) THEN
2379 *
2380 * Sufficient workspace for a fast algorithm
2381 *
2382  ir = 1
2383  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2384 *
2385 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2386 *
2387  ldwrku = lda
2388  chunk = n
2389  ldwrkr = lda
2390  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2391 *
2392 * WORK(IU) is LDA by N and WORK(IR) is M by M
2393 *
2394  ldwrku = lda
2395  chunk = n
2396  ldwrkr = m
2397  ELSE
2398 *
2399 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2400 *
2401  ldwrku = m
2402  chunk = ( lwork-m*m ) / m
2403  ldwrkr = m
2404  END IF
2405  itau = ir + ldwrkr*m
2406  iwork = itau + m
2407 *
2408 * Compute A=L*Q
2409 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2410 * (RWorkspace: 0)
2411 *
2412  CALL zgelqf( m, n, a, lda, work( itau ),
2413  $ work( iwork ), lwork-iwork+1, ierr )
2414 *
2415 * Copy L to U, zeroing about above it
2416 *
2417  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2418  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2419  $ ldu )
2420 *
2421 * Generate Q in A
2422 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2423 * (RWorkspace: 0)
2424 *
2425  CALL zunglq( m, n, m, a, lda, work( itau ),
2426  $ work( iwork ), lwork-iwork+1, ierr )
2427  ie = 1
2428  itauq = itau
2429  itaup = itauq + m
2430  iwork = itaup + m
2431 *
2432 * Bidiagonalize L in U, copying result to WORK(IR)
2433 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2434 * (RWorkspace: need M)
2435 *
2436  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2437  $ work( itauq ), work( itaup ),
2438  $ work( iwork ), lwork-iwork+1, ierr )
2439  CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2440 *
2441 * Generate right vectors bidiagonalizing L in WORK(IR)
2442 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2443 * (RWorkspace: 0)
2444 *
2445  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2446  $ work( itaup ), work( iwork ),
2447  $ lwork-iwork+1, ierr )
2448 *
2449 * Generate left vectors bidiagonalizing L in U
2450 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2451 * (RWorkspace: 0)
2452 *
2453  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2454  $ work( iwork ), lwork-iwork+1, ierr )
2455  irwork = ie + m
2456 *
2457 * Perform bidiagonal QR iteration, computing left
2458 * singular vectors of L in U, and computing right
2459 * singular vectors of L in WORK(IR)
2460 * (CWorkspace: need M*M)
2461 * (RWorkspace: need BDSPAC)
2462 *
2463  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2464  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2465  $ rwork( irwork ), info )
2466  iu = itauq
2467 *
2468 * Multiply right singular vectors of L in WORK(IR) by Q
2469 * in A, storing result in WORK(IU) and copying to A
2470 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2471 * (RWorkspace: 0)
2472 *
2473  DO 40 i = 1, n, chunk
2474  blk = min( n-i+1, chunk )
2475  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2476  $ ldwrkr, a( 1, i ), lda, czero,
2477  $ work( iu ), ldwrku )
2478  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2479  $ a( 1, i ), lda )
2480  40 CONTINUE
2481 *
2482  ELSE
2483 *
2484 * Insufficient workspace for a fast algorithm
2485 *
2486  itau = 1
2487  iwork = itau + m
2488 *
2489 * Compute A=L*Q
2490 * (CWorkspace: need 2*M, prefer M+M*NB)
2491 * (RWorkspace: 0)
2492 *
2493  CALL zgelqf( m, n, a, lda, work( itau ),
2494  $ work( iwork ), lwork-iwork+1, ierr )
2495 *
2496 * Copy L to U, zeroing out above it
2497 *
2498  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2499  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2500  $ ldu )
2501 *
2502 * Generate Q in A
2503 * (CWorkspace: need 2*M, prefer M+M*NB)
2504 * (RWorkspace: 0)
2505 *
2506  CALL zunglq( m, n, m, a, lda, work( itau ),
2507  $ work( iwork ), lwork-iwork+1, ierr )
2508  ie = 1
2509  itauq = itau
2510  itaup = itauq + m
2511  iwork = itaup + m
2512 *
2513 * Bidiagonalize L in U
2514 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2515 * (RWorkspace: need M)
2516 *
2517  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2518  $ work( itauq ), work( itaup ),
2519  $ work( iwork ), lwork-iwork+1, ierr )
2520 *
2521 * Multiply right vectors bidiagonalizing L by Q in A
2522 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2523 * (RWorkspace: 0)
2524 *
2525  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2526  $ work( itaup ), a, lda, work( iwork ),
2527  $ lwork-iwork+1, ierr )
2528 *
2529 * Generate left vectors bidiagonalizing L in U
2530 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2531 * (RWorkspace: 0)
2532 *
2533  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2534  $ work( iwork ), lwork-iwork+1, ierr )
2535  irwork = ie + m
2536 *
2537 * Perform bidiagonal QR iteration, computing left
2538 * singular vectors of A in U and computing right
2539 * singular vectors of A in A
2540 * (CWorkspace: 0)
2541 * (RWorkspace: need BDSPAC)
2542 *
2543  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2544  $ u, ldu, cdum, 1, rwork( irwork ), info )
2545 *
2546  END IF
2547 *
2548  ELSE IF( wntvs ) THEN
2549 *
2550  IF( wntun ) THEN
2551 *
2552 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2553 * M right singular vectors to be computed in VT and
2554 * no left singular vectors to be computed
2555 *
2556  IF( lwork.GE.m*m+3*m ) THEN
2557 *
2558 * Sufficient workspace for a fast algorithm
2559 *
2560  ir = 1
2561  IF( lwork.GE.wrkbl+lda*m ) THEN
2562 *
2563 * WORK(IR) is LDA by M
2564 *
2565  ldwrkr = lda
2566  ELSE
2567 *
2568 * WORK(IR) is M by M
2569 *
2570  ldwrkr = m
2571  END IF
2572  itau = ir + ldwrkr*m
2573  iwork = itau + m
2574 *
2575 * Compute A=L*Q
2576 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2577 * (RWorkspace: 0)
2578 *
2579  CALL zgelqf( m, n, a, lda, work( itau ),
2580  $ work( iwork ), lwork-iwork+1, ierr )
2581 *
2582 * Copy L to WORK(IR), zeroing out above it
2583 *
2584  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
2585  $ ldwrkr )
2586  CALL zlaset( 'U', m-1, m-1, czero, czero,
2587  $ work( ir+ldwrkr ), ldwrkr )
2588 *
2589 * Generate Q in A
2590 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2591 * (RWorkspace: 0)
2592 *
2593  CALL zunglq( m, n, m, a, lda, work( itau ),
2594  $ work( iwork ), lwork-iwork+1, ierr )
2595  ie = 1
2596  itauq = itau
2597  itaup = itauq + m
2598  iwork = itaup + m
2599 *
2600 * Bidiagonalize L in WORK(IR)
2601 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2602 * (RWorkspace: need M)
2603 *
2604  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
2605  $ rwork( ie ), work( itauq ),
2606  $ work( itaup ), work( iwork ),
2607  $ lwork-iwork+1, ierr )
2608 *
2609 * Generate right vectors bidiagonalizing L in
2610 * WORK(IR)
2611 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2612 * (RWorkspace: 0)
2613 *
2614  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2615  $ work( itaup ), work( iwork ),
2616  $ lwork-iwork+1, ierr )
2617  irwork = ie + m
2618 *
2619 * Perform bidiagonal QR iteration, computing right
2620 * singular vectors of L in WORK(IR)
2621 * (CWorkspace: need M*M)
2622 * (RWorkspace: need BDSPAC)
2623 *
2624  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2625  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2626  $ rwork( irwork ), info )
2627 *
2628 * Multiply right singular vectors of L in WORK(IR) by
2629 * Q in A, storing result in VT
2630 * (CWorkspace: need M*M)
2631 * (RWorkspace: 0)
2632 *
2633  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
2634  $ ldwrkr, a, lda, czero, vt, ldvt )
2635 *
2636  ELSE
2637 *
2638 * Insufficient workspace for a fast algorithm
2639 *
2640  itau = 1
2641  iwork = itau + m
2642 *
2643 * Compute A=L*Q
2644 * (CWorkspace: need 2*M, prefer M+M*NB)
2645 * (RWorkspace: 0)
2646 *
2647  CALL zgelqf( m, n, a, lda, work( itau ),
2648  $ work( iwork ), lwork-iwork+1, ierr )
2649 *
2650 * Copy result to VT
2651 *
2652  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2653 *
2654 * Generate Q in VT
2655 * (CWorkspace: need 2*M, prefer M+M*NB)
2656 * (RWorkspace: 0)
2657 *
2658  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2659  $ work( iwork ), lwork-iwork+1, ierr )
2660  ie = 1
2661  itauq = itau
2662  itaup = itauq + m
2663  iwork = itaup + m
2664 *
2665 * Zero out above L in A
2666 *
2667  CALL zlaset( 'U', m-1, m-1, czero, czero,
2668  $ a( 1, 2 ), lda )
2669 *
2670 * Bidiagonalize L in A
2671 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2672 * (RWorkspace: need M)
2673 *
2674  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2675  $ work( itauq ), work( itaup ),
2676  $ work( iwork ), lwork-iwork+1, ierr )
2677 *
2678 * Multiply right vectors bidiagonalizing L by Q in VT
2679 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2680 * (RWorkspace: 0)
2681 *
2682  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2683  $ work( itaup ), vt, ldvt,
2684  $ work( iwork ), lwork-iwork+1, ierr )
2685  irwork = ie + m
2686 *
2687 * Perform bidiagonal QR iteration, computing right
2688 * singular vectors of A in VT
2689 * (CWorkspace: 0)
2690 * (RWorkspace: need BDSPAC)
2691 *
2692  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2693  $ ldvt, cdum, 1, cdum, 1,
2694  $ rwork( irwork ), info )
2695 *
2696  END IF
2697 *
2698  ELSE IF( wntuo ) THEN
2699 *
2700 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2701 * M right singular vectors to be computed in VT and
2702 * M left singular vectors to be overwritten on A
2703 *
2704  IF( lwork.GE.2*m*m+3*m ) THEN
2705 *
2706 * Sufficient workspace for a fast algorithm
2707 *
2708  iu = 1
2709  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2710 *
2711 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2712 *
2713  ldwrku = lda
2714  ir = iu + ldwrku*m
2715  ldwrkr = lda
2716  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2717 *
2718 * WORK(IU) is LDA by M and WORK(IR) is M by M
2719 *
2720  ldwrku = lda
2721  ir = iu + ldwrku*m
2722  ldwrkr = m
2723  ELSE
2724 *
2725 * WORK(IU) is M by M and WORK(IR) is M by M
2726 *
2727  ldwrku = m
2728  ir = iu + ldwrku*m
2729  ldwrkr = m
2730  END IF
2731  itau = ir + ldwrkr*m
2732  iwork = itau + m
2733 *
2734 * Compute A=L*Q
2735 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2736 * (RWorkspace: 0)
2737 *
2738  CALL zgelqf( m, n, a, lda, work( itau ),
2739  $ work( iwork ), lwork-iwork+1, ierr )
2740 *
2741 * Copy L to WORK(IU), zeroing out below it
2742 *
2743  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2744  $ ldwrku )
2745  CALL zlaset( 'U', m-1, m-1, czero, czero,
2746  $ work( iu+ldwrku ), ldwrku )
2747 *
2748 * Generate Q in A
2749 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2750 * (RWorkspace: 0)
2751 *
2752  CALL zunglq( m, n, m, a, lda, work( itau ),
2753  $ work( iwork ), lwork-iwork+1, ierr )
2754  ie = 1
2755  itauq = itau
2756  itaup = itauq + m
2757  iwork = itaup + m
2758 *
2759 * Bidiagonalize L in WORK(IU), copying result to
2760 * WORK(IR)
2761 * (CWorkspace: need 2*M*M+3*M,
2762 * prefer 2*M*M+2*M+2*M*NB)
2763 * (RWorkspace: need M)
2764 *
2765  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2766  $ rwork( ie ), work( itauq ),
2767  $ work( itaup ), work( iwork ),
2768  $ lwork-iwork+1, ierr )
2769  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
2770  $ work( ir ), ldwrkr )
2771 *
2772 * Generate right bidiagonalizing vectors in WORK(IU)
2773 * (CWorkspace: need 2*M*M+3*M-1,
2774 * prefer 2*M*M+2*M+(M-1)*NB)
2775 * (RWorkspace: 0)
2776 *
2777  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2778  $ work( itaup ), work( iwork ),
2779  $ lwork-iwork+1, ierr )
2780 *
2781 * Generate left bidiagonalizing vectors in WORK(IR)
2782 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2783 * (RWorkspace: 0)
2784 *
2785  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2786  $ work( itauq ), work( iwork ),
2787  $ lwork-iwork+1, ierr )
2788  irwork = ie + m
2789 *
2790 * Perform bidiagonal QR iteration, computing left
2791 * singular vectors of L in WORK(IR) and computing
2792 * right singular vectors of L in WORK(IU)
2793 * (CWorkspace: need 2*M*M)
2794 * (RWorkspace: need BDSPAC)
2795 *
2796  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2797  $ work( iu ), ldwrku, work( ir ),
2798  $ ldwrkr, cdum, 1, rwork( irwork ),
2799  $ info )
2800 *
2801 * Multiply right singular vectors of L in WORK(IU) by
2802 * Q in A, storing result in VT
2803 * (CWorkspace: need M*M)
2804 * (RWorkspace: 0)
2805 *
2806  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2807  $ ldwrku, a, lda, czero, vt, ldvt )
2808 *
2809 * Copy left singular vectors of L to A
2810 * (CWorkspace: need M*M)
2811 * (RWorkspace: 0)
2812 *
2813  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2814  $ lda )
2815 *
2816  ELSE
2817 *
2818 * Insufficient workspace for a fast algorithm
2819 *
2820  itau = 1
2821  iwork = itau + m
2822 *
2823 * Compute A=L*Q, copying result to VT
2824 * (CWorkspace: need 2*M, prefer M+M*NB)
2825 * (RWorkspace: 0)
2826 *
2827  CALL zgelqf( m, n, a, lda, work( itau ),
2828  $ work( iwork ), lwork-iwork+1, ierr )
2829  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2830 *
2831 * Generate Q in VT
2832 * (CWorkspace: need 2*M, prefer M+M*NB)
2833 * (RWorkspace: 0)
2834 *
2835  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2836  $ work( iwork ), lwork-iwork+1, ierr )
2837  ie = 1
2838  itauq = itau
2839  itaup = itauq + m
2840  iwork = itaup + m
2841 *
2842 * Zero out above L in A
2843 *
2844  CALL zlaset( 'U', m-1, m-1, czero, czero,
2845  $ a( 1, 2 ), lda )
2846 *
2847 * Bidiagonalize L in A
2848 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2849 * (RWorkspace: need M)
2850 *
2851  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2852  $ work( itauq ), work( itaup ),
2853  $ work( iwork ), lwork-iwork+1, ierr )
2854 *
2855 * Multiply right vectors bidiagonalizing L by Q in VT
2856 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2857 * (RWorkspace: 0)
2858 *
2859  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2860  $ work( itaup ), vt, ldvt,
2861  $ work( iwork ), lwork-iwork+1, ierr )
2862 *
2863 * Generate left bidiagonalizing vectors of L in A
2864 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2865 * (RWorkspace: 0)
2866 *
2867  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2868  $ work( iwork ), lwork-iwork+1, ierr )
2869  irwork = ie + m
2870 *
2871 * Perform bidiagonal QR iteration, computing left
2872 * singular vectors of A in A and computing right
2873 * singular vectors of A in VT
2874 * (CWorkspace: 0)
2875 * (RWorkspace: need BDSPAC)
2876 *
2877  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2878  $ ldvt, a, lda, cdum, 1,
2879  $ rwork( irwork ), info )
2880 *
2881  END IF
2882 *
2883  ELSE IF( wntuas ) THEN
2884 *
2885 * Path 6t(N much larger than M, JOBU='S' or 'A',
2886 * JOBVT='S')
2887 * M right singular vectors to be computed in VT and
2888 * M left singular vectors to be computed in U
2889 *
2890  IF( lwork.GE.m*m+3*m ) THEN
2891 *
2892 * Sufficient workspace for a fast algorithm
2893 *
2894  iu = 1
2895  IF( lwork.GE.wrkbl+lda*m ) THEN
2896 *
2897 * WORK(IU) is LDA by N
2898 *
2899  ldwrku = lda
2900  ELSE
2901 *
2902 * WORK(IU) is LDA by M
2903 *
2904  ldwrku = m
2905  END IF
2906  itau = iu + ldwrku*m
2907  iwork = itau + m
2908 *
2909 * Compute A=L*Q
2910 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2911 * (RWorkspace: 0)
2912 *
2913  CALL zgelqf( m, n, a, lda, work( itau ),
2914  $ work( iwork ), lwork-iwork+1, ierr )
2915 *
2916 * Copy L to WORK(IU), zeroing out above it
2917 *
2918  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2919  $ ldwrku )
2920  CALL zlaset( 'U', m-1, m-1, czero, czero,
2921  $ work( iu+ldwrku ), ldwrku )
2922 *
2923 * Generate Q in A
2924 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2925 * (RWorkspace: 0)
2926 *
2927  CALL zunglq( m, n, m, a, lda, work( itau ),
2928  $ work( iwork ), lwork-iwork+1, ierr )
2929  ie = 1
2930  itauq = itau
2931  itaup = itauq + m
2932  iwork = itaup + m
2933 *
2934 * Bidiagonalize L in WORK(IU), copying result to U
2935 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2936 * (RWorkspace: need M)
2937 *
2938  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2939  $ rwork( ie ), work( itauq ),
2940  $ work( itaup ), work( iwork ),
2941  $ lwork-iwork+1, ierr )
2942  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
2943  $ ldu )
2944 *
2945 * Generate right bidiagonalizing vectors in WORK(IU)
2946 * (CWorkspace: need M*M+3*M-1,
2947 * prefer M*M+2*M+(M-1)*NB)
2948 * (RWorkspace: 0)
2949 *
2950  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2951  $ work( itaup ), work( iwork ),
2952  $ lwork-iwork+1, ierr )
2953 *
2954 * Generate left bidiagonalizing vectors in U
2955 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2956 * (RWorkspace: 0)
2957 *
2958  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2959  $ work( iwork ), lwork-iwork+1, ierr )
2960  irwork = ie + m
2961 *
2962 * Perform bidiagonal QR iteration, computing left
2963 * singular vectors of L in U and computing right
2964 * singular vectors of L in WORK(IU)
2965 * (CWorkspace: need M*M)
2966 * (RWorkspace: need BDSPAC)
2967 *
2968  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2969  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2970  $ rwork( irwork ), info )
2971 *
2972 * Multiply right singular vectors of L in WORK(IU) by
2973 * Q in A, storing result in VT
2974 * (CWorkspace: need M*M)
2975 * (RWorkspace: 0)
2976 *
2977  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2978  $ ldwrku, a, lda, czero, vt, ldvt )
2979 *
2980  ELSE
2981 *
2982 * Insufficient workspace for a fast algorithm
2983 *
2984  itau = 1
2985  iwork = itau + m
2986 *
2987 * Compute A=L*Q, copying result to VT
2988 * (CWorkspace: need 2*M, prefer M+M*NB)
2989 * (RWorkspace: 0)
2990 *
2991  CALL zgelqf( m, n, a, lda, work( itau ),
2992  $ work( iwork ), lwork-iwork+1, ierr )
2993  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2994 *
2995 * Generate Q in VT
2996 * (CWorkspace: need 2*M, prefer M+M*NB)
2997 * (RWorkspace: 0)
2998 *
2999  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
3000  $ work( iwork ), lwork-iwork+1, ierr )
3001 *
3002 * Copy L to U, zeroing out above it
3003 *
3004  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3005  CALL zlaset( 'U', m-1, m-1, czero, czero,
3006  $ u( 1, 2 ), ldu )
3007  ie = 1
3008  itauq = itau
3009  itaup = itauq + m
3010  iwork = itaup + m
3011 *
3012 * Bidiagonalize L in U
3013 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3014 * (RWorkspace: need M)
3015 *
3016  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3017  $ work( itauq ), work( itaup ),
3018  $ work( iwork ), lwork-iwork+1, ierr )
3019 *
3020 * Multiply right bidiagonalizing vectors in U by Q
3021 * in VT
3022 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3023 * (RWorkspace: 0)
3024 *
3025  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3026  $ work( itaup ), vt, ldvt,
3027  $ work( iwork ), lwork-iwork+1, ierr )
3028 *
3029 * Generate left bidiagonalizing vectors in U
3030 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3031 * (RWorkspace: 0)
3032 *
3033  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3034  $ work( iwork ), lwork-iwork+1, ierr )
3035  irwork = ie + m
3036 *
3037 * Perform bidiagonal QR iteration, computing left
3038 * singular vectors of A in U and computing right
3039 * singular vectors of A in VT
3040 * (CWorkspace: 0)
3041 * (RWorkspace: need BDSPAC)
3042 *
3043  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3044  $ ldvt, u, ldu, cdum, 1,
3045  $ rwork( irwork ), info )
3046 *
3047  END IF
3048 *
3049  END IF
3050 *
3051  ELSE IF( wntva ) THEN
3052 *
3053  IF( wntun ) THEN
3054 *
3055 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3056 * N right singular vectors to be computed in VT and
3057 * no left singular vectors to be computed
3058 *
3059  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3060 *
3061 * Sufficient workspace for a fast algorithm
3062 *
3063  ir = 1
3064  IF( lwork.GE.wrkbl+lda*m ) THEN
3065 *
3066 * WORK(IR) is LDA by M
3067 *
3068  ldwrkr = lda
3069  ELSE
3070 *
3071 * WORK(IR) is M by M
3072 *
3073  ldwrkr = m
3074  END IF
3075  itau = ir + ldwrkr*m
3076  iwork = itau + m
3077 *
3078 * Compute A=L*Q, copying result to VT
3079 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3080 * (RWorkspace: 0)
3081 *
3082  CALL zgelqf( m, n, a, lda, work( itau ),
3083  $ work( iwork ), lwork-iwork+1, ierr )
3084  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3085 *
3086 * Copy L to WORK(IR), zeroing out above it
3087 *
3088  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
3089  $ ldwrkr )
3090  CALL zlaset( 'U', m-1, m-1, czero, czero,
3091  $ work( ir+ldwrkr ), ldwrkr )
3092 *
3093 * Generate Q in VT
3094 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3095 * (RWorkspace: 0)
3096 *
3097  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3098  $ work( iwork ), lwork-iwork+1, ierr )
3099  ie = 1
3100  itauq = itau
3101  itaup = itauq + m
3102  iwork = itaup + m
3103 *
3104 * Bidiagonalize L in WORK(IR)
3105 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3106 * (RWorkspace: need M)
3107 *
3108  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
3109  $ rwork( ie ), work( itauq ),
3110  $ work( itaup ), work( iwork ),
3111  $ lwork-iwork+1, ierr )
3112 *
3113 * Generate right bidiagonalizing vectors in WORK(IR)
3114 * (CWorkspace: need M*M+3*M-1,
3115 * prefer M*M+2*M+(M-1)*NB)
3116 * (RWorkspace: 0)
3117 *
3118  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
3119  $ work( itaup ), work( iwork ),
3120  $ lwork-iwork+1, ierr )
3121  irwork = ie + m
3122 *
3123 * Perform bidiagonal QR iteration, computing right
3124 * singular vectors of L in WORK(IR)
3125 * (CWorkspace: need M*M)
3126 * (RWorkspace: need BDSPAC)
3127 *
3128  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3129  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3130  $ rwork( irwork ), info )
3131 *
3132 * Multiply right singular vectors of L in WORK(IR) by
3133 * Q in VT, storing result in A
3134 * (CWorkspace: need M*M)
3135 * (RWorkspace: 0)
3136 *
3137  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
3138  $ ldwrkr, vt, ldvt, czero, a, lda )
3139 *
3140 * Copy right singular vectors of A from A to VT
3141 *
3142  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3143 *
3144  ELSE
3145 *
3146 * Insufficient workspace for a fast algorithm
3147 *
3148  itau = 1
3149  iwork = itau + m
3150 *
3151 * Compute A=L*Q, copying result to VT
3152 * (CWorkspace: need 2*M, prefer M+M*NB)
3153 * (RWorkspace: 0)
3154 *
3155  CALL zgelqf( m, n, a, lda, work( itau ),
3156  $ work( iwork ), lwork-iwork+1, ierr )
3157  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3158 *
3159 * Generate Q in VT
3160 * (CWorkspace: need M+N, prefer M+N*NB)
3161 * (RWorkspace: 0)
3162 *
3163  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3164  $ work( iwork ), lwork-iwork+1, ierr )
3165  ie = 1
3166  itauq = itau
3167  itaup = itauq + m
3168  iwork = itaup + m
3169 *
3170 * Zero out above L in A
3171 *
3172  CALL zlaset( 'U', m-1, m-1, czero, czero,
3173  $ a( 1, 2 ), lda )
3174 *
3175 * Bidiagonalize L in A
3176 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3177 * (RWorkspace: need M)
3178 *
3179  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3180  $ work( itauq ), work( itaup ),
3181  $ work( iwork ), lwork-iwork+1, ierr )
3182 *
3183 * Multiply right bidiagonalizing vectors in A by Q
3184 * in VT
3185 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3186 * (RWorkspace: 0)
3187 *
3188  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3189  $ work( itaup ), vt, ldvt,
3190  $ work( iwork ), lwork-iwork+1, ierr )
3191  irwork = ie + m
3192 *
3193 * Perform bidiagonal QR iteration, computing right
3194 * singular vectors of A in VT
3195 * (CWorkspace: 0)
3196 * (RWorkspace: need BDSPAC)
3197 *
3198  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3199  $ ldvt, cdum, 1, cdum, 1,
3200  $ rwork( irwork ), info )
3201 *
3202  END IF
3203 *
3204  ELSE IF( wntuo ) THEN
3205 *
3206 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3207 * N right singular vectors to be computed in VT and
3208 * M left singular vectors to be overwritten on A
3209 *
3210  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3211 *
3212 * Sufficient workspace for a fast algorithm
3213 *
3214  iu = 1
3215  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3216 *
3217 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3218 *
3219  ldwrku = lda
3220  ir = iu + ldwrku*m
3221  ldwrkr = lda
3222  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3223 *
3224 * WORK(IU) is LDA by M and WORK(IR) is M by M
3225 *
3226  ldwrku = lda
3227  ir = iu + ldwrku*m
3228  ldwrkr = m
3229  ELSE
3230 *
3231 * WORK(IU) is M by M and WORK(IR) is M by M
3232 *
3233  ldwrku = m
3234  ir = iu + ldwrku*m
3235  ldwrkr = m
3236  END IF
3237  itau = ir + ldwrkr*m
3238  iwork = itau + m
3239 *
3240 * Compute A=L*Q, copying result to VT
3241 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3242 * (RWorkspace: 0)
3243 *
3244  CALL zgelqf( m, n, a, lda, work( itau ),
3245  $ work( iwork ), lwork-iwork+1, ierr )
3246  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3247 *
3248 * Generate Q in VT
3249 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3250 * (RWorkspace: 0)
3251 *
3252  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3253  $ work( iwork ), lwork-iwork+1, ierr )
3254 *
3255 * Copy L to WORK(IU), zeroing out above it
3256 *
3257  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3258  $ ldwrku )
3259  CALL zlaset( 'U', m-1, m-1, czero, czero,
3260  $ work( iu+ldwrku ), ldwrku )
3261  ie = 1
3262  itauq = itau
3263  itaup = itauq + m
3264  iwork = itaup + m
3265 *
3266 * Bidiagonalize L in WORK(IU), copying result to
3267 * WORK(IR)
3268 * (CWorkspace: need 2*M*M+3*M,
3269 * prefer 2*M*M+2*M+2*M*NB)
3270 * (RWorkspace: need M)
3271 *
3272  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3273  $ rwork( ie ), work( itauq ),
3274  $ work( itaup ), work( iwork ),
3275  $ lwork-iwork+1, ierr )
3276  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
3277  $ work( ir ), ldwrkr )
3278 *
3279 * Generate right bidiagonalizing vectors in WORK(IU)
3280 * (CWorkspace: need 2*M*M+3*M-1,
3281 * prefer 2*M*M+2*M+(M-1)*NB)
3282 * (RWorkspace: 0)
3283 *
3284  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3285  $ work( itaup ), work( iwork ),
3286  $ lwork-iwork+1, ierr )
3287 *
3288 * Generate left bidiagonalizing vectors in WORK(IR)
3289 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3290 * (RWorkspace: 0)
3291 *
3292  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3293  $ work( itauq ), work( iwork ),
3294  $ lwork-iwork+1, ierr )
3295  irwork = ie + m
3296 *
3297 * Perform bidiagonal QR iteration, computing left
3298 * singular vectors of L in WORK(IR) and computing
3299 * right singular vectors of L in WORK(IU)
3300 * (CWorkspace: need 2*M*M)
3301 * (RWorkspace: need BDSPAC)
3302 *
3303  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3304  $ work( iu ), ldwrku, work( ir ),
3305  $ ldwrkr, cdum, 1, rwork( irwork ),
3306  $ info )
3307 *
3308 * Multiply right singular vectors of L in WORK(IU) by
3309 * Q in VT, storing result in A
3310 * (CWorkspace: need M*M)
3311 * (RWorkspace: 0)
3312 *
3313  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3314  $ ldwrku, vt, ldvt, czero, a, lda )
3315 *
3316 * Copy right singular vectors of A from A to VT
3317 *
3318  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3319 *
3320 * Copy left singular vectors of A from WORK(IR) to A
3321 *
3322  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3323  $ lda )
3324 *
3325  ELSE
3326 *
3327 * Insufficient workspace for a fast algorithm
3328 *
3329  itau = 1
3330  iwork = itau + m
3331 *
3332 * Compute A=L*Q, copying result to VT
3333 * (CWorkspace: need 2*M, prefer M+M*NB)
3334 * (RWorkspace: 0)
3335 *
3336  CALL zgelqf( m, n, a, lda, work( itau ),
3337  $ work( iwork ), lwork-iwork+1, ierr )
3338  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3339 *
3340 * Generate Q in VT
3341 * (CWorkspace: need M+N, prefer M+N*NB)
3342 * (RWorkspace: 0)
3343 *
3344  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3345  $ work( iwork ), lwork-iwork+1, ierr )
3346  ie = 1
3347  itauq = itau
3348  itaup = itauq + m
3349  iwork = itaup + m
3350 *
3351 * Zero out above L in A
3352 *
3353  CALL zlaset( 'U', m-1, m-1, czero, czero,
3354  $ a( 1, 2 ), lda )
3355 *
3356 * Bidiagonalize L in A
3357 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3358 * (RWorkspace: need M)
3359 *
3360  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3361  $ work( itauq ), work( itaup ),
3362  $ work( iwork ), lwork-iwork+1, ierr )
3363 *
3364 * Multiply right bidiagonalizing vectors in A by Q
3365 * in VT
3366 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3367 * (RWorkspace: 0)
3368 *
3369  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3370  $ work( itaup ), vt, ldvt,
3371  $ work( iwork ), lwork-iwork+1, ierr )
3372 *
3373 * Generate left bidiagonalizing vectors in A
3374 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3375 * (RWorkspace: 0)
3376 *
3377  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
3378  $ work( iwork ), lwork-iwork+1, ierr )
3379  irwork = ie + m
3380 *
3381 * Perform bidiagonal QR iteration, computing left
3382 * singular vectors of A in A and computing right
3383 * singular vectors of A in VT
3384 * (CWorkspace: 0)
3385 * (RWorkspace: need BDSPAC)
3386 *
3387  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3388  $ ldvt, a, lda, cdum, 1,
3389  $ rwork( irwork ), info )
3390 *
3391  END IF
3392 *
3393  ELSE IF( wntuas ) THEN
3394 *
3395 * Path 9t(N much larger than M, JOBU='S' or 'A',
3396 * JOBVT='A')
3397 * N right singular vectors to be computed in VT and
3398 * M left singular vectors to be computed in U
3399 *
3400  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3401 *
3402 * Sufficient workspace for a fast algorithm
3403 *
3404  iu = 1
3405  IF( lwork.GE.wrkbl+lda*m ) THEN
3406 *
3407 * WORK(IU) is LDA by M
3408 *
3409  ldwrku = lda
3410  ELSE
3411 *
3412 * WORK(IU) is M by M
3413 *
3414  ldwrku = m
3415  END IF
3416  itau = iu + ldwrku*m
3417  iwork = itau + m
3418 *
3419 * Compute A=L*Q, copying result to VT
3420 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3421 * (RWorkspace: 0)
3422 *
3423  CALL zgelqf( m, n, a, lda, work( itau ),
3424  $ work( iwork ), lwork-iwork+1, ierr )
3425  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3426 *
3427 * Generate Q in VT
3428 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3429 * (RWorkspace: 0)
3430 *
3431  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3432  $ work( iwork ), lwork-iwork+1, ierr )
3433 *
3434 * Copy L to WORK(IU), zeroing out above it
3435 *
3436  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3437  $ ldwrku )
3438  CALL zlaset( 'U', m-1, m-1, czero, czero,
3439  $ work( iu+ldwrku ), ldwrku )
3440  ie = 1
3441  itauq = itau
3442  itaup = itauq + m
3443  iwork = itaup + m
3444 *
3445 * Bidiagonalize L in WORK(IU), copying result to U
3446 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3447 * (RWorkspace: need M)
3448 *
3449  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3450  $ rwork( ie ), work( itauq ),
3451  $ work( itaup ), work( iwork ),
3452  $ lwork-iwork+1, ierr )
3453  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
3454  $ ldu )
3455 *
3456 * Generate right bidiagonalizing vectors in WORK(IU)
3457 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3458 * (RWorkspace: 0)
3459 *
3460  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3461  $ work( itaup ), work( iwork ),
3462  $ lwork-iwork+1, ierr )
3463 *
3464 * Generate left bidiagonalizing vectors in U
3465 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3466 * (RWorkspace: 0)
3467 *
3468  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3469  $ work( iwork ), lwork-iwork+1, ierr )
3470  irwork = ie + m
3471 *
3472 * Perform bidiagonal QR iteration, computing left
3473 * singular vectors of L in U and computing right
3474 * singular vectors of L in WORK(IU)
3475 * (CWorkspace: need M*M)
3476 * (RWorkspace: need BDSPAC)
3477 *
3478  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3479  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3480  $ rwork( irwork ), info )
3481 *
3482 * Multiply right singular vectors of L in WORK(IU) by
3483 * Q in VT, storing result in A
3484 * (CWorkspace: need M*M)
3485 * (RWorkspace: 0)
3486 *
3487  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3488  $ ldwrku, vt, ldvt, czero, a, lda )
3489 *
3490 * Copy right singular vectors of A from A to VT
3491 *
3492  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3493 *
3494  ELSE
3495 *
3496 * Insufficient workspace for a fast algorithm
3497 *
3498  itau = 1
3499  iwork = itau + m
3500 *
3501 * Compute A=L*Q, copying result to VT
3502 * (CWorkspace: need 2*M, prefer M+M*NB)
3503 * (RWorkspace: 0)
3504 *
3505  CALL zgelqf( m, n, a, lda, work( itau ),
3506  $ work( iwork ), lwork-iwork+1, ierr )
3507  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3508 *
3509 * Generate Q in VT
3510 * (CWorkspace: need M+N, prefer M+N*NB)
3511 * (RWorkspace: 0)
3512 *
3513  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3514  $ work( iwork ), lwork-iwork+1, ierr )
3515 *
3516 * Copy L to U, zeroing out above it
3517 *
3518  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3519  CALL zlaset( 'U', m-1, m-1, czero, czero,
3520  $ u( 1, 2 ), ldu )
3521  ie = 1
3522  itauq = itau
3523  itaup = itauq + m
3524  iwork = itaup + m
3525 *
3526 * Bidiagonalize L in U
3527 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3528 * (RWorkspace: need M)
3529 *
3530  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3531  $ work( itauq ), work( itaup ),
3532  $ work( iwork ), lwork-iwork+1, ierr )
3533 *
3534 * Multiply right bidiagonalizing vectors in U by Q
3535 * in VT
3536 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3537 * (RWorkspace: 0)
3538 *
3539  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3540  $ work( itaup ), vt, ldvt,
3541  $ work( iwork ), lwork-iwork+1, ierr )
3542 *
3543 * Generate left bidiagonalizing vectors in U
3544 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3545 * (RWorkspace: 0)
3546 *
3547  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3548  $ work( iwork ), lwork-iwork+1, ierr )
3549  irwork = ie + m
3550 *
3551 * Perform bidiagonal QR iteration, computing left
3552 * singular vectors of A in U and computing right
3553 * singular vectors of A in VT
3554 * (CWorkspace: 0)
3555 * (RWorkspace: need BDSPAC)
3556 *
3557  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3558  $ ldvt, u, ldu, cdum, 1,
3559  $ rwork( irwork ), info )
3560 *
3561  END IF
3562 *
3563  END IF
3564 *
3565  END IF
3566 *
3567  ELSE
3568 *
3569 * N .LT. MNTHR
3570 *
3571 * Path 10t(N greater than M, but not much larger)
3572 * Reduce to bidiagonal form without LQ decomposition
3573 *
3574  ie = 1
3575  itauq = 1
3576  itaup = itauq + m
3577  iwork = itaup + m
3578 *
3579 * Bidiagonalize A
3580 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3581 * (RWorkspace: M)
3582 *
3583  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3584  $ work( itaup ), work( iwork ), lwork-iwork+1,
3585  $ ierr )
3586  IF( wntuas ) THEN
3587 *
3588 * If left singular vectors desired in U, copy result to U
3589 * and generate left bidiagonalizing vectors in U
3590 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3591 * (RWorkspace: 0)
3592 *
3593  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3594  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3595  $ work( iwork ), lwork-iwork+1, ierr )
3596  END IF
3597  IF( wntvas ) THEN
3598 *
3599 * If right singular vectors desired in VT, copy result to
3600 * VT and generate right bidiagonalizing vectors in VT
3601 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3602 * (RWorkspace: 0)
3603 *
3604  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3605  IF( wntva )
3606  $ nrvt = n
3607  IF( wntvs )
3608  $ nrvt = m
3609  CALL zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3610  $ work( iwork ), lwork-iwork+1, ierr )
3611  END IF
3612  IF( wntuo ) THEN
3613 *
3614 * If left singular vectors desired in A, generate left
3615 * bidiagonalizing vectors in A
3616 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3617 * (RWorkspace: 0)
3618 *
3619  CALL zungbr( 'Q', m, m, n, a, lda, work( itauq ),
3620  $ work( iwork ), lwork-iwork+1, ierr )
3621  END IF
3622  IF( wntvo ) THEN
3623 *
3624 * If right singular vectors desired in A, generate right
3625 * bidiagonalizing vectors in A
3626 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3627 * (RWorkspace: 0)
3628 *
3629  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
3630  $ work( iwork ), lwork-iwork+1, ierr )
3631  END IF
3632  irwork = ie + m
3633  IF( wntuas .OR. wntuo )
3634  $ nru = m
3635  IF( wntun )
3636  $ nru = 0
3637  IF( wntvas .OR. wntvo )
3638  $ ncvt = n
3639  IF( wntvn )
3640  $ ncvt = 0
3641  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3642 *
3643 * Perform bidiagonal QR iteration, if desired, computing
3644 * left singular vectors in U and computing right singular
3645 * vectors in VT
3646 * (CWorkspace: 0)
3647 * (RWorkspace: need BDSPAC)
3648 *
3649  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3650  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3651  $ info )
3652  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3653 *
3654 * Perform bidiagonal QR iteration, if desired, computing
3655 * left singular vectors in U and computing right singular
3656 * vectors in A
3657 * (CWorkspace: 0)
3658 * (RWorkspace: need BDSPAC)
3659 *
3660  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3661  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3662  $ info )
3663  ELSE
3664 *
3665 * Perform bidiagonal QR iteration, if desired, computing
3666 * left singular vectors in A and computing right singular
3667 * vectors in VT
3668 * (CWorkspace: 0)
3669 * (RWorkspace: need BDSPAC)
3670 *
3671  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3672  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3673  $ info )
3674  END IF
3675 *
3676  END IF
3677 *
3678  END IF
3679 *
3680 * Undo scaling if necessary
3681 *
3682  IF( iscl.EQ.1 ) THEN
3683  IF( anrm.GT.bignum )
3684  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3685  $ ierr )
3686  IF( info.NE.0 .AND. anrm.GT.bignum )
3687  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3688  $ rwork( ie ), minmn, ierr )
3689  IF( anrm.LT.smlnum )
3690  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3691  $ ierr )
3692  IF( info.NE.0 .AND. anrm.LT.smlnum )
3693  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3694  $ rwork( ie ), minmn, ierr )
3695  END IF
3696 *
3697 * Return optimal workspace in WORK(1)
3698 *
3699  work( 1 ) = maxwrk
3700 *
3701  RETURN
3702 *
3703 * End of ZGESVD
3704 *
3705  END
subroutine zbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, RWORK, INFO)
ZBDSQR
Definition: zbdsqr.f:224
subroutine zgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
ZGEBRD
Definition: zgebrd.f:207
subroutine zungbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGBR
Definition: zungbr.f:159
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:145
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:145
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:105
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
subroutine zungqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGQR
Definition: zungqr.f:130
subroutine zgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO)
ZGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: zgesvd.f:216
subroutine zgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition: zgeqrf.f:151
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108
subroutine zunglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
ZUNGLQ
Definition: zunglq.f:129
subroutine zgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
ZGELQF
Definition: zgelqf.f:137
subroutine zunmbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
ZUNMBR
Definition: zunmbr.f:198