LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
dgesvj.f
Go to the documentation of this file.
1 *> \brief \b DGESVJ
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGESVJ + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvj.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvj.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvj.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
22 * LDV, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, LDA, LDV, LWORK, M, MV, N
26 * CHARACTER*1 JOBA, JOBU, JOBV
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), SVA( N ), V( LDV, * ),
30 * $ WORK( LWORK )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGESVJ computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, where M >= N. The SVD of A is written as
41 *> [++] [xx] [x0] [xx]
42 *> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx]
43 *> [++] [xx]
44 *> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
45 *> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements
46 *> of SIGMA are the singular values of A. The columns of U and V are the
47 *> left and the right singular vectors of A, respectively.
48 *> \endverbatim
49 *
50 * Arguments:
51 * ==========
52 *
53 *> \param[in] JOBA
54 *> \verbatim
55 *> JOBA is CHARACTER* 1
56 *> Specifies the structure of A.
57 *> = 'L': The input matrix A is lower triangular;
58 *> = 'U': The input matrix A is upper triangular;
59 *> = 'G': The input matrix A is general M-by-N matrix, M >= N.
60 *> \endverbatim
61 *>
62 *> \param[in] JOBU
63 *> \verbatim
64 *> JOBU is CHARACTER*1
65 *> Specifies whether to compute the left singular vectors
66 *> (columns of U):
67 *> = 'U': The left singular vectors corresponding to the nonzero
68 *> singular values are computed and returned in the leading
69 *> columns of A. See more details in the description of A.
70 *> The default numerical orthogonality threshold is set to
71 *> approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E').
72 *> = 'C': Analogous to JOBU='U', except that user can control the
73 *> level of numerical orthogonality of the computed left
74 *> singular vectors. TOL can be set to TOL = CTOL*EPS, where
75 *> CTOL is given on input in the array WORK.
76 *> No CTOL smaller than ONE is allowed. CTOL greater
77 *> than 1 / EPS is meaningless. The option 'C'
78 *> can be used if M*EPS is satisfactory orthogonality
79 *> of the computed left singular vectors, so CTOL=M could
80 *> save few sweeps of Jacobi rotations.
81 *> See the descriptions of A and WORK(1).
82 *> = 'N': The matrix U is not computed. However, see the
83 *> description of A.
84 *> \endverbatim
85 *>
86 *> \param[in] JOBV
87 *> \verbatim
88 *> JOBV is CHARACTER*1
89 *> Specifies whether to compute the right singular vectors, that
90 *> is, the matrix V:
91 *> = 'V' : the matrix V is computed and returned in the array V
92 *> = 'A' : the Jacobi rotations are applied to the MV-by-N
93 *> array V. In other words, the right singular vector
94 *> matrix V is not computed explicitly, instead it is
95 *> applied to an MV-by-N matrix initially stored in the
96 *> first MV rows of V.
97 *> = 'N' : the matrix V is not computed and the array V is not
98 *> referenced
99 *> \endverbatim
100 *>
101 *> \param[in] M
102 *> \verbatim
103 *> M is INTEGER
104 *> The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0.
105 *> \endverbatim
106 *>
107 *> \param[in] N
108 *> \verbatim
109 *> N is INTEGER
110 *> The number of columns of the input matrix A.
111 *> M >= N >= 0.
112 *> \endverbatim
113 *>
114 *> \param[in,out] A
115 *> \verbatim
116 *> A is DOUBLE PRECISION array, dimension (LDA,N)
117 *> On entry, the M-by-N matrix A.
118 *> On exit :
119 *> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C' :
120 *> If INFO .EQ. 0 :
121 *> RANKA orthonormal columns of U are returned in the
122 *> leading RANKA columns of the array A. Here RANKA <= N
123 *> is the number of computed singular values of A that are
124 *> above the underflow threshold DLAMCH('S'). The singular
125 *> vectors corresponding to underflowed or zero singular
126 *> values are not computed. The value of RANKA is returned
127 *> in the array WORK as RANKA=NINT(WORK(2)). Also see the
128 *> descriptions of SVA and WORK. The computed columns of U
129 *> are mutually numerically orthogonal up to approximately
130 *> TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'),
131 *> see the description of JOBU.
132 *> If INFO .GT. 0 :
133 *> the procedure DGESVJ did not converge in the given number
134 *> of iterations (sweeps). In that case, the computed
135 *> columns of U may not be orthogonal up to TOL. The output
136 *> U (stored in A), SIGMA (given by the computed singular
137 *> values in SVA(1:N)) and V is still a decomposition of the
138 *> input matrix A in the sense that the residual
139 *> ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small.
140 *>
141 *> If JOBU .EQ. 'N' :
142 *> If INFO .EQ. 0 :
143 *> Note that the left singular vectors are 'for free' in the
144 *> one-sided Jacobi SVD algorithm. However, if only the
145 *> singular values are needed, the level of numerical
146 *> orthogonality of U is not an issue and iterations are
147 *> stopped when the columns of the iterated matrix are
148 *> numerically orthogonal up to approximately M*EPS. Thus,
149 *> on exit, A contains the columns of U scaled with the
150 *> corresponding singular values.
151 *> If INFO .GT. 0 :
152 *> the procedure DGESVJ did not converge in the given number
153 *> of iterations (sweeps).
154 *> \endverbatim
155 *>
156 *> \param[in] LDA
157 *> \verbatim
158 *> LDA is INTEGER
159 *> The leading dimension of the array A. LDA >= max(1,M).
160 *> \endverbatim
161 *>
162 *> \param[out] SVA
163 *> \verbatim
164 *> SVA is DOUBLE PRECISION array, dimension (N)
165 *> On exit :
166 *> If INFO .EQ. 0 :
167 *> depending on the value SCALE = WORK(1), we have:
168 *> If SCALE .EQ. ONE :
169 *> SVA(1:N) contains the computed singular values of A.
170 *> During the computation SVA contains the Euclidean column
171 *> norms of the iterated matrices in the array A.
172 *> If SCALE .NE. ONE :
173 *> The singular values of A are SCALE*SVA(1:N), and this
174 *> factored representation is due to the fact that some of the
175 *> singular values of A might underflow or overflow.
176 *> If INFO .GT. 0 :
177 *> the procedure DGESVJ did not converge in the given number of
178 *> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate.
179 *> \endverbatim
180 *>
181 *> \param[in] MV
182 *> \verbatim
183 *> MV is INTEGER
184 *> If JOBV .EQ. 'A', then the product of Jacobi rotations in DGESVJ
185 *> is applied to the first MV rows of V. See the description of JOBV.
186 *> \endverbatim
187 *>
188 *> \param[in,out] V
189 *> \verbatim
190 *> V is DOUBLE PRECISION array, dimension (LDV,N)
191 *> If JOBV = 'V', then V contains on exit the N-by-N matrix of
192 *> the right singular vectors;
193 *> If JOBV = 'A', then V contains the product of the computed right
194 *> singular vector matrix and the initial matrix in
195 *> the array V.
196 *> If JOBV = 'N', then V is not referenced.
197 *> \endverbatim
198 *>
199 *> \param[in] LDV
200 *> \verbatim
201 *> LDV is INTEGER
202 *> The leading dimension of the array V, LDV .GE. 1.
203 *> If JOBV .EQ. 'V', then LDV .GE. max(1,N).
204 *> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) .
205 *> \endverbatim
206 *>
207 *> \param[in,out] WORK
208 *> \verbatim
209 *> WORK is DOUBLE PRECISION array, dimension max(4,M+N).
210 *> On entry :
211 *> If JOBU .EQ. 'C' :
212 *> WORK(1) = CTOL, where CTOL defines the threshold for convergence.
213 *> The process stops if all columns of A are mutually
214 *> orthogonal up to CTOL*EPS, EPS=DLAMCH('E').
215 *> It is required that CTOL >= ONE, i.e. it is not
216 *> allowed to force the routine to obtain orthogonality
217 *> below EPS.
218 *> On exit :
219 *> WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N)
220 *> are the computed singular values of A.
221 *> (See description of SVA().)
222 *> WORK(2) = NINT(WORK(2)) is the number of the computed nonzero
223 *> singular values.
224 *> WORK(3) = NINT(WORK(3)) is the number of the computed singular
225 *> values that are larger than the underflow threshold.
226 *> WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi
227 *> rotations needed for numerical convergence.
228 *> WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep.
229 *> This is useful information in cases when DGESVJ did
230 *> not converge, as it can be used to estimate whether
231 *> the output is stil useful and for post festum analysis.
232 *> WORK(6) = the largest absolute value over all sines of the
233 *> Jacobi rotation angles in the last sweep. It can be
234 *> useful for a post festum analysis.
235 *> \endverbatim
236 *>
237 *> \param[in] LWORK
238 *> \verbatim
239 *> LWORK is INTEGER
240 *> length of WORK, WORK >= MAX(6,M+N)
241 *> \endverbatim
242 *>
243 *> \param[out] INFO
244 *> \verbatim
245 *> INFO is INTEGER
246 *> = 0 : successful exit.
247 *> < 0 : if INFO = -i, then the i-th argument had an illegal value
248 *> > 0 : DGESVJ did not converge in the maximal allowed number (30)
249 *> of sweeps. The output may still be useful. See the
250 *> description of WORK.
251 *> \endverbatim
252 *
253 * Authors:
254 * ========
255 *
256 *> \author Univ. of Tennessee
257 *> \author Univ. of California Berkeley
258 *> \author Univ. of Colorado Denver
259 *> \author NAG Ltd.
260 *
261 *> \date September 2012
262 *
263 *> \ingroup doubleGEcomputational
264 *
265 *> \par Further Details:
266 * =====================
267 *>
268 *> \verbatim
269 *>
270 *> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane
271 *> rotations. The rotations are implemented as fast scaled rotations of
272 *> Anda and Park [1]. In the case of underflow of the Jacobi angle, a
273 *> modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses
274 *> column interchanges of de Rijk [2]. The relative accuracy of the computed
275 *> singular values and the accuracy of the computed singular vectors (in
276 *> angle metric) is as guaranteed by the theory of Demmel and Veselic [3].
277 *> The condition number that determines the accuracy in the full rank case
278 *> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the
279 *> spectral condition number. The best performance of this Jacobi SVD
280 *> procedure is achieved if used in an accelerated version of Drmac and
281 *> Veselic [5,6], and it is the kernel routine in the SIGMA library [7].
282 *> Some tunning parameters (marked with [TP]) are available for the
283 *> implementer.
284 *> The computational range for the nonzero singular values is the machine
285 *> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even
286 *> denormalized singular values can be computed with the corresponding
287 *> gradual loss of accurate digits.
288 *> \endverbatim
289 *
290 *> \par Contributors:
291 * ==================
292 *>
293 *> \verbatim
294 *>
295 *> ============
296 *>
297 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
298 *> \endverbatim
299 *
300 *> \par References:
301 * ================
302 *>
303 *> \verbatim
304 *>
305 *> [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling.
306 *> SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174.
307 *> [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the
308 *> singular value decomposition on a vector computer.
309 *> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371.
310 *> [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR.
311 *> [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular
312 *> value computation in floating point arithmetic.
313 *> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222.
314 *> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
315 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
316 *> LAPACK Working note 169.
317 *> [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
318 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
319 *> LAPACK Working note 170.
320 *> [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
321 *> QSVD, (H,K)-SVD computations.
322 *> Department of Mathematics, University of Zagreb, 2008.
323 *> \endverbatim
324 *
325 *> \par Bugs, examples and comments:
326 * =================================
327 *>
328 *> \verbatim
329 *> ===========================
330 *> Please report all bugs and send interesting test examples and comments to
331 *> drmac@math.hr. Thank you.
332 *> \endverbatim
333 *>
334 * =====================================================================
335  SUBROUTINE dgesvj( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
336  $ ldv, work, lwork, info )
337 *
338 * -- LAPACK computational routine (version 3.4.2) --
339 * -- LAPACK is a software package provided by Univ. of Tennessee, --
340 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
341 * September 2012
342 *
343 * .. Scalar Arguments ..
344  INTEGER info, lda, ldv, lwork, m, mv, n
345  CHARACTER*1 joba, jobu, jobv
346 * ..
347 * .. Array Arguments ..
348  DOUBLE PRECISION a( lda, * ), sva( n ), v( ldv, * ),
349  $ work( lwork )
350 * ..
351 *
352 * =====================================================================
353 *
354 * .. Local Parameters ..
355  DOUBLE PRECISION zero, half, one
356  parameter( zero = 0.0d0, half = 0.5d0, one = 1.0d0)
357  INTEGER nsweep
358  parameter( nsweep = 30 )
359 * ..
360 * .. Local Scalars ..
361  DOUBLE PRECISION aapp, aapp0, aapq, aaqq, apoaq, aqoap, big,
362  $ bigtheta, cs, ctol, epsln, large, mxaapq,
363  $ mxsinj, rootbig, rooteps, rootsfmin, roottol,
364  $ skl, sfmin, small, sn, t, temp1, theta,
365  $ thsign, tol
366  INTEGER blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1,
367  $ iswrot, jbc, jgl, kbl, lkahead, mvl, n2, n34,
368  $ n4, nbl, notrot, p, pskipped, q, rowskip,
369  $ swband
370  LOGICAL applv, goscale, lower, lsvec, noscale, rotok,
371  $ rsvec, uctol, upper
372 * ..
373 * .. Local Arrays ..
374  DOUBLE PRECISION fastr( 5 )
375 * ..
376 * .. Intrinsic Functions ..
377  INTRINSIC dabs, dmax1, dmin1, dble, min0, dsign, dsqrt
378 * ..
379 * .. External Functions ..
380 * ..
381 * from BLAS
382  DOUBLE PRECISION ddot, dnrm2
383  EXTERNAL ddot, dnrm2
384  INTEGER idamax
385  EXTERNAL idamax
386 * from LAPACK
387  DOUBLE PRECISION dlamch
388  EXTERNAL dlamch
389  LOGICAL lsame
390  EXTERNAL lsame
391 * ..
392 * .. External Subroutines ..
393 * ..
394 * from BLAS
395  EXTERNAL daxpy, dcopy, drotm, dscal, dswap
396 * from LAPACK
397  EXTERNAL dlascl, dlaset, dlassq, xerbla
398 *
399  EXTERNAL dgsvj0, dgsvj1
400 * ..
401 * .. Executable Statements ..
402 *
403 * Test the input arguments
404 *
405  lsvec = lsame( jobu, 'U' )
406  uctol = lsame( jobu, 'C' )
407  rsvec = lsame( jobv, 'V' )
408  applv = lsame( jobv, 'A' )
409  upper = lsame( joba, 'U' )
410  lower = lsame( joba, 'L' )
411 *
412  IF( .NOT.( upper .OR. lower .OR. lsame( joba, 'G' ) ) ) THEN
413  info = -1
414  ELSE IF( .NOT.( lsvec .OR. uctol .OR. lsame( jobu, 'N' ) ) ) THEN
415  info = -2
416  ELSE IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
417  info = -3
418  ELSE IF( m.LT.0 ) THEN
419  info = -4
420  ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
421  info = -5
422  ELSE IF( lda.LT.m ) THEN
423  info = -7
424  ELSE IF( mv.LT.0 ) THEN
425  info = -9
426  ELSE IF( ( rsvec .AND. ( ldv.LT.n ) ) .OR.
427  $ ( applv .AND. ( ldv.LT.mv ) ) ) THEN
428  info = -11
429  ELSE IF( uctol .AND. ( work( 1 ).LE.one ) ) THEN
430  info = -12
431  ELSE IF( lwork.LT.max0( m+n, 6 ) ) THEN
432  info = -13
433  ELSE
434  info = 0
435  END IF
436 *
437 * #:(
438  IF( info.NE.0 ) THEN
439  CALL xerbla( 'DGESVJ', -info )
440  return
441  END IF
442 *
443 * #:) Quick return for void matrix
444 *
445  IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) )return
446 *
447 * Set numerical parameters
448 * The stopping criterion for Jacobi rotations is
449 *
450 * max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS
451 *
452 * where EPS is the round-off and CTOL is defined as follows:
453 *
454  IF( uctol ) THEN
455 * ... user controlled
456  ctol = work( 1 )
457  ELSE
458 * ... default
459  IF( lsvec .OR. rsvec .OR. applv ) THEN
460  ctol = dsqrt( dble( m ) )
461  ELSE
462  ctol = dble( m )
463  END IF
464  END IF
465 * ... and the machine dependent parameters are
466 *[!] (Make sure that DLAMCH() works properly on the target machine.)
467 *
468  epsln = dlamch( 'Epsilon' )
469  rooteps = dsqrt( epsln )
470  sfmin = dlamch( 'SafeMinimum' )
471  rootsfmin = dsqrt( sfmin )
472  small = sfmin / epsln
473  big = dlamch( 'Overflow' )
474 * BIG = ONE / SFMIN
475  rootbig = one / rootsfmin
476  large = big / dsqrt( dble( m*n ) )
477  bigtheta = one / rooteps
478 *
479  tol = ctol*epsln
480  roottol = dsqrt( tol )
481 *
482  IF( dble( m )*epsln.GE.one ) THEN
483  info = -4
484  CALL xerbla( 'DGESVJ', -info )
485  return
486  END IF
487 *
488 * Initialize the right singular vector matrix.
489 *
490  IF( rsvec ) THEN
491  mvl = n
492  CALL dlaset( 'A', mvl, n, zero, one, v, ldv )
493  ELSE IF( applv ) THEN
494  mvl = mv
495  END IF
496  rsvec = rsvec .OR. applv
497 *
498 * Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N )
499 *(!) If necessary, scale A to protect the largest singular value
500 * from overflow. It is possible that saving the largest singular
501 * value destroys the information about the small ones.
502 * This initial scaling is almost minimal in the sense that the
503 * goal is to make sure that no column norm overflows, and that
504 * DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries
505 * in A are detected, the procedure returns with INFO=-6.
506 *
507  skl= one / dsqrt( dble( m )*dble( n ) )
508  noscale = .true.
509  goscale = .true.
510 *
511  IF( lower ) THEN
512 * the input matrix is M-by-N lower triangular (trapezoidal)
513  DO 1874 p = 1, n
514  aapp = zero
515  aaqq = one
516  CALL dlassq( m-p+1, a( p, p ), 1, aapp, aaqq )
517  IF( aapp.GT.big ) THEN
518  info = -6
519  CALL xerbla( 'DGESVJ', -info )
520  return
521  END IF
522  aaqq = dsqrt( aaqq )
523  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
524  sva( p ) = aapp*aaqq
525  ELSE
526  noscale = .false.
527  sva( p ) = aapp*( aaqq*skl)
528  IF( goscale ) THEN
529  goscale = .false.
530  DO 1873 q = 1, p - 1
531  sva( q ) = sva( q )*skl
532  1873 continue
533  END IF
534  END IF
535  1874 continue
536  ELSE IF( upper ) THEN
537 * the input matrix is M-by-N upper triangular (trapezoidal)
538  DO 2874 p = 1, n
539  aapp = zero
540  aaqq = one
541  CALL dlassq( p, a( 1, p ), 1, aapp, aaqq )
542  IF( aapp.GT.big ) THEN
543  info = -6
544  CALL xerbla( 'DGESVJ', -info )
545  return
546  END IF
547  aaqq = dsqrt( aaqq )
548  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
549  sva( p ) = aapp*aaqq
550  ELSE
551  noscale = .false.
552  sva( p ) = aapp*( aaqq*skl)
553  IF( goscale ) THEN
554  goscale = .false.
555  DO 2873 q = 1, p - 1
556  sva( q ) = sva( q )*skl
557  2873 continue
558  END IF
559  END IF
560  2874 continue
561  ELSE
562 * the input matrix is M-by-N general dense
563  DO 3874 p = 1, n
564  aapp = zero
565  aaqq = one
566  CALL dlassq( m, a( 1, p ), 1, aapp, aaqq )
567  IF( aapp.GT.big ) THEN
568  info = -6
569  CALL xerbla( 'DGESVJ', -info )
570  return
571  END IF
572  aaqq = dsqrt( aaqq )
573  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
574  sva( p ) = aapp*aaqq
575  ELSE
576  noscale = .false.
577  sva( p ) = aapp*( aaqq*skl)
578  IF( goscale ) THEN
579  goscale = .false.
580  DO 3873 q = 1, p - 1
581  sva( q ) = sva( q )*skl
582  3873 continue
583  END IF
584  END IF
585  3874 continue
586  END IF
587 *
588  IF( noscale )skl= one
589 *
590 * Move the smaller part of the spectrum from the underflow threshold
591 *(!) Start by determining the position of the nonzero entries of the
592 * array SVA() relative to ( SFMIN, BIG ).
593 *
594  aapp = zero
595  aaqq = big
596  DO 4781 p = 1, n
597  IF( sva( p ).NE.zero )aaqq = dmin1( aaqq, sva( p ) )
598  aapp = dmax1( aapp, sva( p ) )
599  4781 continue
600 *
601 * #:) Quick return for zero matrix
602 *
603  IF( aapp.EQ.zero ) THEN
604  IF( lsvec )CALL dlaset( 'G', m, n, zero, one, a, lda )
605  work( 1 ) = one
606  work( 2 ) = zero
607  work( 3 ) = zero
608  work( 4 ) = zero
609  work( 5 ) = zero
610  work( 6 ) = zero
611  return
612  END IF
613 *
614 * #:) Quick return for one-column matrix
615 *
616  IF( n.EQ.1 ) THEN
617  IF( lsvec )CALL dlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,
618  $ a( 1, 1 ), lda, ierr )
619  work( 1 ) = one / skl
620  IF( sva( 1 ).GE.sfmin ) THEN
621  work( 2 ) = one
622  ELSE
623  work( 2 ) = zero
624  END IF
625  work( 3 ) = zero
626  work( 4 ) = zero
627  work( 5 ) = zero
628  work( 6 ) = zero
629  return
630  END IF
631 *
632 * Protect small singular values from underflow, and try to
633 * avoid underflows/overflows in computing Jacobi rotations.
634 *
635  sn = dsqrt( sfmin / epsln )
636  temp1 = dsqrt( big / dble( n ) )
637  IF( ( aapp.LE.sn ) .OR. ( aaqq.GE.temp1 ) .OR.
638  $ ( ( sn.LE.aaqq ) .AND. ( aapp.LE.temp1 ) ) ) THEN
639  temp1 = dmin1( big, temp1 / aapp )
640 * AAQQ = AAQQ*TEMP1
641 * AAPP = AAPP*TEMP1
642  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.LE.temp1 ) ) THEN
643  temp1 = dmin1( sn / aaqq, big / ( aapp*dsqrt( dble( n ) ) ) )
644 * AAQQ = AAQQ*TEMP1
645 * AAPP = AAPP*TEMP1
646  ELSE IF( ( aaqq.GE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
647  temp1 = dmax1( sn / aaqq, temp1 / aapp )
648 * AAQQ = AAQQ*TEMP1
649 * AAPP = AAPP*TEMP1
650  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
651  temp1 = dmin1( sn / aaqq, big / ( dsqrt( dble( n ) )*aapp ) )
652 * AAQQ = AAQQ*TEMP1
653 * AAPP = AAPP*TEMP1
654  ELSE
655  temp1 = one
656  END IF
657 *
658 * Scale, if necessary
659 *
660  IF( temp1.NE.one ) THEN
661  CALL dlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr )
662  END IF
663  skl= temp1*skl
664  IF( skl.NE.one ) THEN
665  CALL dlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr )
666  skl= one / skl
667  END IF
668 *
669 * Row-cyclic Jacobi SVD algorithm with column pivoting
670 *
671  emptsw = ( n*( n-1 ) ) / 2
672  notrot = 0
673  fastr( 1 ) = zero
674 *
675 * A is represented in factored form A = A * diag(WORK), where diag(WORK)
676 * is initialized to identity. WORK is updated during fast scaled
677 * rotations.
678 *
679  DO 1868 q = 1, n
680  work( q ) = one
681  1868 continue
682 *
683 *
684  swband = 3
685 *[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective
686 * if DGESVJ is used as a computational routine in the preconditioned
687 * Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure
688 * works on pivots inside a band-like region around the diagonal.
689 * The boundaries are determined dynamically, based on the number of
690 * pivots above a threshold.
691 *
692  kbl = min0( 8, n )
693 *[TP] KBL is a tuning parameter that defines the tile size in the
694 * tiling of the p-q loops of pivot pairs. In general, an optimal
695 * value of KBL depends on the matrix dimensions and on the
696 * parameters of the computer's memory.
697 *
698  nbl = n / kbl
699  IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
700 *
701  blskip = kbl**2
702 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
703 *
704  rowskip = min0( 5, kbl )
705 *[TP] ROWSKIP is a tuning parameter.
706 *
707  lkahead = 1
708 *[TP] LKAHEAD is a tuning parameter.
709 *
710 * Quasi block transformations, using the lower (upper) triangular
711 * structure of the input matrix. The quasi-block-cycling usually
712 * invokes cubic convergence. Big part of this cycle is done inside
713 * canonical subspaces of dimensions less than M.
714 *
715  IF( ( lower .OR. upper ) .AND. ( n.GT.max0( 64, 4*kbl ) ) ) THEN
716 *[TP] The number of partition levels and the actual partition are
717 * tuning parameters.
718  n4 = n / 4
719  n2 = n / 2
720  n34 = 3*n4
721  IF( applv ) THEN
722  q = 0
723  ELSE
724  q = 1
725  END IF
726 *
727  IF( lower ) THEN
728 *
729 * This works very well on lower triangular matrices, in particular
730 * in the framework of the preconditioned Jacobi SVD (xGEJSV).
731 * The idea is simple:
732 * [+ 0 0 0] Note that Jacobi transformations of [0 0]
733 * [+ + 0 0] [0 0]
734 * [+ + x 0] actually work on [x 0] [x 0]
735 * [+ + x x] [x x]. [x x]
736 *
737  CALL dgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,
738  $ work( n34+1 ), sva( n34+1 ), mvl,
739  $ v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,
740  $ 2, work( n+1 ), lwork-n, ierr )
741 *
742  CALL dgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,
743  $ work( n2+1 ), sva( n2+1 ), mvl,
744  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,
745  $ work( n+1 ), lwork-n, ierr )
746 *
747  CALL dgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,
748  $ work( n2+1 ), sva( n2+1 ), mvl,
749  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
750  $ work( n+1 ), lwork-n, ierr )
751 *
752  CALL dgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,
753  $ work( n4+1 ), sva( n4+1 ), mvl,
754  $ v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,
755  $ work( n+1 ), lwork-n, ierr )
756 *
757  CALL dgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,
758  $ epsln, sfmin, tol, 1, work( n+1 ), lwork-n,
759  $ ierr )
760 *
761  CALL dgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,
762  $ ldv, epsln, sfmin, tol, 1, work( n+1 ),
763  $ lwork-n, ierr )
764 *
765 *
766  ELSE IF( upper ) THEN
767 *
768 *
769  CALL dgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,
770  $ epsln, sfmin, tol, 2, work( n+1 ), lwork-n,
771  $ ierr )
772 *
773  CALL dgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, work( n4+1 ),
774  $ sva( n4+1 ), mvl, v( n4*q+1, n4+1 ), ldv,
775  $ epsln, sfmin, tol, 1, work( n+1 ), lwork-n,
776  $ ierr )
777 *
778  CALL dgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,
779  $ ldv, epsln, sfmin, tol, 1, work( n+1 ),
780  $ lwork-n, ierr )
781 *
782  CALL dgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,
783  $ work( n2+1 ), sva( n2+1 ), mvl,
784  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
785  $ work( n+1 ), lwork-n, ierr )
786 
787  END IF
788 *
789  END IF
790 *
791 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
792 *
793  DO 1993 i = 1, nsweep
794 *
795 * .. go go go ...
796 *
797  mxaapq = zero
798  mxsinj = zero
799  iswrot = 0
800 *
801  notrot = 0
802  pskipped = 0
803 *
804 * Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs
805 * 1 <= p < q <= N. This is the first step toward a blocked implementation
806 * of the rotations. New implementation, based on block transformations,
807 * is under development.
808 *
809  DO 2000 ibr = 1, nbl
810 *
811  igl = ( ibr-1 )*kbl + 1
812 *
813  DO 1002 ir1 = 0, min0( lkahead, nbl-ibr )
814 *
815  igl = igl + ir1*kbl
816 *
817  DO 2001 p = igl, min0( igl+kbl-1, n-1 )
818 *
819 * .. de Rijk's pivoting
820 *
821  q = idamax( n-p+1, sva( p ), 1 ) + p - 1
822  IF( p.NE.q ) THEN
823  CALL dswap( m, a( 1, p ), 1, a( 1, q ), 1 )
824  IF( rsvec )CALL dswap( mvl, v( 1, p ), 1,
825  $ v( 1, q ), 1 )
826  temp1 = sva( p )
827  sva( p ) = sva( q )
828  sva( q ) = temp1
829  temp1 = work( p )
830  work( p ) = work( q )
831  work( q ) = temp1
832  END IF
833 *
834  IF( ir1.EQ.0 ) THEN
835 *
836 * Column norms are periodically updated by explicit
837 * norm computation.
838 * Caveat:
839 * Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1)
840 * as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to
841 * overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to
842 * underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold).
843 * Hence, DNRM2 cannot be trusted, not even in the case when
844 * the true norm is far from the under(over)flow boundaries.
845 * If properly implemented DNRM2 is available, the IF-THEN-ELSE
846 * below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)".
847 *
848  IF( ( sva( p ).LT.rootbig ) .AND.
849  $ ( sva( p ).GT.rootsfmin ) ) THEN
850  sva( p ) = dnrm2( m, a( 1, p ), 1 )*work( p )
851  ELSE
852  temp1 = zero
853  aapp = one
854  CALL dlassq( m, a( 1, p ), 1, temp1, aapp )
855  sva( p ) = temp1*dsqrt( aapp )*work( p )
856  END IF
857  aapp = sva( p )
858  ELSE
859  aapp = sva( p )
860  END IF
861 *
862  IF( aapp.GT.zero ) THEN
863 *
864  pskipped = 0
865 *
866  DO 2002 q = p + 1, min0( igl+kbl-1, n )
867 *
868  aaqq = sva( q )
869 *
870  IF( aaqq.GT.zero ) THEN
871 *
872  aapp0 = aapp
873  IF( aaqq.GE.one ) THEN
874  rotok = ( small*aapp ).LE.aaqq
875  IF( aapp.LT.( big / aaqq ) ) THEN
876  aapq = ( ddot( m, a( 1, p ), 1, a( 1,
877  $ q ), 1 )*work( p )*work( q ) /
878  $ aaqq ) / aapp
879  ELSE
880  CALL dcopy( m, a( 1, p ), 1,
881  $ work( n+1 ), 1 )
882  CALL dlascl( 'G', 0, 0, aapp,
883  $ work( p ), m, 1,
884  $ work( n+1 ), lda, ierr )
885  aapq = ddot( m, work( n+1 ), 1,
886  $ a( 1, q ), 1 )*work( q ) / aaqq
887  END IF
888  ELSE
889  rotok = aapp.LE.( aaqq / small )
890  IF( aapp.GT.( small / aaqq ) ) THEN
891  aapq = ( ddot( m, a( 1, p ), 1, a( 1,
892  $ q ), 1 )*work( p )*work( q ) /
893  $ aaqq ) / aapp
894  ELSE
895  CALL dcopy( m, a( 1, q ), 1,
896  $ work( n+1 ), 1 )
897  CALL dlascl( 'G', 0, 0, aaqq,
898  $ work( q ), m, 1,
899  $ work( n+1 ), lda, ierr )
900  aapq = ddot( m, work( n+1 ), 1,
901  $ a( 1, p ), 1 )*work( p ) / aapp
902  END IF
903  END IF
904 *
905  mxaapq = dmax1( mxaapq, dabs( aapq ) )
906 *
907 * TO rotate or NOT to rotate, THAT is the question ...
908 *
909  IF( dabs( aapq ).GT.tol ) THEN
910 *
911 * .. rotate
912 *[RTD] ROTATED = ROTATED + ONE
913 *
914  IF( ir1.EQ.0 ) THEN
915  notrot = 0
916  pskipped = 0
917  iswrot = iswrot + 1
918  END IF
919 *
920  IF( rotok ) THEN
921 *
922  aqoap = aaqq / aapp
923  apoaq = aapp / aaqq
924  theta = -half*dabs(aqoap-apoaq)/aapq
925 *
926  IF( dabs( theta ).GT.bigtheta ) THEN
927 *
928  t = half / theta
929  fastr( 3 ) = t*work( p ) / work( q )
930  fastr( 4 ) = -t*work( q ) /
931  $ work( p )
932  CALL drotm( m, a( 1, p ), 1,
933  $ a( 1, q ), 1, fastr )
934  IF( rsvec )CALL drotm( mvl,
935  $ v( 1, p ), 1,
936  $ v( 1, q ), 1,
937  $ fastr )
938  sva( q ) = aaqq*dsqrt( dmax1( zero,
939  $ one+t*apoaq*aapq ) )
940  aapp = aapp*dsqrt( dmax1( zero,
941  $ one-t*aqoap*aapq ) )
942  mxsinj = dmax1( mxsinj, dabs( t ) )
943 *
944  ELSE
945 *
946 * .. choose correct signum for THETA and rotate
947 *
948  thsign = -dsign( one, aapq )
949  t = one / ( theta+thsign*
950  $ dsqrt( one+theta*theta ) )
951  cs = dsqrt( one / ( one+t*t ) )
952  sn = t*cs
953 *
954  mxsinj = dmax1( mxsinj, dabs( sn ) )
955  sva( q ) = aaqq*dsqrt( dmax1( zero,
956  $ one+t*apoaq*aapq ) )
957  aapp = aapp*dsqrt( dmax1( zero,
958  $ one-t*aqoap*aapq ) )
959 *
960  apoaq = work( p ) / work( q )
961  aqoap = work( q ) / work( p )
962  IF( work( p ).GE.one ) THEN
963  IF( work( q ).GE.one ) THEN
964  fastr( 3 ) = t*apoaq
965  fastr( 4 ) = -t*aqoap
966  work( p ) = work( p )*cs
967  work( q ) = work( q )*cs
968  CALL drotm( m, a( 1, p ), 1,
969  $ a( 1, q ), 1,
970  $ fastr )
971  IF( rsvec )CALL drotm( mvl,
972  $ v( 1, p ), 1, v( 1, q ),
973  $ 1, fastr )
974  ELSE
975  CALL daxpy( m, -t*aqoap,
976  $ a( 1, q ), 1,
977  $ a( 1, p ), 1 )
978  CALL daxpy( m, cs*sn*apoaq,
979  $ a( 1, p ), 1,
980  $ a( 1, q ), 1 )
981  work( p ) = work( p )*cs
982  work( q ) = work( q ) / cs
983  IF( rsvec ) THEN
984  CALL daxpy( mvl, -t*aqoap,
985  $ v( 1, q ), 1,
986  $ v( 1, p ), 1 )
987  CALL daxpy( mvl,
988  $ cs*sn*apoaq,
989  $ v( 1, p ), 1,
990  $ v( 1, q ), 1 )
991  END IF
992  END IF
993  ELSE
994  IF( work( q ).GE.one ) THEN
995  CALL daxpy( m, t*apoaq,
996  $ a( 1, p ), 1,
997  $ a( 1, q ), 1 )
998  CALL daxpy( m, -cs*sn*aqoap,
999  $ a( 1, q ), 1,
1000  $ a( 1, p ), 1 )
1001  work( p ) = work( p ) / cs
1002  work( q ) = work( q )*cs
1003  IF( rsvec ) THEN
1004  CALL daxpy( mvl, t*apoaq,
1005  $ v( 1, p ), 1,
1006  $ v( 1, q ), 1 )
1007  CALL daxpy( mvl,
1008  $ -cs*sn*aqoap,
1009  $ v( 1, q ), 1,
1010  $ v( 1, p ), 1 )
1011  END IF
1012  ELSE
1013  IF( work( p ).GE.work( q ) )
1014  $ THEN
1015  CALL daxpy( m, -t*aqoap,
1016  $ a( 1, q ), 1,
1017  $ a( 1, p ), 1 )
1018  CALL daxpy( m, cs*sn*apoaq,
1019  $ a( 1, p ), 1,
1020  $ a( 1, q ), 1 )
1021  work( p ) = work( p )*cs
1022  work( q ) = work( q ) / cs
1023  IF( rsvec ) THEN
1024  CALL daxpy( mvl,
1025  $ -t*aqoap,
1026  $ v( 1, q ), 1,
1027  $ v( 1, p ), 1 )
1028  CALL daxpy( mvl,
1029  $ cs*sn*apoaq,
1030  $ v( 1, p ), 1,
1031  $ v( 1, q ), 1 )
1032  END IF
1033  ELSE
1034  CALL daxpy( m, t*apoaq,
1035  $ a( 1, p ), 1,
1036  $ a( 1, q ), 1 )
1037  CALL daxpy( m,
1038  $ -cs*sn*aqoap,
1039  $ a( 1, q ), 1,
1040  $ a( 1, p ), 1 )
1041  work( p ) = work( p ) / cs
1042  work( q ) = work( q )*cs
1043  IF( rsvec ) THEN
1044  CALL daxpy( mvl,
1045  $ t*apoaq, v( 1, p ),
1046  $ 1, v( 1, q ), 1 )
1047  CALL daxpy( mvl,
1048  $ -cs*sn*aqoap,
1049  $ v( 1, q ), 1,
1050  $ v( 1, p ), 1 )
1051  END IF
1052  END IF
1053  END IF
1054  END IF
1055  END IF
1056 *
1057  ELSE
1058 * .. have to use modified Gram-Schmidt like transformation
1059  CALL dcopy( m, a( 1, p ), 1,
1060  $ work( n+1 ), 1 )
1061  CALL dlascl( 'G', 0, 0, aapp, one, m,
1062  $ 1, work( n+1 ), lda,
1063  $ ierr )
1064  CALL dlascl( 'G', 0, 0, aaqq, one, m,
1065  $ 1, a( 1, q ), lda, ierr )
1066  temp1 = -aapq*work( p ) / work( q )
1067  CALL daxpy( m, temp1, work( n+1 ), 1,
1068  $ a( 1, q ), 1 )
1069  CALL dlascl( 'G', 0, 0, one, aaqq, m,
1070  $ 1, a( 1, q ), lda, ierr )
1071  sva( q ) = aaqq*dsqrt( dmax1( zero,
1072  $ one-aapq*aapq ) )
1073  mxsinj = dmax1( mxsinj, sfmin )
1074  END IF
1075 * END IF ROTOK THEN ... ELSE
1076 *
1077 * In the case of cancellation in updating SVA(q), SVA(p)
1078 * recompute SVA(q), SVA(p).
1079 *
1080  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1081  $ THEN
1082  IF( ( aaqq.LT.rootbig ) .AND.
1083  $ ( aaqq.GT.rootsfmin ) ) THEN
1084  sva( q ) = dnrm2( m, a( 1, q ), 1 )*
1085  $ work( q )
1086  ELSE
1087  t = zero
1088  aaqq = one
1089  CALL dlassq( m, a( 1, q ), 1, t,
1090  $ aaqq )
1091  sva( q ) = t*dsqrt( aaqq )*work( q )
1092  END IF
1093  END IF
1094  IF( ( aapp / aapp0 ).LE.rooteps ) THEN
1095  IF( ( aapp.LT.rootbig ) .AND.
1096  $ ( aapp.GT.rootsfmin ) ) THEN
1097  aapp = dnrm2( m, a( 1, p ), 1 )*
1098  $ work( p )
1099  ELSE
1100  t = zero
1101  aapp = one
1102  CALL dlassq( m, a( 1, p ), 1, t,
1103  $ aapp )
1104  aapp = t*dsqrt( aapp )*work( p )
1105  END IF
1106  sva( p ) = aapp
1107  END IF
1108 *
1109  ELSE
1110 * A(:,p) and A(:,q) already numerically orthogonal
1111  IF( ir1.EQ.0 )notrot = notrot + 1
1112 *[RTD] SKIPPED = SKIPPED + 1
1113  pskipped = pskipped + 1
1114  END IF
1115  ELSE
1116 * A(:,q) is zero column
1117  IF( ir1.EQ.0 )notrot = notrot + 1
1118  pskipped = pskipped + 1
1119  END IF
1120 *
1121  IF( ( i.LE.swband ) .AND.
1122  $ ( pskipped.GT.rowskip ) ) THEN
1123  IF( ir1.EQ.0 )aapp = -aapp
1124  notrot = 0
1125  go to 2103
1126  END IF
1127 *
1128  2002 continue
1129 * END q-LOOP
1130 *
1131  2103 continue
1132 * bailed out of q-loop
1133 *
1134  sva( p ) = aapp
1135 *
1136  ELSE
1137  sva( p ) = aapp
1138  IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
1139  $ notrot = notrot + min0( igl+kbl-1, n ) - p
1140  END IF
1141 *
1142  2001 continue
1143 * end of the p-loop
1144 * end of doing the block ( ibr, ibr )
1145  1002 continue
1146 * end of ir1-loop
1147 *
1148 * ... go to the off diagonal blocks
1149 *
1150  igl = ( ibr-1 )*kbl + 1
1151 *
1152  DO 2010 jbc = ibr + 1, nbl
1153 *
1154  jgl = ( jbc-1 )*kbl + 1
1155 *
1156 * doing the block at ( ibr, jbc )
1157 *
1158  ijblsk = 0
1159  DO 2100 p = igl, min0( igl+kbl-1, n )
1160 *
1161  aapp = sva( p )
1162  IF( aapp.GT.zero ) THEN
1163 *
1164  pskipped = 0
1165 *
1166  DO 2200 q = jgl, min0( jgl+kbl-1, n )
1167 *
1168  aaqq = sva( q )
1169  IF( aaqq.GT.zero ) THEN
1170  aapp0 = aapp
1171 *
1172 * .. M x 2 Jacobi SVD ..
1173 *
1174 * Safe Gram matrix computation
1175 *
1176  IF( aaqq.GE.one ) THEN
1177  IF( aapp.GE.aaqq ) THEN
1178  rotok = ( small*aapp ).LE.aaqq
1179  ELSE
1180  rotok = ( small*aaqq ).LE.aapp
1181  END IF
1182  IF( aapp.LT.( big / aaqq ) ) THEN
1183  aapq = ( ddot( m, a( 1, p ), 1, a( 1,
1184  $ q ), 1 )*work( p )*work( q ) /
1185  $ aaqq ) / aapp
1186  ELSE
1187  CALL dcopy( m, a( 1, p ), 1,
1188  $ work( n+1 ), 1 )
1189  CALL dlascl( 'G', 0, 0, aapp,
1190  $ work( p ), m, 1,
1191  $ work( n+1 ), lda, ierr )
1192  aapq = ddot( m, work( n+1 ), 1,
1193  $ a( 1, q ), 1 )*work( q ) / aaqq
1194  END IF
1195  ELSE
1196  IF( aapp.GE.aaqq ) THEN
1197  rotok = aapp.LE.( aaqq / small )
1198  ELSE
1199  rotok = aaqq.LE.( aapp / small )
1200  END IF
1201  IF( aapp.GT.( small / aaqq ) ) THEN
1202  aapq = ( ddot( m, a( 1, p ), 1, a( 1,
1203  $ q ), 1 )*work( p )*work( q ) /
1204  $ aaqq ) / aapp
1205  ELSE
1206  CALL dcopy( m, a( 1, q ), 1,
1207  $ work( n+1 ), 1 )
1208  CALL dlascl( 'G', 0, 0, aaqq,
1209  $ work( q ), m, 1,
1210  $ work( n+1 ), lda, ierr )
1211  aapq = ddot( m, work( n+1 ), 1,
1212  $ a( 1, p ), 1 )*work( p ) / aapp
1213  END IF
1214  END IF
1215 *
1216  mxaapq = dmax1( mxaapq, dabs( aapq ) )
1217 *
1218 * TO rotate or NOT to rotate, THAT is the question ...
1219 *
1220  IF( dabs( aapq ).GT.tol ) THEN
1221  notrot = 0
1222 *[RTD] ROTATED = ROTATED + 1
1223  pskipped = 0
1224  iswrot = iswrot + 1
1225 *
1226  IF( rotok ) THEN
1227 *
1228  aqoap = aaqq / aapp
1229  apoaq = aapp / aaqq
1230  theta = -half*dabs(aqoap-apoaq)/aapq
1231  IF( aaqq.GT.aapp0 )theta = -theta
1232 *
1233  IF( dabs( theta ).GT.bigtheta ) THEN
1234  t = half / theta
1235  fastr( 3 ) = t*work( p ) / work( q )
1236  fastr( 4 ) = -t*work( q ) /
1237  $ work( p )
1238  CALL drotm( m, a( 1, p ), 1,
1239  $ a( 1, q ), 1, fastr )
1240  IF( rsvec )CALL drotm( mvl,
1241  $ v( 1, p ), 1,
1242  $ v( 1, q ), 1,
1243  $ fastr )
1244  sva( q ) = aaqq*dsqrt( dmax1( zero,
1245  $ one+t*apoaq*aapq ) )
1246  aapp = aapp*dsqrt( dmax1( zero,
1247  $ one-t*aqoap*aapq ) )
1248  mxsinj = dmax1( mxsinj, dabs( t ) )
1249  ELSE
1250 *
1251 * .. choose correct signum for THETA and rotate
1252 *
1253  thsign = -dsign( one, aapq )
1254  IF( aaqq.GT.aapp0 )thsign = -thsign
1255  t = one / ( theta+thsign*
1256  $ dsqrt( one+theta*theta ) )
1257  cs = dsqrt( one / ( one+t*t ) )
1258  sn = t*cs
1259  mxsinj = dmax1( mxsinj, dabs( sn ) )
1260  sva( q ) = aaqq*dsqrt( dmax1( zero,
1261  $ one+t*apoaq*aapq ) )
1262  aapp = aapp*dsqrt( dmax1( zero,
1263  $ one-t*aqoap*aapq ) )
1264 *
1265  apoaq = work( p ) / work( q )
1266  aqoap = work( q ) / work( p )
1267  IF( work( p ).GE.one ) THEN
1268 *
1269  IF( work( q ).GE.one ) THEN
1270  fastr( 3 ) = t*apoaq
1271  fastr( 4 ) = -t*aqoap
1272  work( p ) = work( p )*cs
1273  work( q ) = work( q )*cs
1274  CALL drotm( m, a( 1, p ), 1,
1275  $ a( 1, q ), 1,
1276  $ fastr )
1277  IF( rsvec )CALL drotm( mvl,
1278  $ v( 1, p ), 1, v( 1, q ),
1279  $ 1, fastr )
1280  ELSE
1281  CALL daxpy( m, -t*aqoap,
1282  $ a( 1, q ), 1,
1283  $ a( 1, p ), 1 )
1284  CALL daxpy( m, cs*sn*apoaq,
1285  $ a( 1, p ), 1,
1286  $ a( 1, q ), 1 )
1287  IF( rsvec ) THEN
1288  CALL daxpy( mvl, -t*aqoap,
1289  $ v( 1, q ), 1,
1290  $ v( 1, p ), 1 )
1291  CALL daxpy( mvl,
1292  $ cs*sn*apoaq,
1293  $ v( 1, p ), 1,
1294  $ v( 1, q ), 1 )
1295  END IF
1296  work( p ) = work( p )*cs
1297  work( q ) = work( q ) / cs
1298  END IF
1299  ELSE
1300  IF( work( q ).GE.one ) THEN
1301  CALL daxpy( m, t*apoaq,
1302  $ a( 1, p ), 1,
1303  $ a( 1, q ), 1 )
1304  CALL daxpy( m, -cs*sn*aqoap,
1305  $ a( 1, q ), 1,
1306  $ a( 1, p ), 1 )
1307  IF( rsvec ) THEN
1308  CALL daxpy( mvl, t*apoaq,
1309  $ v( 1, p ), 1,
1310  $ v( 1, q ), 1 )
1311  CALL daxpy( mvl,
1312  $ -cs*sn*aqoap,
1313  $ v( 1, q ), 1,
1314  $ v( 1, p ), 1 )
1315  END IF
1316  work( p ) = work( p ) / cs
1317  work( q ) = work( q )*cs
1318  ELSE
1319  IF( work( p ).GE.work( q ) )
1320  $ THEN
1321  CALL daxpy( m, -t*aqoap,
1322  $ a( 1, q ), 1,
1323  $ a( 1, p ), 1 )
1324  CALL daxpy( m, cs*sn*apoaq,
1325  $ a( 1, p ), 1,
1326  $ a( 1, q ), 1 )
1327  work( p ) = work( p )*cs
1328  work( q ) = work( q ) / cs
1329  IF( rsvec ) THEN
1330  CALL daxpy( mvl,
1331  $ -t*aqoap,
1332  $ v( 1, q ), 1,
1333  $ v( 1, p ), 1 )
1334  CALL daxpy( mvl,
1335  $ cs*sn*apoaq,
1336  $ v( 1, p ), 1,
1337  $ v( 1, q ), 1 )
1338  END IF
1339  ELSE
1340  CALL daxpy( m, t*apoaq,
1341  $ a( 1, p ), 1,
1342  $ a( 1, q ), 1 )
1343  CALL daxpy( m,
1344  $ -cs*sn*aqoap,
1345  $ a( 1, q ), 1,
1346  $ a( 1, p ), 1 )
1347  work( p ) = work( p ) / cs
1348  work( q ) = work( q )*cs
1349  IF( rsvec ) THEN
1350  CALL daxpy( mvl,
1351  $ t*apoaq, v( 1, p ),
1352  $ 1, v( 1, q ), 1 )
1353  CALL daxpy( mvl,
1354  $ -cs*sn*aqoap,
1355  $ v( 1, q ), 1,
1356  $ v( 1, p ), 1 )
1357  END IF
1358  END IF
1359  END IF
1360  END IF
1361  END IF
1362 *
1363  ELSE
1364  IF( aapp.GT.aaqq ) THEN
1365  CALL dcopy( m, a( 1, p ), 1,
1366  $ work( n+1 ), 1 )
1367  CALL dlascl( 'G', 0, 0, aapp, one,
1368  $ m, 1, work( n+1 ), lda,
1369  $ ierr )
1370  CALL dlascl( 'G', 0, 0, aaqq, one,
1371  $ m, 1, a( 1, q ), lda,
1372  $ ierr )
1373  temp1 = -aapq*work( p ) / work( q )
1374  CALL daxpy( m, temp1, work( n+1 ),
1375  $ 1, a( 1, q ), 1 )
1376  CALL dlascl( 'G', 0, 0, one, aaqq,
1377  $ m, 1, a( 1, q ), lda,
1378  $ ierr )
1379  sva( q ) = aaqq*dsqrt( dmax1( zero,
1380  $ one-aapq*aapq ) )
1381  mxsinj = dmax1( mxsinj, sfmin )
1382  ELSE
1383  CALL dcopy( m, a( 1, q ), 1,
1384  $ work( n+1 ), 1 )
1385  CALL dlascl( 'G', 0, 0, aaqq, one,
1386  $ m, 1, work( n+1 ), lda,
1387  $ ierr )
1388  CALL dlascl( 'G', 0, 0, aapp, one,
1389  $ m, 1, a( 1, p ), lda,
1390  $ ierr )
1391  temp1 = -aapq*work( q ) / work( p )
1392  CALL daxpy( m, temp1, work( n+1 ),
1393  $ 1, a( 1, p ), 1 )
1394  CALL dlascl( 'G', 0, 0, one, aapp,
1395  $ m, 1, a( 1, p ), lda,
1396  $ ierr )
1397  sva( p ) = aapp*dsqrt( dmax1( zero,
1398  $ one-aapq*aapq ) )
1399  mxsinj = dmax1( mxsinj, sfmin )
1400  END IF
1401  END IF
1402 * END IF ROTOK THEN ... ELSE
1403 *
1404 * In the case of cancellation in updating SVA(q)
1405 * .. recompute SVA(q)
1406  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1407  $ THEN
1408  IF( ( aaqq.LT.rootbig ) .AND.
1409  $ ( aaqq.GT.rootsfmin ) ) THEN
1410  sva( q ) = dnrm2( m, a( 1, q ), 1 )*
1411  $ work( q )
1412  ELSE
1413  t = zero
1414  aaqq = one
1415  CALL dlassq( m, a( 1, q ), 1, t,
1416  $ aaqq )
1417  sva( q ) = t*dsqrt( aaqq )*work( q )
1418  END IF
1419  END IF
1420  IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
1421  IF( ( aapp.LT.rootbig ) .AND.
1422  $ ( aapp.GT.rootsfmin ) ) THEN
1423  aapp = dnrm2( m, a( 1, p ), 1 )*
1424  $ work( p )
1425  ELSE
1426  t = zero
1427  aapp = one
1428  CALL dlassq( m, a( 1, p ), 1, t,
1429  $ aapp )
1430  aapp = t*dsqrt( aapp )*work( p )
1431  END IF
1432  sva( p ) = aapp
1433  END IF
1434 * end of OK rotation
1435  ELSE
1436  notrot = notrot + 1
1437 *[RTD] SKIPPED = SKIPPED + 1
1438  pskipped = pskipped + 1
1439  ijblsk = ijblsk + 1
1440  END IF
1441  ELSE
1442  notrot = notrot + 1
1443  pskipped = pskipped + 1
1444  ijblsk = ijblsk + 1
1445  END IF
1446 *
1447  IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
1448  $ THEN
1449  sva( p ) = aapp
1450  notrot = 0
1451  go to 2011
1452  END IF
1453  IF( ( i.LE.swband ) .AND.
1454  $ ( pskipped.GT.rowskip ) ) THEN
1455  aapp = -aapp
1456  notrot = 0
1457  go to 2203
1458  END IF
1459 *
1460  2200 continue
1461 * end of the q-loop
1462  2203 continue
1463 *
1464  sva( p ) = aapp
1465 *
1466  ELSE
1467 *
1468  IF( aapp.EQ.zero )notrot = notrot +
1469  $ min0( jgl+kbl-1, n ) - jgl + 1
1470  IF( aapp.LT.zero )notrot = 0
1471 *
1472  END IF
1473 *
1474  2100 continue
1475 * end of the p-loop
1476  2010 continue
1477 * end of the jbc-loop
1478  2011 continue
1479 *2011 bailed out of the jbc-loop
1480  DO 2012 p = igl, min0( igl+kbl-1, n )
1481  sva( p ) = dabs( sva( p ) )
1482  2012 continue
1483 ***
1484  2000 continue
1485 *2000 :: end of the ibr-loop
1486 *
1487 * .. update SVA(N)
1488  IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1489  $ THEN
1490  sva( n ) = dnrm2( m, a( 1, n ), 1 )*work( n )
1491  ELSE
1492  t = zero
1493  aapp = one
1494  CALL dlassq( m, a( 1, n ), 1, t, aapp )
1495  sva( n ) = t*dsqrt( aapp )*work( n )
1496  END IF
1497 *
1498 * Additional steering devices
1499 *
1500  IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1501  $ ( iswrot.LE.n ) ) )swband = i
1502 *
1503  IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.dsqrt( dble( n ) )*
1504  $ tol ) .AND. ( dble( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1505  go to 1994
1506  END IF
1507 *
1508  IF( notrot.GE.emptsw )go to 1994
1509 *
1510  1993 continue
1511 * end i=1:NSWEEP loop
1512 *
1513 * #:( Reaching this point means that the procedure has not converged.
1514  info = nsweep - 1
1515  go to 1995
1516 *
1517  1994 continue
1518 * #:) Reaching this point means numerical convergence after the i-th
1519 * sweep.
1520 *
1521  info = 0
1522 * #:) INFO = 0 confirms successful iterations.
1523  1995 continue
1524 *
1525 * Sort the singular values and find how many are above
1526 * the underflow threshold.
1527 *
1528  n2 = 0
1529  n4 = 0
1530  DO 5991 p = 1, n - 1
1531  q = idamax( n-p+1, sva( p ), 1 ) + p - 1
1532  IF( p.NE.q ) THEN
1533  temp1 = sva( p )
1534  sva( p ) = sva( q )
1535  sva( q ) = temp1
1536  temp1 = work( p )
1537  work( p ) = work( q )
1538  work( q ) = temp1
1539  CALL dswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1540  IF( rsvec )CALL dswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1541  END IF
1542  IF( sva( p ).NE.zero ) THEN
1543  n4 = n4 + 1
1544  IF( sva( p )*skl.GT.sfmin )n2 = n2 + 1
1545  END IF
1546  5991 continue
1547  IF( sva( n ).NE.zero ) THEN
1548  n4 = n4 + 1
1549  IF( sva( n )*skl.GT.sfmin )n2 = n2 + 1
1550  END IF
1551 *
1552 * Normalize the left singular vectors.
1553 *
1554  IF( lsvec .OR. uctol ) THEN
1555  DO 1998 p = 1, n2
1556  CALL dscal( m, work( p ) / sva( p ), a( 1, p ), 1 )
1557  1998 continue
1558  END IF
1559 *
1560 * Scale the product of Jacobi rotations (assemble the fast rotations).
1561 *
1562  IF( rsvec ) THEN
1563  IF( applv ) THEN
1564  DO 2398 p = 1, n
1565  CALL dscal( mvl, work( p ), v( 1, p ), 1 )
1566  2398 continue
1567  ELSE
1568  DO 2399 p = 1, n
1569  temp1 = one / dnrm2( mvl, v( 1, p ), 1 )
1570  CALL dscal( mvl, temp1, v( 1, p ), 1 )
1571  2399 continue
1572  END IF
1573  END IF
1574 *
1575 * Undo scaling, if necessary (and possible).
1576  IF( ( skl.GT.one ).AND.( sva( 1 ).LT.big/skl ) ) THEN
1577  DO p = 1, n
1578  sva( p ) = skl*sva( p )
1579  END DO
1580  skl = one
1581  ELSE IF( n2.GT.0 ) THEN
1582  IF( ( skl.LT.one ).AND.( sva( n2 ).GT.sfmin/skl ) ) THEN
1583  DO p = 1, n
1584  sva( p ) = skl*sva( p )
1585  END DO
1586  skl = one
1587  END IF
1588  END IF
1589 *
1590  work( 1 ) = skl
1591 * The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE
1592 * then some of the singular values may overflow or underflow and
1593 * the spectrum is given in this factored representation.
1594 *
1595  work( 2 ) = dble( n4 )
1596 * N4 is the number of computed nonzero singular values of A.
1597 *
1598  work( 3 ) = dble( n2 )
1599 * N2 is the number of singular values of A greater than SFMIN.
1600 * If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers
1601 * that may carry some information.
1602 *
1603  work( 4 ) = dble( i )
1604 * i is the index of the last sweep before declaring convergence.
1605 *
1606  work( 5 ) = mxaapq
1607 * MXAAPQ is the largest absolute value of scaled pivots in the
1608 * last sweep
1609 *
1610  work( 6 ) = mxsinj
1611 * MXSINJ is the largest absolute value of the sines of Jacobi angles
1612 * in the last sweep
1613 *
1614  return
1615 * ..
1616 * .. END OF DGESVJ
1617 * ..
1618  END