LAPACK  3.5.0
LAPACK: Linear Algebra PACKage
 All Classes Files Functions Variables Typedefs Macros
sgesvj.f
Go to the documentation of this file.
1 *> \brief \b SGESVJ
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESVJ + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvj.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvj.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvj.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
22 * LDV, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, LDA, LDV, LWORK, M, MV, N
26 * CHARACTER*1 JOBA, JOBU, JOBV
27 * ..
28 * .. Array Arguments ..
29 * REAL A( LDA, * ), SVA( N ), V( LDV, * ),
30 * $ WORK( LWORK )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> SGESVJ computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, where M >= N. The SVD of A is written as
41 *> [++] [xx] [x0] [xx]
42 *> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx]
43 *> [++] [xx]
44 *> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
45 *> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements
46 *> of SIGMA are the singular values of A. The columns of U and V are the
47 *> left and the right singular vectors of A, respectively.
48 *> \endverbatim
49 *
50 * Arguments:
51 * ==========
52 *
53 *> \param[in] JOBA
54 *> \verbatim
55 *> JOBA is CHARACTER* 1
56 *> Specifies the structure of A.
57 *> = 'L': The input matrix A is lower triangular;
58 *> = 'U': The input matrix A is upper triangular;
59 *> = 'G': The input matrix A is general M-by-N matrix, M >= N.
60 *> \endverbatim
61 *>
62 *> \param[in] JOBU
63 *> \verbatim
64 *> JOBU is CHARACTER*1
65 *> Specifies whether to compute the left singular vectors
66 *> (columns of U):
67 *> = 'U': The left singular vectors corresponding to the nonzero
68 *> singular values are computed and returned in the leading
69 *> columns of A. See more details in the description of A.
70 *> The default numerical orthogonality threshold is set to
71 *> approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=SLAMCH('E').
72 *> = 'C': Analogous to JOBU='U', except that user can control the
73 *> level of numerical orthogonality of the computed left
74 *> singular vectors. TOL can be set to TOL = CTOL*EPS, where
75 *> CTOL is given on input in the array WORK.
76 *> No CTOL smaller than ONE is allowed. CTOL greater
77 *> than 1 / EPS is meaningless. The option 'C'
78 *> can be used if M*EPS is satisfactory orthogonality
79 *> of the computed left singular vectors, so CTOL=M could
80 *> save few sweeps of Jacobi rotations.
81 *> See the descriptions of A and WORK(1).
82 *> = 'N': The matrix U is not computed. However, see the
83 *> description of A.
84 *> \endverbatim
85 *>
86 *> \param[in] JOBV
87 *> \verbatim
88 *> JOBV is CHARACTER*1
89 *> Specifies whether to compute the right singular vectors, that
90 *> is, the matrix V:
91 *> = 'V' : the matrix V is computed and returned in the array V
92 *> = 'A' : the Jacobi rotations are applied to the MV-by-N
93 *> array V. In other words, the right singular vector
94 *> matrix V is not computed explicitly; instead it is
95 *> applied to an MV-by-N matrix initially stored in the
96 *> first MV rows of V.
97 *> = 'N' : the matrix V is not computed and the array V is not
98 *> referenced
99 *> \endverbatim
100 *>
101 *> \param[in] M
102 *> \verbatim
103 *> M is INTEGER
104 *> The number of rows of the input matrix A. 1/SLAMCH('E') > M >= 0.
105 *> \endverbatim
106 *>
107 *> \param[in] N
108 *> \verbatim
109 *> N is INTEGER
110 *> The number of columns of the input matrix A.
111 *> M >= N >= 0.
112 *> \endverbatim
113 *>
114 *> \param[in,out] A
115 *> \verbatim
116 *> A is REAL array, dimension (LDA,N)
117 *> On entry, the M-by-N matrix A.
118 *> On exit,
119 *> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C':
120 *> If INFO .EQ. 0 :
121 *> RANKA orthonormal columns of U are returned in the
122 *> leading RANKA columns of the array A. Here RANKA <= N
123 *> is the number of computed singular values of A that are
124 *> above the underflow threshold SLAMCH('S'). The singular
125 *> vectors corresponding to underflowed or zero singular
126 *> values are not computed. The value of RANKA is returned
127 *> in the array WORK as RANKA=NINT(WORK(2)). Also see the
128 *> descriptions of SVA and WORK. The computed columns of U
129 *> are mutually numerically orthogonal up to approximately
130 *> TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'),
131 *> see the description of JOBU.
132 *> If INFO .GT. 0,
133 *> the procedure SGESVJ did not converge in the given number
134 *> of iterations (sweeps). In that case, the computed
135 *> columns of U may not be orthogonal up to TOL. The output
136 *> U (stored in A), SIGMA (given by the computed singular
137 *> values in SVA(1:N)) and V is still a decomposition of the
138 *> input matrix A in the sense that the residual
139 *> ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small.
140 *> If JOBU .EQ. 'N':
141 *> If INFO .EQ. 0 :
142 *> Note that the left singular vectors are 'for free' in the
143 *> one-sided Jacobi SVD algorithm. However, if only the
144 *> singular values are needed, the level of numerical
145 *> orthogonality of U is not an issue and iterations are
146 *> stopped when the columns of the iterated matrix are
147 *> numerically orthogonal up to approximately M*EPS. Thus,
148 *> on exit, A contains the columns of U scaled with the
149 *> corresponding singular values.
150 *> If INFO .GT. 0 :
151 *> the procedure SGESVJ did not converge in the given number
152 *> of iterations (sweeps).
153 *> \endverbatim
154 *>
155 *> \param[in] LDA
156 *> \verbatim
157 *> LDA is INTEGER
158 *> The leading dimension of the array A. LDA >= max(1,M).
159 *> \endverbatim
160 *>
161 *> \param[out] SVA
162 *> \verbatim
163 *> SVA is REAL array, dimension (N)
164 *> On exit,
165 *> If INFO .EQ. 0 :
166 *> depending on the value SCALE = WORK(1), we have:
167 *> If SCALE .EQ. ONE:
168 *> SVA(1:N) contains the computed singular values of A.
169 *> During the computation SVA contains the Euclidean column
170 *> norms of the iterated matrices in the array A.
171 *> If SCALE .NE. ONE:
172 *> The singular values of A are SCALE*SVA(1:N), and this
173 *> factored representation is due to the fact that some of the
174 *> singular values of A might underflow or overflow.
175 *>
176 *> If INFO .GT. 0 :
177 *> the procedure SGESVJ did not converge in the given number of
178 *> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate.
179 *> \endverbatim
180 *>
181 *> \param[in] MV
182 *> \verbatim
183 *> MV is INTEGER
184 *> If JOBV .EQ. 'A', then the product of Jacobi rotations in SGESVJ
185 *> is applied to the first MV rows of V. See the description of JOBV.
186 *> \endverbatim
187 *>
188 *> \param[in,out] V
189 *> \verbatim
190 *> V is REAL array, dimension (LDV,N)
191 *> If JOBV = 'V', then V contains on exit the N-by-N matrix of
192 *> the right singular vectors;
193 *> If JOBV = 'A', then V contains the product of the computed right
194 *> singular vector matrix and the initial matrix in
195 *> the array V.
196 *> If JOBV = 'N', then V is not referenced.
197 *> \endverbatim
198 *>
199 *> \param[in] LDV
200 *> \verbatim
201 *> LDV is INTEGER
202 *> The leading dimension of the array V, LDV .GE. 1.
203 *> If JOBV .EQ. 'V', then LDV .GE. max(1,N).
204 *> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) .
205 *> \endverbatim
206 *>
207 *> \param[in,out] WORK
208 *> \verbatim
209 *> WORK is REAL array, dimension max(4,M+N).
210 *> On entry,
211 *> If JOBU .EQ. 'C' :
212 *> WORK(1) = CTOL, where CTOL defines the threshold for convergence.
213 *> The process stops if all columns of A are mutually
214 *> orthogonal up to CTOL*EPS, EPS=SLAMCH('E').
215 *> It is required that CTOL >= ONE, i.e. it is not
216 *> allowed to force the routine to obtain orthogonality
217 *> below EPSILON.
218 *> On exit,
219 *> WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N)
220 *> are the computed singular vcalues of A.
221 *> (See description of SVA().)
222 *> WORK(2) = NINT(WORK(2)) is the number of the computed nonzero
223 *> singular values.
224 *> WORK(3) = NINT(WORK(3)) is the number of the computed singular
225 *> values that are larger than the underflow threshold.
226 *> WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi
227 *> rotations needed for numerical convergence.
228 *> WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep.
229 *> This is useful information in cases when SGESVJ did
230 *> not converge, as it can be used to estimate whether
231 *> the output is stil useful and for post festum analysis.
232 *> WORK(6) = the largest absolute value over all sines of the
233 *> Jacobi rotation angles in the last sweep. It can be
234 *> useful for a post festum analysis.
235 *> \endverbatim
236 *>
237 *> \param[in] LWORK
238 *> \verbatim
239 *> LWORK is INTEGER
240 *> length of WORK, WORK >= MAX(6,M+N)
241 *> \endverbatim
242 *>
243 *> \param[out] INFO
244 *> \verbatim
245 *> INFO is INTEGER
246 *> = 0 : successful exit.
247 *> < 0 : if INFO = -i, then the i-th argument had an illegal value
248 *> > 0 : SGESVJ did not converge in the maximal allowed number (30)
249 *> of sweeps. The output may still be useful. See the
250 *> description of WORK.
251 *> \endverbatim
252 *
253 * Authors:
254 * ========
255 *
256 *> \author Univ. of Tennessee
257 *> \author Univ. of California Berkeley
258 *> \author Univ. of Colorado Denver
259 *> \author NAG Ltd.
260 *
261 *> \date September 2012
262 *
263 *> \ingroup realGEcomputational
264 *
265 *> \par Further Details:
266 * =====================
267 *>
268 *> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane
269 *> rotations. The rotations are implemented as fast scaled rotations of
270 *> Anda and Park [1]. In the case of underflow of the Jacobi angle, a
271 *> modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses
272 *> column interchanges of de Rijk [2]. The relative accuracy of the computed
273 *> singular values and the accuracy of the computed singular vectors (in
274 *> angle metric) is as guaranteed by the theory of Demmel and Veselic [3].
275 *> The condition number that determines the accuracy in the full rank case
276 *> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the
277 *> spectral condition number. The best performance of this Jacobi SVD
278 *> procedure is achieved if used in an accelerated version of Drmac and
279 *> Veselic [5,6], and it is the kernel routine in the SIGMA library [7].
280 *> Some tunning parameters (marked with [TP]) are available for the
281 *> implementer. \n
282 *> The computational range for the nonzero singular values is the machine
283 *> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even
284 *> denormalized singular values can be computed with the corresponding
285 *> gradual loss of accurate digits.
286 *>
287 *> \par Contributors:
288 * ==================
289 *>
290 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
291 *>
292 *> \par References:
293 * ================
294 *>
295 *> [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. \n
296 *> SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. \n\n
297 *> [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the
298 *> singular value decomposition on a vector computer. \n
299 *> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. \n\n
300 *> [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. \n
301 *> [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular
302 *> value computation in floating point arithmetic. \n
303 *> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. \n\n
304 *> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. \n
305 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. \n
306 *> LAPACK Working note 169. \n\n
307 *> [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. \n
308 *> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. \n
309 *> LAPACK Working note 170. \n\n
310 *> [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
311 *> QSVD, (H,K)-SVD computations.\n
312 *> Department of Mathematics, University of Zagreb, 2008.
313 *>
314 *> \par Bugs, Examples and Comments:
315 * =================================
316 *>
317 *> Please report all bugs and send interesting test examples and comments to
318 *> drmac@math.hr. Thank you.
319 *
320 * =====================================================================
321  SUBROUTINE sgesvj( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
322  $ ldv, work, lwork, info )
323 *
324 * -- LAPACK computational routine (version 3.4.2) --
325 * -- LAPACK is a software package provided by Univ. of Tennessee, --
326 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
327 * September 2012
328 *
329 * .. Scalar Arguments ..
330  INTEGER info, lda, ldv, lwork, m, mv, n
331  CHARACTER*1 joba, jobu, jobv
332 * ..
333 * .. Array Arguments ..
334  REAL a( lda, * ), sva( n ), v( ldv, * ),
335  $ work( lwork )
336 * ..
337 *
338 * =====================================================================
339 *
340 * .. Local Parameters ..
341  REAL zero, half, one
342  parameter( zero = 0.0e0, half = 0.5e0, one = 1.0e0)
343  INTEGER nsweep
344  parameter( nsweep = 30 )
345 * ..
346 * .. Local Scalars ..
347  REAL aapp, aapp0, aapq, aaqq, apoaq, aqoap, big,
348  $ bigtheta, cs, ctol, epsln, large, mxaapq,
349  $ mxsinj, rootbig, rooteps, rootsfmin, roottol,
350  $ skl, sfmin, small, sn, t, temp1, theta,
351  $ thsign, tol
352  INTEGER blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1,
353  $ iswrot, jbc, jgl, kbl, lkahead, mvl, n2, n34,
354  $ n4, nbl, notrot, p, pskipped, q, rowskip,
355  $ swband
356  LOGICAL applv, goscale, lower, lsvec, noscale, rotok,
357  $ rsvec, uctol, upper
358 * ..
359 * .. Local Arrays ..
360  REAL fastr( 5 )
361 * ..
362 * .. Intrinsic Functions ..
363  INTRINSIC abs, amax1, amin1, float, min0, sign, sqrt
364 * ..
365 * .. External Functions ..
366 * ..
367 * from BLAS
368  REAL sdot, snrm2
369  EXTERNAL sdot, snrm2
370  INTEGER isamax
371  EXTERNAL isamax
372 * from LAPACK
373  REAL slamch
374  EXTERNAL slamch
375  LOGICAL lsame
376  EXTERNAL lsame
377 * ..
378 * .. External Subroutines ..
379 * ..
380 * from BLAS
381  EXTERNAL saxpy, scopy, srotm, sscal, sswap
382 * from LAPACK
383  EXTERNAL slascl, slaset, slassq, xerbla
384 *
385  EXTERNAL sgsvj0, sgsvj1
386 * ..
387 * .. Executable Statements ..
388 *
389 * Test the input arguments
390 *
391  lsvec = lsame( jobu, 'U' )
392  uctol = lsame( jobu, 'C' )
393  rsvec = lsame( jobv, 'V' )
394  applv = lsame( jobv, 'A' )
395  upper = lsame( joba, 'U' )
396  lower = lsame( joba, 'L' )
397 *
398  IF( .NOT.( upper .OR. lower .OR. lsame( joba, 'G' ) ) ) THEN
399  info = -1
400  ELSE IF( .NOT.( lsvec .OR. uctol .OR. lsame( jobu, 'N' ) ) ) THEN
401  info = -2
402  ELSE IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
403  info = -3
404  ELSE IF( m.LT.0 ) THEN
405  info = -4
406  ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
407  info = -5
408  ELSE IF( lda.LT.m ) THEN
409  info = -7
410  ELSE IF( mv.LT.0 ) THEN
411  info = -9
412  ELSE IF( ( rsvec .AND. ( ldv.LT.n ) ) .OR.
413  $ ( applv .AND. ( ldv.LT.mv ) ) ) THEN
414  info = -11
415  ELSE IF( uctol .AND. ( work( 1 ).LE.one ) ) THEN
416  info = -12
417  ELSE IF( lwork.LT.max0( m+n, 6 ) ) THEN
418  info = -13
419  ELSE
420  info = 0
421  END IF
422 *
423 * #:(
424  IF( info.NE.0 ) THEN
425  CALL xerbla( 'SGESVJ', -info )
426  RETURN
427  END IF
428 *
429 * #:) Quick return for void matrix
430 *
431  IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) )RETURN
432 *
433 * Set numerical parameters
434 * The stopping criterion for Jacobi rotations is
435 *
436 * max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS
437 *
438 * where EPS is the round-off and CTOL is defined as follows:
439 *
440  IF( uctol ) THEN
441 * ... user controlled
442  ctol = work( 1 )
443  ELSE
444 * ... default
445  IF( lsvec .OR. rsvec .OR. applv ) THEN
446  ctol = sqrt( float( m ) )
447  ELSE
448  ctol = float( m )
449  END IF
450  END IF
451 * ... and the machine dependent parameters are
452 *[!] (Make sure that SLAMCH() works properly on the target machine.)
453 *
454  epsln = slamch( 'Epsilon' )
455  rooteps = sqrt( epsln )
456  sfmin = slamch( 'SafeMinimum' )
457  rootsfmin = sqrt( sfmin )
458  small = sfmin / epsln
459  big = slamch( 'Overflow' )
460 * BIG = ONE / SFMIN
461  rootbig = one / rootsfmin
462  large = big / sqrt( float( m*n ) )
463  bigtheta = one / rooteps
464 *
465  tol = ctol*epsln
466  roottol = sqrt( tol )
467 *
468  IF( float( m )*epsln.GE.one ) THEN
469  info = -4
470  CALL xerbla( 'SGESVJ', -info )
471  RETURN
472  END IF
473 *
474 * Initialize the right singular vector matrix.
475 *
476  IF( rsvec ) THEN
477  mvl = n
478  CALL slaset( 'A', mvl, n, zero, one, v, ldv )
479  ELSE IF( applv ) THEN
480  mvl = mv
481  END IF
482  rsvec = rsvec .OR. applv
483 *
484 * Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N )
485 *(!) If necessary, scale A to protect the largest singular value
486 * from overflow. It is possible that saving the largest singular
487 * value destroys the information about the small ones.
488 * This initial scaling is almost minimal in the sense that the
489 * goal is to make sure that no column norm overflows, and that
490 * SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries
491 * in A are detected, the procedure returns with INFO=-6.
492 *
493  skl = one / sqrt( float( m )*float( n ) )
494  noscale = .true.
495  goscale = .true.
496 *
497  IF( lower ) THEN
498 * the input matrix is M-by-N lower triangular (trapezoidal)
499  DO 1874 p = 1, n
500  aapp = zero
501  aaqq = one
502  CALL slassq( m-p+1, a( p, p ), 1, aapp, aaqq )
503  IF( aapp.GT.big ) THEN
504  info = -6
505  CALL xerbla( 'SGESVJ', -info )
506  RETURN
507  END IF
508  aaqq = sqrt( aaqq )
509  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
510  sva( p ) = aapp*aaqq
511  ELSE
512  noscale = .false.
513  sva( p ) = aapp*( aaqq*skl )
514  IF( goscale ) THEN
515  goscale = .false.
516  DO 1873 q = 1, p - 1
517  sva( q ) = sva( q )*skl
518  1873 CONTINUE
519  END IF
520  END IF
521  1874 CONTINUE
522  ELSE IF( upper ) THEN
523 * the input matrix is M-by-N upper triangular (trapezoidal)
524  DO 2874 p = 1, n
525  aapp = zero
526  aaqq = one
527  CALL slassq( p, a( 1, p ), 1, aapp, aaqq )
528  IF( aapp.GT.big ) THEN
529  info = -6
530  CALL xerbla( 'SGESVJ', -info )
531  RETURN
532  END IF
533  aaqq = sqrt( aaqq )
534  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
535  sva( p ) = aapp*aaqq
536  ELSE
537  noscale = .false.
538  sva( p ) = aapp*( aaqq*skl )
539  IF( goscale ) THEN
540  goscale = .false.
541  DO 2873 q = 1, p - 1
542  sva( q ) = sva( q )*skl
543  2873 CONTINUE
544  END IF
545  END IF
546  2874 CONTINUE
547  ELSE
548 * the input matrix is M-by-N general dense
549  DO 3874 p = 1, n
550  aapp = zero
551  aaqq = one
552  CALL slassq( m, a( 1, p ), 1, aapp, aaqq )
553  IF( aapp.GT.big ) THEN
554  info = -6
555  CALL xerbla( 'SGESVJ', -info )
556  RETURN
557  END IF
558  aaqq = sqrt( aaqq )
559  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
560  sva( p ) = aapp*aaqq
561  ELSE
562  noscale = .false.
563  sva( p ) = aapp*( aaqq*skl )
564  IF( goscale ) THEN
565  goscale = .false.
566  DO 3873 q = 1, p - 1
567  sva( q ) = sva( q )*skl
568  3873 CONTINUE
569  END IF
570  END IF
571  3874 CONTINUE
572  END IF
573 *
574  IF( noscale )skl = one
575 *
576 * Move the smaller part of the spectrum from the underflow threshold
577 *(!) Start by determining the position of the nonzero entries of the
578 * array SVA() relative to ( SFMIN, BIG ).
579 *
580  aapp = zero
581  aaqq = big
582  DO 4781 p = 1, n
583  IF( sva( p ).NE.zero )aaqq = amin1( aaqq, sva( p ) )
584  aapp = amax1( aapp, sva( p ) )
585  4781 CONTINUE
586 *
587 * #:) Quick return for zero matrix
588 *
589  IF( aapp.EQ.zero ) THEN
590  IF( lsvec )CALL slaset( 'G', m, n, zero, one, a, lda )
591  work( 1 ) = one
592  work( 2 ) = zero
593  work( 3 ) = zero
594  work( 4 ) = zero
595  work( 5 ) = zero
596  work( 6 ) = zero
597  RETURN
598  END IF
599 *
600 * #:) Quick return for one-column matrix
601 *
602  IF( n.EQ.1 ) THEN
603  IF( lsvec )CALL slascl( 'G', 0, 0, sva( 1 ), skl, m, 1,
604  $ a( 1, 1 ), lda, ierr )
605  work( 1 ) = one / skl
606  IF( sva( 1 ).GE.sfmin ) THEN
607  work( 2 ) = one
608  ELSE
609  work( 2 ) = zero
610  END IF
611  work( 3 ) = zero
612  work( 4 ) = zero
613  work( 5 ) = zero
614  work( 6 ) = zero
615  RETURN
616  END IF
617 *
618 * Protect small singular values from underflow, and try to
619 * avoid underflows/overflows in computing Jacobi rotations.
620 *
621  sn = sqrt( sfmin / epsln )
622  temp1 = sqrt( big / float( n ) )
623  IF( ( aapp.LE.sn ) .OR. ( aaqq.GE.temp1 ) .OR.
624  $ ( ( sn.LE.aaqq ) .AND. ( aapp.LE.temp1 ) ) ) THEN
625  temp1 = amin1( big, temp1 / aapp )
626 * AAQQ = AAQQ*TEMP1
627 * AAPP = AAPP*TEMP1
628  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.LE.temp1 ) ) THEN
629  temp1 = amin1( sn / aaqq, big / ( aapp*sqrt( float( n ) ) ) )
630 * AAQQ = AAQQ*TEMP1
631 * AAPP = AAPP*TEMP1
632  ELSE IF( ( aaqq.GE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
633  temp1 = amax1( sn / aaqq, temp1 / aapp )
634 * AAQQ = AAQQ*TEMP1
635 * AAPP = AAPP*TEMP1
636  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
637  temp1 = amin1( sn / aaqq, big / ( sqrt( float( n ) )*aapp ) )
638 * AAQQ = AAQQ*TEMP1
639 * AAPP = AAPP*TEMP1
640  ELSE
641  temp1 = one
642  END IF
643 *
644 * Scale, if necessary
645 *
646  IF( temp1.NE.one ) THEN
647  CALL slascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr )
648  END IF
649  skl = temp1*skl
650  IF( skl.NE.one ) THEN
651  CALL slascl( joba, 0, 0, one, skl, m, n, a, lda, ierr )
652  skl = one / skl
653  END IF
654 *
655 * Row-cyclic Jacobi SVD algorithm with column pivoting
656 *
657  emptsw = ( n*( n-1 ) ) / 2
658  notrot = 0
659  fastr( 1 ) = zero
660 *
661 * A is represented in factored form A = A * diag(WORK), where diag(WORK)
662 * is initialized to identity. WORK is updated during fast scaled
663 * rotations.
664 *
665  DO 1868 q = 1, n
666  work( q ) = one
667  1868 CONTINUE
668 *
669 *
670  swband = 3
671 *[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective
672 * if SGESVJ is used as a computational routine in the preconditioned
673 * Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure
674 * works on pivots inside a band-like region around the diagonal.
675 * The boundaries are determined dynamically, based on the number of
676 * pivots above a threshold.
677 *
678  kbl = min0( 8, n )
679 *[TP] KBL is a tuning parameter that defines the tile size in the
680 * tiling of the p-q loops of pivot pairs. In general, an optimal
681 * value of KBL depends on the matrix dimensions and on the
682 * parameters of the computer's memory.
683 *
684  nbl = n / kbl
685  IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
686 *
687  blskip = kbl**2
688 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
689 *
690  rowskip = min0( 5, kbl )
691 *[TP] ROWSKIP is a tuning parameter.
692 *
693  lkahead = 1
694 *[TP] LKAHEAD is a tuning parameter.
695 *
696 * Quasi block transformations, using the lower (upper) triangular
697 * structure of the input matrix. The quasi-block-cycling usually
698 * invokes cubic convergence. Big part of this cycle is done inside
699 * canonical subspaces of dimensions less than M.
700 *
701  IF( ( lower .OR. upper ) .AND. ( n.GT.max0( 64, 4*kbl ) ) ) THEN
702 *[TP] The number of partition levels and the actual partition are
703 * tuning parameters.
704  n4 = n / 4
705  n2 = n / 2
706  n34 = 3*n4
707  IF( applv ) THEN
708  q = 0
709  ELSE
710  q = 1
711  END IF
712 *
713  IF( lower ) THEN
714 *
715 * This works very well on lower triangular matrices, in particular
716 * in the framework of the preconditioned Jacobi SVD (xGEJSV).
717 * The idea is simple:
718 * [+ 0 0 0] Note that Jacobi transformations of [0 0]
719 * [+ + 0 0] [0 0]
720 * [+ + x 0] actually work on [x 0] [x 0]
721 * [+ + x x] [x x]. [x x]
722 *
723  CALL sgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,
724  $ work( n34+1 ), sva( n34+1 ), mvl,
725  $ v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,
726  $ 2, work( n+1 ), lwork-n, ierr )
727 *
728  CALL sgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,
729  $ work( n2+1 ), sva( n2+1 ), mvl,
730  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,
731  $ work( n+1 ), lwork-n, ierr )
732 *
733  CALL sgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,
734  $ work( n2+1 ), sva( n2+1 ), mvl,
735  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
736  $ work( n+1 ), lwork-n, ierr )
737 *
738  CALL sgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,
739  $ work( n4+1 ), sva( n4+1 ), mvl,
740  $ v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,
741  $ work( n+1 ), lwork-n, ierr )
742 *
743  CALL sgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,
744  $ epsln, sfmin, tol, 1, work( n+1 ), lwork-n,
745  $ ierr )
746 *
747  CALL sgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,
748  $ ldv, epsln, sfmin, tol, 1, work( n+1 ),
749  $ lwork-n, ierr )
750 *
751 *
752  ELSE IF( upper ) THEN
753 *
754 *
755  CALL sgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,
756  $ epsln, sfmin, tol, 2, work( n+1 ), lwork-n,
757  $ ierr )
758 *
759  CALL sgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, work( n4+1 ),
760  $ sva( n4+1 ), mvl, v( n4*q+1, n4+1 ), ldv,
761  $ epsln, sfmin, tol, 1, work( n+1 ), lwork-n,
762  $ ierr )
763 *
764  CALL sgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,
765  $ ldv, epsln, sfmin, tol, 1, work( n+1 ),
766  $ lwork-n, ierr )
767 *
768  CALL sgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,
769  $ work( n2+1 ), sva( n2+1 ), mvl,
770  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
771  $ work( n+1 ), lwork-n, ierr )
772 
773  END IF
774 *
775  END IF
776 *
777 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
778 *
779  DO 1993 i = 1, nsweep
780 *
781 * .. go go go ...
782 *
783  mxaapq = zero
784  mxsinj = zero
785  iswrot = 0
786 *
787  notrot = 0
788  pskipped = 0
789 *
790 * Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs
791 * 1 <= p < q <= N. This is the first step toward a blocked implementation
792 * of the rotations. New implementation, based on block transformations,
793 * is under development.
794 *
795  DO 2000 ibr = 1, nbl
796 *
797  igl = ( ibr-1 )*kbl + 1
798 *
799  DO 1002 ir1 = 0, min0( lkahead, nbl-ibr )
800 *
801  igl = igl + ir1*kbl
802 *
803  DO 2001 p = igl, min0( igl+kbl-1, n-1 )
804 *
805 * .. de Rijk's pivoting
806 *
807  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
808  IF( p.NE.q ) THEN
809  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
810  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1,
811  $ v( 1, q ), 1 )
812  temp1 = sva( p )
813  sva( p ) = sva( q )
814  sva( q ) = temp1
815  temp1 = work( p )
816  work( p ) = work( q )
817  work( q ) = temp1
818  END IF
819 *
820  IF( ir1.EQ.0 ) THEN
821 *
822 * Column norms are periodically updated by explicit
823 * norm computation.
824 * Caveat:
825 * Unfortunately, some BLAS implementations compute SNRM2(M,A(1,p),1)
826 * as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to
827 * overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to
828 * underflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
829 * Hence, SNRM2 cannot be trusted, not even in the case when
830 * the true norm is far from the under(over)flow boundaries.
831 * If properly implemented SNRM2 is available, the IF-THEN-ELSE
832 * below should read "AAPP = SNRM2( M, A(1,p), 1 ) * WORK(p)".
833 *
834  IF( ( sva( p ).LT.rootbig ) .AND.
835  $ ( sva( p ).GT.rootsfmin ) ) THEN
836  sva( p ) = snrm2( m, a( 1, p ), 1 )*work( p )
837  ELSE
838  temp1 = zero
839  aapp = one
840  CALL slassq( m, a( 1, p ), 1, temp1, aapp )
841  sva( p ) = temp1*sqrt( aapp )*work( p )
842  END IF
843  aapp = sva( p )
844  ELSE
845  aapp = sva( p )
846  END IF
847 *
848  IF( aapp.GT.zero ) THEN
849 *
850  pskipped = 0
851 *
852  DO 2002 q = p + 1, min0( igl+kbl-1, n )
853 *
854  aaqq = sva( q )
855 *
856  IF( aaqq.GT.zero ) THEN
857 *
858  aapp0 = aapp
859  IF( aaqq.GE.one ) THEN
860  rotok = ( small*aapp ).LE.aaqq
861  IF( aapp.LT.( big / aaqq ) ) THEN
862  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
863  $ q ), 1 )*work( p )*work( q ) /
864  $ aaqq ) / aapp
865  ELSE
866  CALL scopy( m, a( 1, p ), 1,
867  $ work( n+1 ), 1 )
868  CALL slascl( 'G', 0, 0, aapp,
869  $ work( p ), m, 1,
870  $ work( n+1 ), lda, ierr )
871  aapq = sdot( m, work( n+1 ), 1,
872  $ a( 1, q ), 1 )*work( q ) / aaqq
873  END IF
874  ELSE
875  rotok = aapp.LE.( aaqq / small )
876  IF( aapp.GT.( small / aaqq ) ) THEN
877  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
878  $ q ), 1 )*work( p )*work( q ) /
879  $ aaqq ) / aapp
880  ELSE
881  CALL scopy( m, a( 1, q ), 1,
882  $ work( n+1 ), 1 )
883  CALL slascl( 'G', 0, 0, aaqq,
884  $ work( q ), m, 1,
885  $ work( n+1 ), lda, ierr )
886  aapq = sdot( m, work( n+1 ), 1,
887  $ a( 1, p ), 1 )*work( p ) / aapp
888  END IF
889  END IF
890 *
891  mxaapq = amax1( mxaapq, abs( aapq ) )
892 *
893 * TO rotate or NOT to rotate, THAT is the question ...
894 *
895  IF( abs( aapq ).GT.tol ) THEN
896 *
897 * .. rotate
898 *[RTD] ROTATED = ROTATED + ONE
899 *
900  IF( ir1.EQ.0 ) THEN
901  notrot = 0
902  pskipped = 0
903  iswrot = iswrot + 1
904  END IF
905 *
906  IF( rotok ) THEN
907 *
908  aqoap = aaqq / aapp
909  apoaq = aapp / aaqq
910  theta = -half*abs( aqoap-apoaq ) / aapq
911 *
912  IF( abs( theta ).GT.bigtheta ) THEN
913 *
914  t = half / theta
915  fastr( 3 ) = t*work( p ) / work( q )
916  fastr( 4 ) = -t*work( q ) /
917  $ work( p )
918  CALL srotm( m, a( 1, p ), 1,
919  $ a( 1, q ), 1, fastr )
920  IF( rsvec )CALL srotm( mvl,
921  $ v( 1, p ), 1,
922  $ v( 1, q ), 1,
923  $ fastr )
924  sva( q ) = aaqq*sqrt( amax1( zero,
925  $ one+t*apoaq*aapq ) )
926  aapp = aapp*sqrt( amax1( zero,
927  $ one-t*aqoap*aapq ) )
928  mxsinj = amax1( mxsinj, abs( t ) )
929 *
930  ELSE
931 *
932 * .. choose correct signum for THETA and rotate
933 *
934  thsign = -sign( one, aapq )
935  t = one / ( theta+thsign*
936  $ sqrt( one+theta*theta ) )
937  cs = sqrt( one / ( one+t*t ) )
938  sn = t*cs
939 *
940  mxsinj = amax1( mxsinj, abs( sn ) )
941  sva( q ) = aaqq*sqrt( amax1( zero,
942  $ one+t*apoaq*aapq ) )
943  aapp = aapp*sqrt( amax1( zero,
944  $ one-t*aqoap*aapq ) )
945 *
946  apoaq = work( p ) / work( q )
947  aqoap = work( q ) / work( p )
948  IF( work( p ).GE.one ) THEN
949  IF( work( q ).GE.one ) THEN
950  fastr( 3 ) = t*apoaq
951  fastr( 4 ) = -t*aqoap
952  work( p ) = work( p )*cs
953  work( q ) = work( q )*cs
954  CALL srotm( m, a( 1, p ), 1,
955  $ a( 1, q ), 1,
956  $ fastr )
957  IF( rsvec )CALL srotm( mvl,
958  $ v( 1, p ), 1, v( 1, q ),
959  $ 1, fastr )
960  ELSE
961  CALL saxpy( m, -t*aqoap,
962  $ a( 1, q ), 1,
963  $ a( 1, p ), 1 )
964  CALL saxpy( m, cs*sn*apoaq,
965  $ a( 1, p ), 1,
966  $ a( 1, q ), 1 )
967  work( p ) = work( p )*cs
968  work( q ) = work( q ) / cs
969  IF( rsvec ) THEN
970  CALL saxpy( mvl, -t*aqoap,
971  $ v( 1, q ), 1,
972  $ v( 1, p ), 1 )
973  CALL saxpy( mvl,
974  $ cs*sn*apoaq,
975  $ v( 1, p ), 1,
976  $ v( 1, q ), 1 )
977  END IF
978  END IF
979  ELSE
980  IF( work( q ).GE.one ) THEN
981  CALL saxpy( m, t*apoaq,
982  $ a( 1, p ), 1,
983  $ a( 1, q ), 1 )
984  CALL saxpy( m, -cs*sn*aqoap,
985  $ a( 1, q ), 1,
986  $ a( 1, p ), 1 )
987  work( p ) = work( p ) / cs
988  work( q ) = work( q )*cs
989  IF( rsvec ) THEN
990  CALL saxpy( mvl, t*apoaq,
991  $ v( 1, p ), 1,
992  $ v( 1, q ), 1 )
993  CALL saxpy( mvl,
994  $ -cs*sn*aqoap,
995  $ v( 1, q ), 1,
996  $ v( 1, p ), 1 )
997  END IF
998  ELSE
999  IF( work( p ).GE.work( q ) )
1000  $ THEN
1001  CALL saxpy( m, -t*aqoap,
1002  $ a( 1, q ), 1,
1003  $ a( 1, p ), 1 )
1004  CALL saxpy( m, cs*sn*apoaq,
1005  $ a( 1, p ), 1,
1006  $ a( 1, q ), 1 )
1007  work( p ) = work( p )*cs
1008  work( q ) = work( q ) / cs
1009  IF( rsvec ) THEN
1010  CALL saxpy( mvl,
1011  $ -t*aqoap,
1012  $ v( 1, q ), 1,
1013  $ v( 1, p ), 1 )
1014  CALL saxpy( mvl,
1015  $ cs*sn*apoaq,
1016  $ v( 1, p ), 1,
1017  $ v( 1, q ), 1 )
1018  END IF
1019  ELSE
1020  CALL saxpy( m, t*apoaq,
1021  $ a( 1, p ), 1,
1022  $ a( 1, q ), 1 )
1023  CALL saxpy( m,
1024  $ -cs*sn*aqoap,
1025  $ a( 1, q ), 1,
1026  $ a( 1, p ), 1 )
1027  work( p ) = work( p ) / cs
1028  work( q ) = work( q )*cs
1029  IF( rsvec ) THEN
1030  CALL saxpy( mvl,
1031  $ t*apoaq, v( 1, p ),
1032  $ 1, v( 1, q ), 1 )
1033  CALL saxpy( mvl,
1034  $ -cs*sn*aqoap,
1035  $ v( 1, q ), 1,
1036  $ v( 1, p ), 1 )
1037  END IF
1038  END IF
1039  END IF
1040  END IF
1041  END IF
1042 *
1043  ELSE
1044 * .. have to use modified Gram-Schmidt like transformation
1045  CALL scopy( m, a( 1, p ), 1,
1046  $ work( n+1 ), 1 )
1047  CALL slascl( 'G', 0, 0, aapp, one, m,
1048  $ 1, work( n+1 ), lda,
1049  $ ierr )
1050  CALL slascl( 'G', 0, 0, aaqq, one, m,
1051  $ 1, a( 1, q ), lda, ierr )
1052  temp1 = -aapq*work( p ) / work( q )
1053  CALL saxpy( m, temp1, work( n+1 ), 1,
1054  $ a( 1, q ), 1 )
1055  CALL slascl( 'G', 0, 0, one, aaqq, m,
1056  $ 1, a( 1, q ), lda, ierr )
1057  sva( q ) = aaqq*sqrt( amax1( zero,
1058  $ one-aapq*aapq ) )
1059  mxsinj = amax1( mxsinj, sfmin )
1060  END IF
1061 * END IF ROTOK THEN ... ELSE
1062 *
1063 * In the case of cancellation in updating SVA(q), SVA(p)
1064 * recompute SVA(q), SVA(p).
1065 *
1066  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1067  $ THEN
1068  IF( ( aaqq.LT.rootbig ) .AND.
1069  $ ( aaqq.GT.rootsfmin ) ) THEN
1070  sva( q ) = snrm2( m, a( 1, q ), 1 )*
1071  $ work( q )
1072  ELSE
1073  t = zero
1074  aaqq = one
1075  CALL slassq( m, a( 1, q ), 1, t,
1076  $ aaqq )
1077  sva( q ) = t*sqrt( aaqq )*work( q )
1078  END IF
1079  END IF
1080  IF( ( aapp / aapp0 ).LE.rooteps ) THEN
1081  IF( ( aapp.LT.rootbig ) .AND.
1082  $ ( aapp.GT.rootsfmin ) ) THEN
1083  aapp = snrm2( m, a( 1, p ), 1 )*
1084  $ work( p )
1085  ELSE
1086  t = zero
1087  aapp = one
1088  CALL slassq( m, a( 1, p ), 1, t,
1089  $ aapp )
1090  aapp = t*sqrt( aapp )*work( p )
1091  END IF
1092  sva( p ) = aapp
1093  END IF
1094 *
1095  ELSE
1096 * A(:,p) and A(:,q) already numerically orthogonal
1097  IF( ir1.EQ.0 )notrot = notrot + 1
1098 *[RTD] SKIPPED = SKIPPED + 1
1099  pskipped = pskipped + 1
1100  END IF
1101  ELSE
1102 * A(:,q) is zero column
1103  IF( ir1.EQ.0 )notrot = notrot + 1
1104  pskipped = pskipped + 1
1105  END IF
1106 *
1107  IF( ( i.LE.swband ) .AND.
1108  $ ( pskipped.GT.rowskip ) ) THEN
1109  IF( ir1.EQ.0 )aapp = -aapp
1110  notrot = 0
1111  go to 2103
1112  END IF
1113 *
1114  2002 CONTINUE
1115 * END q-LOOP
1116 *
1117  2103 CONTINUE
1118 * bailed out of q-loop
1119 *
1120  sva( p ) = aapp
1121 *
1122  ELSE
1123  sva( p ) = aapp
1124  IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
1125  $ notrot = notrot + min0( igl+kbl-1, n ) - p
1126  END IF
1127 *
1128  2001 CONTINUE
1129 * end of the p-loop
1130 * end of doing the block ( ibr, ibr )
1131  1002 CONTINUE
1132 * end of ir1-loop
1133 *
1134 * ... go to the off diagonal blocks
1135 *
1136  igl = ( ibr-1 )*kbl + 1
1137 *
1138  DO 2010 jbc = ibr + 1, nbl
1139 *
1140  jgl = ( jbc-1 )*kbl + 1
1141 *
1142 * doing the block at ( ibr, jbc )
1143 *
1144  ijblsk = 0
1145  DO 2100 p = igl, min0( igl+kbl-1, n )
1146 *
1147  aapp = sva( p )
1148  IF( aapp.GT.zero ) THEN
1149 *
1150  pskipped = 0
1151 *
1152  DO 2200 q = jgl, min0( jgl+kbl-1, n )
1153 *
1154  aaqq = sva( q )
1155  IF( aaqq.GT.zero ) THEN
1156  aapp0 = aapp
1157 *
1158 * .. M x 2 Jacobi SVD ..
1159 *
1160 * Safe Gram matrix computation
1161 *
1162  IF( aaqq.GE.one ) THEN
1163  IF( aapp.GE.aaqq ) THEN
1164  rotok = ( small*aapp ).LE.aaqq
1165  ELSE
1166  rotok = ( small*aaqq ).LE.aapp
1167  END IF
1168  IF( aapp.LT.( big / aaqq ) ) THEN
1169  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
1170  $ q ), 1 )*work( p )*work( q ) /
1171  $ aaqq ) / aapp
1172  ELSE
1173  CALL scopy( m, a( 1, p ), 1,
1174  $ work( n+1 ), 1 )
1175  CALL slascl( 'G', 0, 0, aapp,
1176  $ work( p ), m, 1,
1177  $ work( n+1 ), lda, ierr )
1178  aapq = sdot( m, work( n+1 ), 1,
1179  $ a( 1, q ), 1 )*work( q ) / aaqq
1180  END IF
1181  ELSE
1182  IF( aapp.GE.aaqq ) THEN
1183  rotok = aapp.LE.( aaqq / small )
1184  ELSE
1185  rotok = aaqq.LE.( aapp / small )
1186  END IF
1187  IF( aapp.GT.( small / aaqq ) ) THEN
1188  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
1189  $ q ), 1 )*work( p )*work( q ) /
1190  $ aaqq ) / aapp
1191  ELSE
1192  CALL scopy( m, a( 1, q ), 1,
1193  $ work( n+1 ), 1 )
1194  CALL slascl( 'G', 0, 0, aaqq,
1195  $ work( q ), m, 1,
1196  $ work( n+1 ), lda, ierr )
1197  aapq = sdot( m, work( n+1 ), 1,
1198  $ a( 1, p ), 1 )*work( p ) / aapp
1199  END IF
1200  END IF
1201 *
1202  mxaapq = amax1( mxaapq, abs( aapq ) )
1203 *
1204 * TO rotate or NOT to rotate, THAT is the question ...
1205 *
1206  IF( abs( aapq ).GT.tol ) THEN
1207  notrot = 0
1208 *[RTD] ROTATED = ROTATED + 1
1209  pskipped = 0
1210  iswrot = iswrot + 1
1211 *
1212  IF( rotok ) THEN
1213 *
1214  aqoap = aaqq / aapp
1215  apoaq = aapp / aaqq
1216  theta = -half*abs( aqoap-apoaq ) / aapq
1217  IF( aaqq.GT.aapp0 )theta = -theta
1218 *
1219  IF( abs( theta ).GT.bigtheta ) THEN
1220  t = half / theta
1221  fastr( 3 ) = t*work( p ) / work( q )
1222  fastr( 4 ) = -t*work( q ) /
1223  $ work( p )
1224  CALL srotm( m, a( 1, p ), 1,
1225  $ a( 1, q ), 1, fastr )
1226  IF( rsvec )CALL srotm( mvl,
1227  $ v( 1, p ), 1,
1228  $ v( 1, q ), 1,
1229  $ fastr )
1230  sva( q ) = aaqq*sqrt( amax1( zero,
1231  $ one+t*apoaq*aapq ) )
1232  aapp = aapp*sqrt( amax1( zero,
1233  $ one-t*aqoap*aapq ) )
1234  mxsinj = amax1( mxsinj, abs( t ) )
1235  ELSE
1236 *
1237 * .. choose correct signum for THETA and rotate
1238 *
1239  thsign = -sign( one, aapq )
1240  IF( aaqq.GT.aapp0 )thsign = -thsign
1241  t = one / ( theta+thsign*
1242  $ sqrt( one+theta*theta ) )
1243  cs = sqrt( one / ( one+t*t ) )
1244  sn = t*cs
1245  mxsinj = amax1( mxsinj, abs( sn ) )
1246  sva( q ) = aaqq*sqrt( amax1( zero,
1247  $ one+t*apoaq*aapq ) )
1248  aapp = aapp*sqrt( amax1( zero,
1249  $ one-t*aqoap*aapq ) )
1250 *
1251  apoaq = work( p ) / work( q )
1252  aqoap = work( q ) / work( p )
1253  IF( work( p ).GE.one ) THEN
1254 *
1255  IF( work( q ).GE.one ) THEN
1256  fastr( 3 ) = t*apoaq
1257  fastr( 4 ) = -t*aqoap
1258  work( p ) = work( p )*cs
1259  work( q ) = work( q )*cs
1260  CALL srotm( m, a( 1, p ), 1,
1261  $ a( 1, q ), 1,
1262  $ fastr )
1263  IF( rsvec )CALL srotm( mvl,
1264  $ v( 1, p ), 1, v( 1, q ),
1265  $ 1, fastr )
1266  ELSE
1267  CALL saxpy( m, -t*aqoap,
1268  $ a( 1, q ), 1,
1269  $ a( 1, p ), 1 )
1270  CALL saxpy( m, cs*sn*apoaq,
1271  $ a( 1, p ), 1,
1272  $ a( 1, q ), 1 )
1273  IF( rsvec ) THEN
1274  CALL saxpy( mvl, -t*aqoap,
1275  $ v( 1, q ), 1,
1276  $ v( 1, p ), 1 )
1277  CALL saxpy( mvl,
1278  $ cs*sn*apoaq,
1279  $ v( 1, p ), 1,
1280  $ v( 1, q ), 1 )
1281  END IF
1282  work( p ) = work( p )*cs
1283  work( q ) = work( q ) / cs
1284  END IF
1285  ELSE
1286  IF( work( q ).GE.one ) THEN
1287  CALL saxpy( m, t*apoaq,
1288  $ a( 1, p ), 1,
1289  $ a( 1, q ), 1 )
1290  CALL saxpy( m, -cs*sn*aqoap,
1291  $ a( 1, q ), 1,
1292  $ a( 1, p ), 1 )
1293  IF( rsvec ) THEN
1294  CALL saxpy( mvl, t*apoaq,
1295  $ v( 1, p ), 1,
1296  $ v( 1, q ), 1 )
1297  CALL saxpy( mvl,
1298  $ -cs*sn*aqoap,
1299  $ v( 1, q ), 1,
1300  $ v( 1, p ), 1 )
1301  END IF
1302  work( p ) = work( p ) / cs
1303  work( q ) = work( q )*cs
1304  ELSE
1305  IF( work( p ).GE.work( q ) )
1306  $ THEN
1307  CALL saxpy( m, -t*aqoap,
1308  $ a( 1, q ), 1,
1309  $ a( 1, p ), 1 )
1310  CALL saxpy( m, cs*sn*apoaq,
1311  $ a( 1, p ), 1,
1312  $ a( 1, q ), 1 )
1313  work( p ) = work( p )*cs
1314  work( q ) = work( q ) / cs
1315  IF( rsvec ) THEN
1316  CALL saxpy( mvl,
1317  $ -t*aqoap,
1318  $ v( 1, q ), 1,
1319  $ v( 1, p ), 1 )
1320  CALL saxpy( mvl,
1321  $ cs*sn*apoaq,
1322  $ v( 1, p ), 1,
1323  $ v( 1, q ), 1 )
1324  END IF
1325  ELSE
1326  CALL saxpy( m, t*apoaq,
1327  $ a( 1, p ), 1,
1328  $ a( 1, q ), 1 )
1329  CALL saxpy( m,
1330  $ -cs*sn*aqoap,
1331  $ a( 1, q ), 1,
1332  $ a( 1, p ), 1 )
1333  work( p ) = work( p ) / cs
1334  work( q ) = work( q )*cs
1335  IF( rsvec ) THEN
1336  CALL saxpy( mvl,
1337  $ t*apoaq, v( 1, p ),
1338  $ 1, v( 1, q ), 1 )
1339  CALL saxpy( mvl,
1340  $ -cs*sn*aqoap,
1341  $ v( 1, q ), 1,
1342  $ v( 1, p ), 1 )
1343  END IF
1344  END IF
1345  END IF
1346  END IF
1347  END IF
1348 *
1349  ELSE
1350  IF( aapp.GT.aaqq ) THEN
1351  CALL scopy( m, a( 1, p ), 1,
1352  $ work( n+1 ), 1 )
1353  CALL slascl( 'G', 0, 0, aapp, one,
1354  $ m, 1, work( n+1 ), lda,
1355  $ ierr )
1356  CALL slascl( 'G', 0, 0, aaqq, one,
1357  $ m, 1, a( 1, q ), lda,
1358  $ ierr )
1359  temp1 = -aapq*work( p ) / work( q )
1360  CALL saxpy( m, temp1, work( n+1 ),
1361  $ 1, a( 1, q ), 1 )
1362  CALL slascl( 'G', 0, 0, one, aaqq,
1363  $ m, 1, a( 1, q ), lda,
1364  $ ierr )
1365  sva( q ) = aaqq*sqrt( amax1( zero,
1366  $ one-aapq*aapq ) )
1367  mxsinj = amax1( mxsinj, sfmin )
1368  ELSE
1369  CALL scopy( m, a( 1, q ), 1,
1370  $ work( n+1 ), 1 )
1371  CALL slascl( 'G', 0, 0, aaqq, one,
1372  $ m, 1, work( n+1 ), lda,
1373  $ ierr )
1374  CALL slascl( 'G', 0, 0, aapp, one,
1375  $ m, 1, a( 1, p ), lda,
1376  $ ierr )
1377  temp1 = -aapq*work( q ) / work( p )
1378  CALL saxpy( m, temp1, work( n+1 ),
1379  $ 1, a( 1, p ), 1 )
1380  CALL slascl( 'G', 0, 0, one, aapp,
1381  $ m, 1, a( 1, p ), lda,
1382  $ ierr )
1383  sva( p ) = aapp*sqrt( amax1( zero,
1384  $ one-aapq*aapq ) )
1385  mxsinj = amax1( mxsinj, sfmin )
1386  END IF
1387  END IF
1388 * END IF ROTOK THEN ... ELSE
1389 *
1390 * In the case of cancellation in updating SVA(q)
1391 * .. recompute SVA(q)
1392  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1393  $ THEN
1394  IF( ( aaqq.LT.rootbig ) .AND.
1395  $ ( aaqq.GT.rootsfmin ) ) THEN
1396  sva( q ) = snrm2( m, a( 1, q ), 1 )*
1397  $ work( q )
1398  ELSE
1399  t = zero
1400  aaqq = one
1401  CALL slassq( m, a( 1, q ), 1, t,
1402  $ aaqq )
1403  sva( q ) = t*sqrt( aaqq )*work( q )
1404  END IF
1405  END IF
1406  IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
1407  IF( ( aapp.LT.rootbig ) .AND.
1408  $ ( aapp.GT.rootsfmin ) ) THEN
1409  aapp = snrm2( m, a( 1, p ), 1 )*
1410  $ work( p )
1411  ELSE
1412  t = zero
1413  aapp = one
1414  CALL slassq( m, a( 1, p ), 1, t,
1415  $ aapp )
1416  aapp = t*sqrt( aapp )*work( p )
1417  END IF
1418  sva( p ) = aapp
1419  END IF
1420 * end of OK rotation
1421  ELSE
1422  notrot = notrot + 1
1423 *[RTD] SKIPPED = SKIPPED + 1
1424  pskipped = pskipped + 1
1425  ijblsk = ijblsk + 1
1426  END IF
1427  ELSE
1428  notrot = notrot + 1
1429  pskipped = pskipped + 1
1430  ijblsk = ijblsk + 1
1431  END IF
1432 *
1433  IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
1434  $ THEN
1435  sva( p ) = aapp
1436  notrot = 0
1437  go to 2011
1438  END IF
1439  IF( ( i.LE.swband ) .AND.
1440  $ ( pskipped.GT.rowskip ) ) THEN
1441  aapp = -aapp
1442  notrot = 0
1443  go to 2203
1444  END IF
1445 *
1446  2200 CONTINUE
1447 * end of the q-loop
1448  2203 CONTINUE
1449 *
1450  sva( p ) = aapp
1451 *
1452  ELSE
1453 *
1454  IF( aapp.EQ.zero )notrot = notrot +
1455  $ min0( jgl+kbl-1, n ) - jgl + 1
1456  IF( aapp.LT.zero )notrot = 0
1457 *
1458  END IF
1459 *
1460  2100 CONTINUE
1461 * end of the p-loop
1462  2010 CONTINUE
1463 * end of the jbc-loop
1464  2011 CONTINUE
1465 *2011 bailed out of the jbc-loop
1466  DO 2012 p = igl, min0( igl+kbl-1, n )
1467  sva( p ) = abs( sva( p ) )
1468  2012 CONTINUE
1469 ***
1470  2000 CONTINUE
1471 *2000 :: end of the ibr-loop
1472 *
1473 * .. update SVA(N)
1474  IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1475  $ THEN
1476  sva( n ) = snrm2( m, a( 1, n ), 1 )*work( n )
1477  ELSE
1478  t = zero
1479  aapp = one
1480  CALL slassq( m, a( 1, n ), 1, t, aapp )
1481  sva( n ) = t*sqrt( aapp )*work( n )
1482  END IF
1483 *
1484 * Additional steering devices
1485 *
1486  IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1487  $ ( iswrot.LE.n ) ) )swband = i
1488 *
1489  IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.sqrt( float( n ) )*
1490  $ tol ) .AND. ( float( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1491  go to 1994
1492  END IF
1493 *
1494  IF( notrot.GE.emptsw )go to 1994
1495 *
1496  1993 CONTINUE
1497 * end i=1:NSWEEP loop
1498 *
1499 * #:( Reaching this point means that the procedure has not converged.
1500  info = nsweep - 1
1501  go to 1995
1502 *
1503  1994 CONTINUE
1504 * #:) Reaching this point means numerical convergence after the i-th
1505 * sweep.
1506 *
1507  info = 0
1508 * #:) INFO = 0 confirms successful iterations.
1509  1995 CONTINUE
1510 *
1511 * Sort the singular values and find how many are above
1512 * the underflow threshold.
1513 *
1514  n2 = 0
1515  n4 = 0
1516  DO 5991 p = 1, n - 1
1517  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
1518  IF( p.NE.q ) THEN
1519  temp1 = sva( p )
1520  sva( p ) = sva( q )
1521  sva( q ) = temp1
1522  temp1 = work( p )
1523  work( p ) = work( q )
1524  work( q ) = temp1
1525  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1526  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1527  END IF
1528  IF( sva( p ).NE.zero ) THEN
1529  n4 = n4 + 1
1530  IF( sva( p )*skl.GT.sfmin )n2 = n2 + 1
1531  END IF
1532  5991 CONTINUE
1533  IF( sva( n ).NE.zero ) THEN
1534  n4 = n4 + 1
1535  IF( sva( n )*skl.GT.sfmin )n2 = n2 + 1
1536  END IF
1537 *
1538 * Normalize the left singular vectors.
1539 *
1540  IF( lsvec .OR. uctol ) THEN
1541  DO 1998 p = 1, n2
1542  CALL sscal( m, work( p ) / sva( p ), a( 1, p ), 1 )
1543  1998 CONTINUE
1544  END IF
1545 *
1546 * Scale the product of Jacobi rotations (assemble the fast rotations).
1547 *
1548  IF( rsvec ) THEN
1549  IF( applv ) THEN
1550  DO 2398 p = 1, n
1551  CALL sscal( mvl, work( p ), v( 1, p ), 1 )
1552  2398 CONTINUE
1553  ELSE
1554  DO 2399 p = 1, n
1555  temp1 = one / snrm2( mvl, v( 1, p ), 1 )
1556  CALL sscal( mvl, temp1, v( 1, p ), 1 )
1557  2399 CONTINUE
1558  END IF
1559  END IF
1560 *
1561 * Undo scaling, if necessary (and possible).
1562  IF( ( ( skl.GT.one ) .AND. ( sva( 1 ).LT.( big / skl ) ) )
1563  $ .OR. ( ( skl.LT.one ) .AND. ( sva( max( n2, 1 ) ) .GT.
1564  $ ( sfmin / skl ) ) ) ) THEN
1565  DO 2400 p = 1, n
1566  sva( p ) = skl*sva( p )
1567  2400 CONTINUE
1568  skl = one
1569  END IF
1570 *
1571  work( 1 ) = skl
1572 * The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE
1573 * then some of the singular values may overflow or underflow and
1574 * the spectrum is given in this factored representation.
1575 *
1576  work( 2 ) = float( n4 )
1577 * N4 is the number of computed nonzero singular values of A.
1578 *
1579  work( 3 ) = float( n2 )
1580 * N2 is the number of singular values of A greater than SFMIN.
1581 * If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers
1582 * that may carry some information.
1583 *
1584  work( 4 ) = float( i )
1585 * i is the index of the last sweep before declaring convergence.
1586 *
1587  work( 5 ) = mxaapq
1588 * MXAAPQ is the largest absolute value of scaled pivots in the
1589 * last sweep
1590 *
1591  work( 6 ) = mxsinj
1592 * MXSINJ is the largest absolute value of the sines of Jacobi angles
1593 * in the last sweep
1594 *
1595  RETURN
1596 * ..
1597 * .. END OF SGESVJ
1598 * ..
1599  END