LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
sgsvj0.f
Go to the documentation of this file.
1 *> \brief \b SGSVJ0 pre-processor for the routine sgesvj.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGSVJ0 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj0.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj0.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj0.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
22 * SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
26 * REAL EPS, SFMIN, TOL
27 * CHARACTER*1 JOBV
28 * ..
29 * .. Array Arguments ..
30 * REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ),
31 * $ WORK( LWORK )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> SGSVJ0 is called from SGESVJ as a pre-processor and that is its main
41 *> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
42 *> it does not check convergence (stopping criterion). Few tuning
43 *> parameters (marked by [TP]) are available for the implementer.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] JOBV
50 *> \verbatim
51 *> JOBV is CHARACTER*1
52 *> Specifies whether the output from this procedure is used
53 *> to compute the matrix V:
54 *> = 'V': the product of the Jacobi rotations is accumulated
55 *> by postmulyiplying the N-by-N array V.
56 *> (See the description of V.)
57 *> = 'A': the product of the Jacobi rotations is accumulated
58 *> by postmulyiplying the MV-by-N array V.
59 *> (See the descriptions of MV and V.)
60 *> = 'N': the Jacobi rotations are not accumulated.
61 *> \endverbatim
62 *>
63 *> \param[in] M
64 *> \verbatim
65 *> M is INTEGER
66 *> The number of rows of the input matrix A. M >= 0.
67 *> \endverbatim
68 *>
69 *> \param[in] N
70 *> \verbatim
71 *> N is INTEGER
72 *> The number of columns of the input matrix A.
73 *> M >= N >= 0.
74 *> \endverbatim
75 *>
76 *> \param[in,out] A
77 *> \verbatim
78 *> A is REAL array, dimension (LDA,N)
79 *> On entry, M-by-N matrix A, such that A*diag(D) represents
80 *> the input matrix.
81 *> On exit,
82 *> A_onexit * D_onexit represents the input matrix A*diag(D)
83 *> post-multiplied by a sequence of Jacobi rotations, where the
84 *> rotation threshold and the total number of sweeps are given in
85 *> TOL and NSWEEP, respectively.
86 *> (See the descriptions of D, TOL and NSWEEP.)
87 *> \endverbatim
88 *>
89 *> \param[in] LDA
90 *> \verbatim
91 *> LDA is INTEGER
92 *> The leading dimension of the array A. LDA >= max(1,M).
93 *> \endverbatim
94 *>
95 *> \param[in,out] D
96 *> \verbatim
97 *> D is REAL array, dimension (N)
98 *> The array D accumulates the scaling factors from the fast scaled
99 *> Jacobi rotations.
100 *> On entry, A*diag(D) represents the input matrix.
101 *> On exit, A_onexit*diag(D_onexit) represents the input matrix
102 *> post-multiplied by a sequence of Jacobi rotations, where the
103 *> rotation threshold and the total number of sweeps are given in
104 *> TOL and NSWEEP, respectively.
105 *> (See the descriptions of A, TOL and NSWEEP.)
106 *> \endverbatim
107 *>
108 *> \param[in,out] SVA
109 *> \verbatim
110 *> SVA is REAL array, dimension (N)
111 *> On entry, SVA contains the Euclidean norms of the columns of
112 *> the matrix A*diag(D).
113 *> On exit, SVA contains the Euclidean norms of the columns of
114 *> the matrix onexit*diag(D_onexit).
115 *> \endverbatim
116 *>
117 *> \param[in] MV
118 *> \verbatim
119 *> MV is INTEGER
120 *> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a
121 *> sequence of Jacobi rotations.
122 *> If JOBV = 'N', then MV is not referenced.
123 *> \endverbatim
124 *>
125 *> \param[in,out] V
126 *> \verbatim
127 *> V is REAL array, dimension (LDV,N)
128 *> If JOBV .EQ. 'V' then N rows of V are post-multipled by a
129 *> sequence of Jacobi rotations.
130 *> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a
131 *> sequence of Jacobi rotations.
132 *> If JOBV = 'N', then V is not referenced.
133 *> \endverbatim
134 *>
135 *> \param[in] LDV
136 *> \verbatim
137 *> LDV is INTEGER
138 *> The leading dimension of the array V, LDV >= 1.
139 *> If JOBV = 'V', LDV .GE. N.
140 *> If JOBV = 'A', LDV .GE. MV.
141 *> \endverbatim
142 *>
143 *> \param[in] EPS
144 *> \verbatim
145 *> EPS is REAL
146 *> EPS = SLAMCH('Epsilon')
147 *> \endverbatim
148 *>
149 *> \param[in] SFMIN
150 *> \verbatim
151 *> SFMIN is REAL
152 *> SFMIN = SLAMCH('Safe Minimum')
153 *> \endverbatim
154 *>
155 *> \param[in] TOL
156 *> \verbatim
157 *> TOL is REAL
158 *> TOL is the threshold for Jacobi rotations. For a pair
159 *> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
160 *> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL.
161 *> \endverbatim
162 *>
163 *> \param[in] NSWEEP
164 *> \verbatim
165 *> NSWEEP is INTEGER
166 *> NSWEEP is the number of sweeps of Jacobi rotations to be
167 *> performed.
168 *> \endverbatim
169 *>
170 *> \param[out] WORK
171 *> \verbatim
172 *> WORK is REAL array, dimension LWORK.
173 *> \endverbatim
174 *>
175 *> \param[in] LWORK
176 *> \verbatim
177 *> LWORK is INTEGER
178 *> LWORK is the dimension of WORK. LWORK .GE. M.
179 *> \endverbatim
180 *>
181 *> \param[out] INFO
182 *> \verbatim
183 *> INFO is INTEGER
184 *> = 0 : successful exit.
185 *> < 0 : if INFO = -i, then the i-th argument had an illegal value
186 *> \endverbatim
187 *
188 * Authors:
189 * ========
190 *
191 *> \author Univ. of Tennessee
192 *> \author Univ. of California Berkeley
193 *> \author Univ. of Colorado Denver
194 *> \author NAG Ltd.
195 *
196 *> \date September 2012
197 *
198 *> \ingroup realOTHERcomputational
199 *
200 *> \par Further Details:
201 * =====================
202 *>
203 *> SGSVJ0 is used just to enable SGESVJ to call a simplified version of
204 *> itself to work on a submatrix of the original matrix.
205 *>
206 *> \par Contributors:
207 * ==================
208 *>
209 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
210 *>
211 *> \par Bugs, Examples and Comments:
212 * =================================
213 *>
214 *> Please report all bugs and send interesting test examples and comments to
215 *> drmac@math.hr. Thank you.
216 *
217 * =====================================================================
218  SUBROUTINE sgsvj0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
219  $ sfmin, tol, nsweep, work, lwork, info )
220 *
221 * -- LAPACK computational routine (version 3.4.2) --
222 * -- LAPACK is a software package provided by Univ. of Tennessee, --
223 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224 * September 2012
225 *
226 * .. Scalar Arguments ..
227  INTEGER info, lda, ldv, lwork, m, mv, n, nsweep
228  REAL eps, sfmin, tol
229  CHARACTER*1 jobv
230 * ..
231 * .. Array Arguments ..
232  REAL a( lda, * ), sva( n ), d( n ), v( ldv, * ),
233  $ work( lwork )
234 * ..
235 *
236 * =====================================================================
237 *
238 * .. Local Parameters ..
239  REAL zero, half, one
240  parameter( zero = 0.0e0, half = 0.5e0, one = 1.0e0)
241 * ..
242 * .. Local Scalars ..
243  REAL aapp, aapp0, aapq, aaqq, apoaq, aqoap, big,
244  $ bigtheta, cs, mxaapq, mxsinj, rootbig, rooteps,
245  $ rootsfmin, roottol, small, sn, t, temp1, theta,
246  $ thsign
247  INTEGER blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1,
248  $ iswrot, jbc, jgl, kbl, lkahead, mvl, nbl,
249  $ notrot, p, pskipped, q, rowskip, swband
250  LOGICAL applv, rotok, rsvec
251 * ..
252 * .. Local Arrays ..
253  REAL fastr( 5 )
254 * ..
255 * .. Intrinsic Functions ..
256  INTRINSIC abs, amax1, float, min0, sign, sqrt
257 * ..
258 * .. External Functions ..
259  REAL sdot, snrm2
260  INTEGER isamax
261  LOGICAL lsame
262  EXTERNAL isamax, lsame, sdot, snrm2
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL saxpy, scopy, slascl, slassq, srotm, sswap
266 * ..
267 * .. Executable Statements ..
268 *
269 * Test the input parameters.
270 *
271  applv = lsame( jobv, 'A' )
272  rsvec = lsame( jobv, 'V' )
273  IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
274  info = -1
275  ELSE IF( m.LT.0 ) THEN
276  info = -2
277  ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
278  info = -3
279  ELSE IF( lda.LT.m ) THEN
280  info = -5
281  ELSE IF( ( rsvec.OR.applv ) .AND. ( mv.LT.0 ) ) THEN
282  info = -8
283  ELSE IF( ( rsvec.AND.( ldv.LT.n ) ).OR.
284  $ ( applv.AND.( ldv.LT.mv ) ) ) THEN
285  info = -10
286  ELSE IF( tol.LE.eps ) THEN
287  info = -13
288  ELSE IF( nsweep.LT.0 ) THEN
289  info = -14
290  ELSE IF( lwork.LT.m ) THEN
291  info = -16
292  ELSE
293  info = 0
294  END IF
295 *
296 * #:(
297  IF( info.NE.0 ) THEN
298  CALL xerbla( 'SGSVJ0', -info )
299  return
300  END IF
301 *
302  IF( rsvec ) THEN
303  mvl = n
304  ELSE IF( applv ) THEN
305  mvl = mv
306  END IF
307  rsvec = rsvec .OR. applv
308 
309  rooteps = sqrt( eps )
310  rootsfmin = sqrt( sfmin )
311  small = sfmin / eps
312  big = one / sfmin
313  rootbig = one / rootsfmin
314  bigtheta = one / rooteps
315  roottol = sqrt( tol )
316 *
317 * .. Row-cyclic Jacobi SVD algorithm with column pivoting ..
318 *
319  emptsw = ( n*( n-1 ) ) / 2
320  notrot = 0
321  fastr( 1 ) = zero
322 *
323 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
324 *
325 
326  swband = 0
327 *[TP] SWBAND is a tuning parameter. It is meaningful and effective
328 * if SGESVJ is used as a computational routine in the preconditioned
329 * Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure
330 * ......
331 
332  kbl = min0( 8, n )
333 *[TP] KBL is a tuning parameter that defines the tile size in the
334 * tiling of the p-q loops of pivot pairs. In general, an optimal
335 * value of KBL depends on the matrix dimensions and on the
336 * parameters of the computer's memory.
337 *
338  nbl = n / kbl
339  IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
340 
341  blskip = ( kbl**2 ) + 1
342 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
343 
344  rowskip = min0( 5, kbl )
345 *[TP] ROWSKIP is a tuning parameter.
346 
347  lkahead = 1
348 *[TP] LKAHEAD is a tuning parameter.
349  swband = 0
350  pskipped = 0
351 *
352  DO 1993 i = 1, nsweep
353 * .. go go go ...
354 *
355  mxaapq = zero
356  mxsinj = zero
357  iswrot = 0
358 *
359  notrot = 0
360  pskipped = 0
361 *
362  DO 2000 ibr = 1, nbl
363 
364  igl = ( ibr-1 )*kbl + 1
365 *
366  DO 1002 ir1 = 0, min0( lkahead, nbl-ibr )
367 *
368  igl = igl + ir1*kbl
369 *
370  DO 2001 p = igl, min0( igl+kbl-1, n-1 )
371 
372 * .. de Rijk's pivoting
373  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
374  IF( p.NE.q ) THEN
375  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
376  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1,
377  $ v( 1, q ), 1 )
378  temp1 = sva( p )
379  sva( p ) = sva( q )
380  sva( q ) = temp1
381  temp1 = d( p )
382  d( p ) = d( q )
383  d( q ) = temp1
384  END IF
385 *
386  IF( ir1.EQ.0 ) THEN
387 *
388 * Column norms are periodically updated by explicit
389 * norm computation.
390 * Caveat:
391 * Some BLAS implementations compute SNRM2(M,A(1,p),1)
392 * as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in
393 * overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and
394 * undeflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
395 * Hence, SNRM2 cannot be trusted, not even in the case when
396 * the true norm is far from the under(over)flow boundaries.
397 * If properly implemented SNRM2 is available, the IF-THEN-ELSE
398 * below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)".
399 *
400  IF( ( sva( p ).LT.rootbig ) .AND.
401  $ ( sva( p ).GT.rootsfmin ) ) THEN
402  sva( p ) = snrm2( m, a( 1, p ), 1 )*d( p )
403  ELSE
404  temp1 = zero
405  aapp = one
406  CALL slassq( m, a( 1, p ), 1, temp1, aapp )
407  sva( p ) = temp1*sqrt( aapp )*d( p )
408  END IF
409  aapp = sva( p )
410  ELSE
411  aapp = sva( p )
412  END IF
413 
414 *
415  IF( aapp.GT.zero ) THEN
416 *
417  pskipped = 0
418 *
419  DO 2002 q = p + 1, min0( igl+kbl-1, n )
420 *
421  aaqq = sva( q )
422 
423  IF( aaqq.GT.zero ) THEN
424 *
425  aapp0 = aapp
426  IF( aaqq.GE.one ) THEN
427  rotok = ( small*aapp ).LE.aaqq
428  IF( aapp.LT.( big / aaqq ) ) THEN
429  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
430  $ q ), 1 )*d( p )*d( q ) / aaqq )
431  $ / aapp
432  ELSE
433  CALL scopy( m, a( 1, p ), 1, work, 1 )
434  CALL slascl( 'G', 0, 0, aapp, d( p ),
435  $ m, 1, work, lda, ierr )
436  aapq = sdot( m, work, 1, a( 1, q ),
437  $ 1 )*d( q ) / aaqq
438  END IF
439  ELSE
440  rotok = aapp.LE.( aaqq / small )
441  IF( aapp.GT.( small / aaqq ) ) THEN
442  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
443  $ q ), 1 )*d( p )*d( q ) / aaqq )
444  $ / aapp
445  ELSE
446  CALL scopy( m, a( 1, q ), 1, work, 1 )
447  CALL slascl( 'G', 0, 0, aaqq, d( q ),
448  $ m, 1, work, lda, ierr )
449  aapq = sdot( m, work, 1, a( 1, p ),
450  $ 1 )*d( p ) / aapp
451  END IF
452  END IF
453 *
454  mxaapq = amax1( mxaapq, abs( aapq ) )
455 *
456 * TO rotate or NOT to rotate, THAT is the question ...
457 *
458  IF( abs( aapq ).GT.tol ) THEN
459 *
460 * .. rotate
461 * ROTATED = ROTATED + ONE
462 *
463  IF( ir1.EQ.0 ) THEN
464  notrot = 0
465  pskipped = 0
466  iswrot = iswrot + 1
467  END IF
468 *
469  IF( rotok ) THEN
470 *
471  aqoap = aaqq / aapp
472  apoaq = aapp / aaqq
473  theta = -half*abs( aqoap-apoaq ) / aapq
474 *
475  IF( abs( theta ).GT.bigtheta ) THEN
476 *
477  t = half / theta
478  fastr( 3 ) = t*d( p ) / d( q )
479  fastr( 4 ) = -t*d( q ) / d( p )
480  CALL srotm( m, a( 1, p ), 1,
481  $ a( 1, q ), 1, fastr )
482  IF( rsvec )CALL srotm( mvl,
483  $ v( 1, p ), 1,
484  $ v( 1, q ), 1,
485  $ fastr )
486  sva( q ) = aaqq*sqrt( amax1( zero,
487  $ one+t*apoaq*aapq ) )
488  aapp = aapp*sqrt( amax1( zero,
489  $ one-t*aqoap*aapq ) )
490  mxsinj = amax1( mxsinj, abs( t ) )
491 *
492  ELSE
493 *
494 * .. choose correct signum for THETA and rotate
495 *
496  thsign = -sign( one, aapq )
497  t = one / ( theta+thsign*
498  $ sqrt( one+theta*theta ) )
499  cs = sqrt( one / ( one+t*t ) )
500  sn = t*cs
501 *
502  mxsinj = amax1( mxsinj, abs( sn ) )
503  sva( q ) = aaqq*sqrt( amax1( zero,
504  $ one+t*apoaq*aapq ) )
505  aapp = aapp*sqrt( amax1( zero,
506  $ one-t*aqoap*aapq ) )
507 *
508  apoaq = d( p ) / d( q )
509  aqoap = d( q ) / d( p )
510  IF( d( p ).GE.one ) THEN
511  IF( d( q ).GE.one ) THEN
512  fastr( 3 ) = t*apoaq
513  fastr( 4 ) = -t*aqoap
514  d( p ) = d( p )*cs
515  d( q ) = d( q )*cs
516  CALL srotm( m, a( 1, p ), 1,
517  $ a( 1, q ), 1,
518  $ fastr )
519  IF( rsvec )CALL srotm( mvl,
520  $ v( 1, p ), 1, v( 1, q ),
521  $ 1, fastr )
522  ELSE
523  CALL saxpy( m, -t*aqoap,
524  $ a( 1, q ), 1,
525  $ a( 1, p ), 1 )
526  CALL saxpy( m, cs*sn*apoaq,
527  $ a( 1, p ), 1,
528  $ a( 1, q ), 1 )
529  d( p ) = d( p )*cs
530  d( q ) = d( q ) / cs
531  IF( rsvec ) THEN
532  CALL saxpy( mvl, -t*aqoap,
533  $ v( 1, q ), 1,
534  $ v( 1, p ), 1 )
535  CALL saxpy( mvl,
536  $ cs*sn*apoaq,
537  $ v( 1, p ), 1,
538  $ v( 1, q ), 1 )
539  END IF
540  END IF
541  ELSE
542  IF( d( q ).GE.one ) THEN
543  CALL saxpy( m, t*apoaq,
544  $ a( 1, p ), 1,
545  $ a( 1, q ), 1 )
546  CALL saxpy( m, -cs*sn*aqoap,
547  $ a( 1, q ), 1,
548  $ a( 1, p ), 1 )
549  d( p ) = d( p ) / cs
550  d( q ) = d( q )*cs
551  IF( rsvec ) THEN
552  CALL saxpy( mvl, t*apoaq,
553  $ v( 1, p ), 1,
554  $ v( 1, q ), 1 )
555  CALL saxpy( mvl,
556  $ -cs*sn*aqoap,
557  $ v( 1, q ), 1,
558  $ v( 1, p ), 1 )
559  END IF
560  ELSE
561  IF( d( p ).GE.d( q ) ) THEN
562  CALL saxpy( m, -t*aqoap,
563  $ a( 1, q ), 1,
564  $ a( 1, p ), 1 )
565  CALL saxpy( m, cs*sn*apoaq,
566  $ a( 1, p ), 1,
567  $ a( 1, q ), 1 )
568  d( p ) = d( p )*cs
569  d( q ) = d( q ) / cs
570  IF( rsvec ) THEN
571  CALL saxpy( mvl,
572  $ -t*aqoap,
573  $ v( 1, q ), 1,
574  $ v( 1, p ), 1 )
575  CALL saxpy( mvl,
576  $ cs*sn*apoaq,
577  $ v( 1, p ), 1,
578  $ v( 1, q ), 1 )
579  END IF
580  ELSE
581  CALL saxpy( m, t*apoaq,
582  $ a( 1, p ), 1,
583  $ a( 1, q ), 1 )
584  CALL saxpy( m,
585  $ -cs*sn*aqoap,
586  $ a( 1, q ), 1,
587  $ a( 1, p ), 1 )
588  d( p ) = d( p ) / cs
589  d( q ) = d( q )*cs
590  IF( rsvec ) THEN
591  CALL saxpy( mvl,
592  $ t*apoaq, v( 1, p ),
593  $ 1, v( 1, q ), 1 )
594  CALL saxpy( mvl,
595  $ -cs*sn*aqoap,
596  $ v( 1, q ), 1,
597  $ v( 1, p ), 1 )
598  END IF
599  END IF
600  END IF
601  END IF
602  END IF
603 *
604  ELSE
605 * .. have to use modified Gram-Schmidt like transformation
606  CALL scopy( m, a( 1, p ), 1, work, 1 )
607  CALL slascl( 'G', 0, 0, aapp, one, m,
608  $ 1, work, lda, ierr )
609  CALL slascl( 'G', 0, 0, aaqq, one, m,
610  $ 1, a( 1, q ), lda, ierr )
611  temp1 = -aapq*d( p ) / d( q )
612  CALL saxpy( m, temp1, work, 1,
613  $ a( 1, q ), 1 )
614  CALL slascl( 'G', 0, 0, one, aaqq, m,
615  $ 1, a( 1, q ), lda, ierr )
616  sva( q ) = aaqq*sqrt( amax1( zero,
617  $ one-aapq*aapq ) )
618  mxsinj = amax1( mxsinj, sfmin )
619  END IF
620 * END IF ROTOK THEN ... ELSE
621 *
622 * In the case of cancellation in updating SVA(q), SVA(p)
623 * recompute SVA(q), SVA(p).
624  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
625  $ THEN
626  IF( ( aaqq.LT.rootbig ) .AND.
627  $ ( aaqq.GT.rootsfmin ) ) THEN
628  sva( q ) = snrm2( m, a( 1, q ), 1 )*
629  $ d( q )
630  ELSE
631  t = zero
632  aaqq = one
633  CALL slassq( m, a( 1, q ), 1, t,
634  $ aaqq )
635  sva( q ) = t*sqrt( aaqq )*d( q )
636  END IF
637  END IF
638  IF( ( aapp / aapp0 ).LE.rooteps ) THEN
639  IF( ( aapp.LT.rootbig ) .AND.
640  $ ( aapp.GT.rootsfmin ) ) THEN
641  aapp = snrm2( m, a( 1, p ), 1 )*
642  $ d( p )
643  ELSE
644  t = zero
645  aapp = one
646  CALL slassq( m, a( 1, p ), 1, t,
647  $ aapp )
648  aapp = t*sqrt( aapp )*d( p )
649  END IF
650  sva( p ) = aapp
651  END IF
652 *
653  ELSE
654 * A(:,p) and A(:,q) already numerically orthogonal
655  IF( ir1.EQ.0 )notrot = notrot + 1
656  pskipped = pskipped + 1
657  END IF
658  ELSE
659 * A(:,q) is zero column
660  IF( ir1.EQ.0 )notrot = notrot + 1
661  pskipped = pskipped + 1
662  END IF
663 *
664  IF( ( i.LE.swband ) .AND.
665  $ ( pskipped.GT.rowskip ) ) THEN
666  IF( ir1.EQ.0 )aapp = -aapp
667  notrot = 0
668  go to 2103
669  END IF
670 *
671  2002 continue
672 * END q-LOOP
673 *
674  2103 continue
675 * bailed out of q-loop
676 
677  sva( p ) = aapp
678 
679  ELSE
680  sva( p ) = aapp
681  IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
682  $ notrot = notrot + min0( igl+kbl-1, n ) - p
683  END IF
684 *
685  2001 continue
686 * end of the p-loop
687 * end of doing the block ( ibr, ibr )
688  1002 continue
689 * end of ir1-loop
690 *
691 *........................................................
692 * ... go to the off diagonal blocks
693 *
694  igl = ( ibr-1 )*kbl + 1
695 *
696  DO 2010 jbc = ibr + 1, nbl
697 *
698  jgl = ( jbc-1 )*kbl + 1
699 *
700 * doing the block at ( ibr, jbc )
701 *
702  ijblsk = 0
703  DO 2100 p = igl, min0( igl+kbl-1, n )
704 *
705  aapp = sva( p )
706 *
707  IF( aapp.GT.zero ) THEN
708 *
709  pskipped = 0
710 *
711  DO 2200 q = jgl, min0( jgl+kbl-1, n )
712 *
713  aaqq = sva( q )
714 *
715  IF( aaqq.GT.zero ) THEN
716  aapp0 = aapp
717 *
718 * .. M x 2 Jacobi SVD ..
719 *
720 * .. Safe Gram matrix computation ..
721 *
722  IF( aaqq.GE.one ) THEN
723  IF( aapp.GE.aaqq ) THEN
724  rotok = ( small*aapp ).LE.aaqq
725  ELSE
726  rotok = ( small*aaqq ).LE.aapp
727  END IF
728  IF( aapp.LT.( big / aaqq ) ) THEN
729  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
730  $ q ), 1 )*d( p )*d( q ) / aaqq )
731  $ / aapp
732  ELSE
733  CALL scopy( m, a( 1, p ), 1, work, 1 )
734  CALL slascl( 'G', 0, 0, aapp, d( p ),
735  $ m, 1, work, lda, ierr )
736  aapq = sdot( m, work, 1, a( 1, q ),
737  $ 1 )*d( q ) / aaqq
738  END IF
739  ELSE
740  IF( aapp.GE.aaqq ) THEN
741  rotok = aapp.LE.( aaqq / small )
742  ELSE
743  rotok = aaqq.LE.( aapp / small )
744  END IF
745  IF( aapp.GT.( small / aaqq ) ) THEN
746  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
747  $ q ), 1 )*d( p )*d( q ) / aaqq )
748  $ / aapp
749  ELSE
750  CALL scopy( m, a( 1, q ), 1, work, 1 )
751  CALL slascl( 'G', 0, 0, aaqq, d( q ),
752  $ m, 1, work, lda, ierr )
753  aapq = sdot( m, work, 1, a( 1, p ),
754  $ 1 )*d( p ) / aapp
755  END IF
756  END IF
757 *
758  mxaapq = amax1( mxaapq, abs( aapq ) )
759 *
760 * TO rotate or NOT to rotate, THAT is the question ...
761 *
762  IF( abs( aapq ).GT.tol ) THEN
763  notrot = 0
764 * ROTATED = ROTATED + 1
765  pskipped = 0
766  iswrot = iswrot + 1
767 *
768  IF( rotok ) THEN
769 *
770  aqoap = aaqq / aapp
771  apoaq = aapp / aaqq
772  theta = -half*abs( aqoap-apoaq ) / aapq
773  IF( aaqq.GT.aapp0 )theta = -theta
774 *
775  IF( abs( theta ).GT.bigtheta ) THEN
776  t = half / theta
777  fastr( 3 ) = t*d( p ) / d( q )
778  fastr( 4 ) = -t*d( q ) / d( p )
779  CALL srotm( m, a( 1, p ), 1,
780  $ a( 1, q ), 1, fastr )
781  IF( rsvec )CALL srotm( mvl,
782  $ v( 1, p ), 1,
783  $ v( 1, q ), 1,
784  $ fastr )
785  sva( q ) = aaqq*sqrt( amax1( zero,
786  $ one+t*apoaq*aapq ) )
787  aapp = aapp*sqrt( amax1( zero,
788  $ one-t*aqoap*aapq ) )
789  mxsinj = amax1( mxsinj, abs( t ) )
790  ELSE
791 *
792 * .. choose correct signum for THETA and rotate
793 *
794  thsign = -sign( one, aapq )
795  IF( aaqq.GT.aapp0 )thsign = -thsign
796  t = one / ( theta+thsign*
797  $ sqrt( one+theta*theta ) )
798  cs = sqrt( one / ( one+t*t ) )
799  sn = t*cs
800  mxsinj = amax1( mxsinj, abs( sn ) )
801  sva( q ) = aaqq*sqrt( amax1( zero,
802  $ one+t*apoaq*aapq ) )
803  aapp = aapp*sqrt( amax1( zero,
804  $ one-t*aqoap*aapq ) )
805 *
806  apoaq = d( p ) / d( q )
807  aqoap = d( q ) / d( p )
808  IF( d( p ).GE.one ) THEN
809 *
810  IF( d( q ).GE.one ) THEN
811  fastr( 3 ) = t*apoaq
812  fastr( 4 ) = -t*aqoap
813  d( p ) = d( p )*cs
814  d( q ) = d( q )*cs
815  CALL srotm( m, a( 1, p ), 1,
816  $ a( 1, q ), 1,
817  $ fastr )
818  IF( rsvec )CALL srotm( mvl,
819  $ v( 1, p ), 1, v( 1, q ),
820  $ 1, fastr )
821  ELSE
822  CALL saxpy( m, -t*aqoap,
823  $ a( 1, q ), 1,
824  $ a( 1, p ), 1 )
825  CALL saxpy( m, cs*sn*apoaq,
826  $ a( 1, p ), 1,
827  $ a( 1, q ), 1 )
828  IF( rsvec ) THEN
829  CALL saxpy( mvl, -t*aqoap,
830  $ v( 1, q ), 1,
831  $ v( 1, p ), 1 )
832  CALL saxpy( mvl,
833  $ cs*sn*apoaq,
834  $ v( 1, p ), 1,
835  $ v( 1, q ), 1 )
836  END IF
837  d( p ) = d( p )*cs
838  d( q ) = d( q ) / cs
839  END IF
840  ELSE
841  IF( d( q ).GE.one ) THEN
842  CALL saxpy( m, t*apoaq,
843  $ a( 1, p ), 1,
844  $ a( 1, q ), 1 )
845  CALL saxpy( m, -cs*sn*aqoap,
846  $ a( 1, q ), 1,
847  $ a( 1, p ), 1 )
848  IF( rsvec ) THEN
849  CALL saxpy( mvl, t*apoaq,
850  $ v( 1, p ), 1,
851  $ v( 1, q ), 1 )
852  CALL saxpy( mvl,
853  $ -cs*sn*aqoap,
854  $ v( 1, q ), 1,
855  $ v( 1, p ), 1 )
856  END IF
857  d( p ) = d( p ) / cs
858  d( q ) = d( q )*cs
859  ELSE
860  IF( d( p ).GE.d( q ) ) THEN
861  CALL saxpy( m, -t*aqoap,
862  $ a( 1, q ), 1,
863  $ a( 1, p ), 1 )
864  CALL saxpy( m, cs*sn*apoaq,
865  $ a( 1, p ), 1,
866  $ a( 1, q ), 1 )
867  d( p ) = d( p )*cs
868  d( q ) = d( q ) / cs
869  IF( rsvec ) THEN
870  CALL saxpy( mvl,
871  $ -t*aqoap,
872  $ v( 1, q ), 1,
873  $ v( 1, p ), 1 )
874  CALL saxpy( mvl,
875  $ cs*sn*apoaq,
876  $ v( 1, p ), 1,
877  $ v( 1, q ), 1 )
878  END IF
879  ELSE
880  CALL saxpy( m, t*apoaq,
881  $ a( 1, p ), 1,
882  $ a( 1, q ), 1 )
883  CALL saxpy( m,
884  $ -cs*sn*aqoap,
885  $ a( 1, q ), 1,
886  $ a( 1, p ), 1 )
887  d( p ) = d( p ) / cs
888  d( q ) = d( q )*cs
889  IF( rsvec ) THEN
890  CALL saxpy( mvl,
891  $ t*apoaq, v( 1, p ),
892  $ 1, v( 1, q ), 1 )
893  CALL saxpy( mvl,
894  $ -cs*sn*aqoap,
895  $ v( 1, q ), 1,
896  $ v( 1, p ), 1 )
897  END IF
898  END IF
899  END IF
900  END IF
901  END IF
902 *
903  ELSE
904  IF( aapp.GT.aaqq ) THEN
905  CALL scopy( m, a( 1, p ), 1, work,
906  $ 1 )
907  CALL slascl( 'G', 0, 0, aapp, one,
908  $ m, 1, work, lda, ierr )
909  CALL slascl( 'G', 0, 0, aaqq, one,
910  $ m, 1, a( 1, q ), lda,
911  $ ierr )
912  temp1 = -aapq*d( p ) / d( q )
913  CALL saxpy( m, temp1, work, 1,
914  $ a( 1, q ), 1 )
915  CALL slascl( 'G', 0, 0, one, aaqq,
916  $ m, 1, a( 1, q ), lda,
917  $ ierr )
918  sva( q ) = aaqq*sqrt( amax1( zero,
919  $ one-aapq*aapq ) )
920  mxsinj = amax1( mxsinj, sfmin )
921  ELSE
922  CALL scopy( m, a( 1, q ), 1, work,
923  $ 1 )
924  CALL slascl( 'G', 0, 0, aaqq, one,
925  $ m, 1, work, lda, ierr )
926  CALL slascl( 'G', 0, 0, aapp, one,
927  $ m, 1, a( 1, p ), lda,
928  $ ierr )
929  temp1 = -aapq*d( q ) / d( p )
930  CALL saxpy( m, temp1, work, 1,
931  $ a( 1, p ), 1 )
932  CALL slascl( 'G', 0, 0, one, aapp,
933  $ m, 1, a( 1, p ), lda,
934  $ ierr )
935  sva( p ) = aapp*sqrt( amax1( zero,
936  $ one-aapq*aapq ) )
937  mxsinj = amax1( mxsinj, sfmin )
938  END IF
939  END IF
940 * END IF ROTOK THEN ... ELSE
941 *
942 * In the case of cancellation in updating SVA(q)
943 * .. recompute SVA(q)
944  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
945  $ THEN
946  IF( ( aaqq.LT.rootbig ) .AND.
947  $ ( aaqq.GT.rootsfmin ) ) THEN
948  sva( q ) = snrm2( m, a( 1, q ), 1 )*
949  $ d( q )
950  ELSE
951  t = zero
952  aaqq = one
953  CALL slassq( m, a( 1, q ), 1, t,
954  $ aaqq )
955  sva( q ) = t*sqrt( aaqq )*d( q )
956  END IF
957  END IF
958  IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
959  IF( ( aapp.LT.rootbig ) .AND.
960  $ ( aapp.GT.rootsfmin ) ) THEN
961  aapp = snrm2( m, a( 1, p ), 1 )*
962  $ d( p )
963  ELSE
964  t = zero
965  aapp = one
966  CALL slassq( m, a( 1, p ), 1, t,
967  $ aapp )
968  aapp = t*sqrt( aapp )*d( p )
969  END IF
970  sva( p ) = aapp
971  END IF
972 * end of OK rotation
973  ELSE
974  notrot = notrot + 1
975  pskipped = pskipped + 1
976  ijblsk = ijblsk + 1
977  END IF
978  ELSE
979  notrot = notrot + 1
980  pskipped = pskipped + 1
981  ijblsk = ijblsk + 1
982  END IF
983 *
984  IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
985  $ THEN
986  sva( p ) = aapp
987  notrot = 0
988  go to 2011
989  END IF
990  IF( ( i.LE.swband ) .AND.
991  $ ( pskipped.GT.rowskip ) ) THEN
992  aapp = -aapp
993  notrot = 0
994  go to 2203
995  END IF
996 *
997  2200 continue
998 * end of the q-loop
999  2203 continue
1000 *
1001  sva( p ) = aapp
1002 *
1003  ELSE
1004  IF( aapp.EQ.zero )notrot = notrot +
1005  $ min0( jgl+kbl-1, n ) - jgl + 1
1006  IF( aapp.LT.zero )notrot = 0
1007  END IF
1008 
1009  2100 continue
1010 * end of the p-loop
1011  2010 continue
1012 * end of the jbc-loop
1013  2011 continue
1014 *2011 bailed out of the jbc-loop
1015  DO 2012 p = igl, min0( igl+kbl-1, n )
1016  sva( p ) = abs( sva( p ) )
1017  2012 continue
1018 *
1019  2000 continue
1020 *2000 :: end of the ibr-loop
1021 *
1022 * .. update SVA(N)
1023  IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1024  $ THEN
1025  sva( n ) = snrm2( m, a( 1, n ), 1 )*d( n )
1026  ELSE
1027  t = zero
1028  aapp = one
1029  CALL slassq( m, a( 1, n ), 1, t, aapp )
1030  sva( n ) = t*sqrt( aapp )*d( n )
1031  END IF
1032 *
1033 * Additional steering devices
1034 *
1035  IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1036  $ ( iswrot.LE.n ) ) )swband = i
1037 *
1038  IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.float( n )*tol ) .AND.
1039  $ ( float( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1040  go to 1994
1041  END IF
1042 *
1043  IF( notrot.GE.emptsw )go to 1994
1044 
1045  1993 continue
1046 * end i=1:NSWEEP loop
1047 * #:) Reaching this point means that the procedure has comleted the given
1048 * number of iterations.
1049  info = nsweep - 1
1050  go to 1995
1051  1994 continue
1052 * #:) Reaching this point means that during the i-th sweep all pivots were
1053 * below the given tolerance, causing early exit.
1054 *
1055  info = 0
1056 * #:) INFO = 0 confirms successful iterations.
1057  1995 continue
1058 *
1059 * Sort the vector D.
1060  DO 5991 p = 1, n - 1
1061  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
1062  IF( p.NE.q ) THEN
1063  temp1 = sva( p )
1064  sva( p ) = sva( q )
1065  sva( q ) = temp1
1066  temp1 = d( p )
1067  d( p ) = d( q )
1068  d( q ) = temp1
1069  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1070  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1071  END IF
1072  5991 continue
1073 *
1074  return
1075 * ..
1076 * .. END OF SGSVJ0
1077 * ..
1078  END