LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
sgsvj0.f
Go to the documentation of this file.
1*> \brief \b SGSVJ0 pre-processor for the routine sgesvj.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SGSVJ0 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj0.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj0.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj0.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
22* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
26* REAL EPS, SFMIN, TOL
27* CHARACTER*1 JOBV
28* ..
29* .. Array Arguments ..
30* REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ),
31* $ WORK( LWORK )
32* ..
33*
34*
35*> \par Purpose:
36* =============
37*>
38*> \verbatim
39*>
40*> SGSVJ0 is called from SGESVJ as a pre-processor and that is its main
41*> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
42*> it does not check convergence (stopping criterion). Few tuning
43*> parameters (marked by [TP]) are available for the implementer.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] JOBV
50*> \verbatim
51*> JOBV is CHARACTER*1
52*> Specifies whether the output from this procedure is used
53*> to compute the matrix V:
54*> = 'V': the product of the Jacobi rotations is accumulated
55*> by postmultiplying the N-by-N array V.
56*> (See the description of V.)
57*> = 'A': the product of the Jacobi rotations is accumulated
58*> by postmultiplying the MV-by-N array V.
59*> (See the descriptions of MV and V.)
60*> = 'N': the Jacobi rotations are not accumulated.
61*> \endverbatim
62*>
63*> \param[in] M
64*> \verbatim
65*> M is INTEGER
66*> The number of rows of the input matrix A. M >= 0.
67*> \endverbatim
68*>
69*> \param[in] N
70*> \verbatim
71*> N is INTEGER
72*> The number of columns of the input matrix A.
73*> M >= N >= 0.
74*> \endverbatim
75*>
76*> \param[in,out] A
77*> \verbatim
78*> A is REAL array, dimension (LDA,N)
79*> On entry, M-by-N matrix A, such that A*diag(D) represents
80*> the input matrix.
81*> On exit,
82*> A_onexit * D_onexit represents the input matrix A*diag(D)
83*> post-multiplied by a sequence of Jacobi rotations, where the
84*> rotation threshold and the total number of sweeps are given in
85*> TOL and NSWEEP, respectively.
86*> (See the descriptions of D, TOL and NSWEEP.)
87*> \endverbatim
88*>
89*> \param[in] LDA
90*> \verbatim
91*> LDA is INTEGER
92*> The leading dimension of the array A. LDA >= max(1,M).
93*> \endverbatim
94*>
95*> \param[in,out] D
96*> \verbatim
97*> D is REAL array, dimension (N)
98*> The array D accumulates the scaling factors from the fast scaled
99*> Jacobi rotations.
100*> On entry, A*diag(D) represents the input matrix.
101*> On exit, A_onexit*diag(D_onexit) represents the input matrix
102*> post-multiplied by a sequence of Jacobi rotations, where the
103*> rotation threshold and the total number of sweeps are given in
104*> TOL and NSWEEP, respectively.
105*> (See the descriptions of A, TOL and NSWEEP.)
106*> \endverbatim
107*>
108*> \param[in,out] SVA
109*> \verbatim
110*> SVA is REAL array, dimension (N)
111*> On entry, SVA contains the Euclidean norms of the columns of
112*> the matrix A*diag(D).
113*> On exit, SVA contains the Euclidean norms of the columns of
114*> the matrix onexit*diag(D_onexit).
115*> \endverbatim
116*>
117*> \param[in] MV
118*> \verbatim
119*> MV is INTEGER
120*> If JOBV = 'A', then MV rows of V are post-multiplied by a
121*> sequence of Jacobi rotations.
122*> If JOBV = 'N', then MV is not referenced.
123*> \endverbatim
124*>
125*> \param[in,out] V
126*> \verbatim
127*> V is REAL array, dimension (LDV,N)
128*> If JOBV = 'V' then N rows of V are post-multiplied by a
129*> sequence of Jacobi rotations.
130*> If JOBV = 'A' then MV rows of V are post-multiplied by a
131*> sequence of Jacobi rotations.
132*> If JOBV = 'N', then V is not referenced.
133*> \endverbatim
134*>
135*> \param[in] LDV
136*> \verbatim
137*> LDV is INTEGER
138*> The leading dimension of the array V, LDV >= 1.
139*> If JOBV = 'V', LDV >= N.
140*> If JOBV = 'A', LDV >= MV.
141*> \endverbatim
142*>
143*> \param[in] EPS
144*> \verbatim
145*> EPS is REAL
146*> EPS = SLAMCH('Epsilon')
147*> \endverbatim
148*>
149*> \param[in] SFMIN
150*> \verbatim
151*> SFMIN is REAL
152*> SFMIN = SLAMCH('Safe Minimum')
153*> \endverbatim
154*>
155*> \param[in] TOL
156*> \verbatim
157*> TOL is REAL
158*> TOL is the threshold for Jacobi rotations. For a pair
159*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
160*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL.
161*> \endverbatim
162*>
163*> \param[in] NSWEEP
164*> \verbatim
165*> NSWEEP is INTEGER
166*> NSWEEP is the number of sweeps of Jacobi rotations to be
167*> performed.
168*> \endverbatim
169*>
170*> \param[out] WORK
171*> \verbatim
172*> WORK is REAL array, dimension (LWORK)
173*> \endverbatim
174*>
175*> \param[in] LWORK
176*> \verbatim
177*> LWORK is INTEGER
178*> LWORK is the dimension of WORK. LWORK >= M.
179*> \endverbatim
180*>
181*> \param[out] INFO
182*> \verbatim
183*> INFO is INTEGER
184*> = 0: successful exit.
185*> < 0: if INFO = -i, then the i-th argument had an illegal value
186*> \endverbatim
187*
188* Authors:
189* ========
190*
191*> \author Univ. of Tennessee
192*> \author Univ. of California Berkeley
193*> \author Univ. of Colorado Denver
194*> \author NAG Ltd.
195*
196*> \ingroup gsvj0
197*
198*> \par Further Details:
199* =====================
200*>
201*> SGSVJ0 is used just to enable SGESVJ to call a simplified version of
202*> itself to work on a submatrix of the original matrix.
203*>
204*> \par Contributors:
205* ==================
206*>
207*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
208*>
209*> \par Bugs, Examples and Comments:
210* =================================
211*>
212*> Please report all bugs and send interesting test examples and comments to
213*> drmac@math.hr. Thank you.
214*
215* =====================================================================
216 SUBROUTINE sgsvj0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
217 $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
218*
219* -- LAPACK computational routine --
220* -- LAPACK is a software package provided by Univ. of Tennessee, --
221* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
222*
223* .. Scalar Arguments ..
224 INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
225 REAL EPS, SFMIN, TOL
226 CHARACTER*1 JOBV
227* ..
228* .. Array Arguments ..
229 REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ),
230 $ work( lwork )
231* ..
232*
233* =====================================================================
234*
235* .. Local Parameters ..
236 REAL ZERO, HALF, ONE
237 parameter( zero = 0.0e0, half = 0.5e0, one = 1.0e0)
238* ..
239* .. Local Scalars ..
240 REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG,
241 $ bigtheta, cs, mxaapq, mxsinj, rootbig, rooteps,
242 $ rootsfmin, roottol, small, sn, t, temp1, theta,
243 $ thsign
244 INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1,
245 $ iswrot, jbc, jgl, kbl, lkahead, mvl, nbl,
246 $ notrot, p, pskipped, q, rowskip, swband
247 LOGICAL APPLV, ROTOK, RSVEC
248* ..
249* .. Local Arrays ..
250 REAL FASTR( 5 )
251* ..
252* .. Intrinsic Functions ..
253 INTRINSIC abs, max, float, min, sign, sqrt
254* ..
255* .. External Functions ..
256 REAL SDOT, SNRM2
257 INTEGER ISAMAX
258 LOGICAL LSAME
259 EXTERNAL isamax, lsame, sdot, snrm2
260* ..
261* .. External Subroutines ..
262 EXTERNAL saxpy, scopy, slascl, slassq, srotm, sswap,
263 $ xerbla
264* ..
265* .. Executable Statements ..
266*
267* Test the input parameters.
268*
269 applv = lsame( jobv, 'A' )
270 rsvec = lsame( jobv, 'V' )
271 IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
272 info = -1
273 ELSE IF( m.LT.0 ) THEN
274 info = -2
275 ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
276 info = -3
277 ELSE IF( lda.LT.m ) THEN
278 info = -5
279 ELSE IF( ( rsvec.OR.applv ) .AND. ( mv.LT.0 ) ) THEN
280 info = -8
281 ELSE IF( ( rsvec.AND.( ldv.LT.n ) ).OR.
282 $ ( applv.AND.( ldv.LT.mv ) ) ) THEN
283 info = -10
284 ELSE IF( tol.LE.eps ) THEN
285 info = -13
286 ELSE IF( nsweep.LT.0 ) THEN
287 info = -14
288 ELSE IF( lwork.LT.m ) THEN
289 info = -16
290 ELSE
291 info = 0
292 END IF
293*
294* #:(
295 IF( info.NE.0 ) THEN
296 CALL xerbla( 'SGSVJ0', -info )
297 RETURN
298 END IF
299*
300 IF( rsvec ) THEN
301 mvl = n
302 ELSE IF( applv ) THEN
303 mvl = mv
304 END IF
305 rsvec = rsvec .OR. applv
306
307 rooteps = sqrt( eps )
308 rootsfmin = sqrt( sfmin )
309 small = sfmin / eps
310 big = one / sfmin
311 rootbig = one / rootsfmin
312 bigtheta = one / rooteps
313 roottol = sqrt( tol )
314*
315* .. Row-cyclic Jacobi SVD algorithm with column pivoting ..
316*
317 emptsw = ( n*( n-1 ) ) / 2
318 notrot = 0
319 fastr( 1 ) = zero
320*
321* .. Row-cyclic pivot strategy with de Rijk's pivoting ..
322*
323
324 swband = 0
325*[TP] SWBAND is a tuning parameter. It is meaningful and effective
326* if SGESVJ is used as a computational routine in the preconditioned
327* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure
328* ......
329
330 kbl = min( 8, n )
331*[TP] KBL is a tuning parameter that defines the tile size in the
332* tiling of the p-q loops of pivot pairs. In general, an optimal
333* value of KBL depends on the matrix dimensions and on the
334* parameters of the computer's memory.
335*
336 nbl = n / kbl
337 IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
338
339 blskip = ( kbl**2 ) + 1
340*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
341
342 rowskip = min( 5, kbl )
343*[TP] ROWSKIP is a tuning parameter.
344
345 lkahead = 1
346*[TP] LKAHEAD is a tuning parameter.
347 swband = 0
348 pskipped = 0
349*
350 DO 1993 i = 1, nsweep
351* .. go go go ...
352*
353 mxaapq = zero
354 mxsinj = zero
355 iswrot = 0
356*
357 notrot = 0
358 pskipped = 0
359*
360 DO 2000 ibr = 1, nbl
361
362 igl = ( ibr-1 )*kbl + 1
363*
364 DO 1002 ir1 = 0, min( lkahead, nbl-ibr )
365*
366 igl = igl + ir1*kbl
367*
368 DO 2001 p = igl, min( igl+kbl-1, n-1 )
369
370* .. de Rijk's pivoting
371 q = isamax( n-p+1, sva( p ), 1 ) + p - 1
372 IF( p.NE.q ) THEN
373 CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
374 IF( rsvec )CALL sswap( mvl, v( 1, p ), 1,
375 $ v( 1, q ), 1 )
376 temp1 = sva( p )
377 sva( p ) = sva( q )
378 sva( q ) = temp1
379 temp1 = d( p )
380 d( p ) = d( q )
381 d( q ) = temp1
382 END IF
383*
384 IF( ir1.EQ.0 ) THEN
385*
386* Column norms are periodically updated by explicit
387* norm computation.
388* Caveat:
389* Some BLAS implementations compute SNRM2(M,A(1,p),1)
390* as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in
391* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and
392* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
393* Hence, SNRM2 cannot be trusted, not even in the case when
394* the true norm is far from the under(over)flow boundaries.
395* If properly implemented SNRM2 is available, the IF-THEN-ELSE
396* below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)".
397*
398 IF( ( sva( p ).LT.rootbig ) .AND.
399 $ ( sva( p ).GT.rootsfmin ) ) THEN
400 sva( p ) = snrm2( m, a( 1, p ), 1 )*d( p )
401 ELSE
402 temp1 = zero
403 aapp = one
404 CALL slassq( m, a( 1, p ), 1, temp1, aapp )
405 sva( p ) = temp1*sqrt( aapp )*d( p )
406 END IF
407 aapp = sva( p )
408 ELSE
409 aapp = sva( p )
410 END IF
411
412*
413 IF( aapp.GT.zero ) THEN
414*
415 pskipped = 0
416*
417 DO 2002 q = p + 1, min( igl+kbl-1, n )
418*
419 aaqq = sva( q )
420
421 IF( aaqq.GT.zero ) THEN
422*
423 aapp0 = aapp
424 IF( aaqq.GE.one ) THEN
425 rotok = ( small*aapp ).LE.aaqq
426 IF( aapp.LT.( big / aaqq ) ) THEN
427 aapq = ( sdot( m, a( 1, p ), 1, a( 1,
428 $ q ), 1 )*d( p )*d( q ) / aaqq )
429 $ / aapp
430 ELSE
431 CALL scopy( m, a( 1, p ), 1, work, 1 )
432 CALL slascl( 'G', 0, 0, aapp, d( p ),
433 $ m, 1, work, lda, ierr )
434 aapq = sdot( m, work, 1, a( 1, q ),
435 $ 1 )*d( q ) / aaqq
436 END IF
437 ELSE
438 rotok = aapp.LE.( aaqq / small )
439 IF( aapp.GT.( small / aaqq ) ) THEN
440 aapq = ( sdot( m, a( 1, p ), 1, a( 1,
441 $ q ), 1 )*d( p )*d( q ) / aaqq )
442 $ / aapp
443 ELSE
444 CALL scopy( m, a( 1, q ), 1, work, 1 )
445 CALL slascl( 'G', 0, 0, aaqq, d( q ),
446 $ m, 1, work, lda, ierr )
447 aapq = sdot( m, work, 1, a( 1, p ),
448 $ 1 )*d( p ) / aapp
449 END IF
450 END IF
451*
452 mxaapq = max( mxaapq, abs( aapq ) )
453*
454* TO rotate or NOT to rotate, THAT is the question ...
455*
456 IF( abs( aapq ).GT.tol ) THEN
457*
458* .. rotate
459* ROTATED = ROTATED + ONE
460*
461 IF( ir1.EQ.0 ) THEN
462 notrot = 0
463 pskipped = 0
464 iswrot = iswrot + 1
465 END IF
466*
467 IF( rotok ) THEN
468*
469 aqoap = aaqq / aapp
470 apoaq = aapp / aaqq
471 theta = -half*abs( aqoap-apoaq ) / aapq
472*
473 IF( abs( theta ).GT.bigtheta ) THEN
474*
475 t = half / theta
476 fastr( 3 ) = t*d( p ) / d( q )
477 fastr( 4 ) = -t*d( q ) / d( p )
478 CALL srotm( m, a( 1, p ), 1,
479 $ a( 1, q ), 1, fastr )
480 IF( rsvec )CALL srotm( mvl,
481 $ v( 1, p ), 1,
482 $ v( 1, q ), 1,
483 $ fastr )
484 sva( q ) = aaqq*sqrt( max( zero,
485 $ one+t*apoaq*aapq ) )
486 aapp = aapp*sqrt( max( zero,
487 $ one-t*aqoap*aapq ) )
488 mxsinj = max( mxsinj, abs( t ) )
489*
490 ELSE
491*
492* .. choose correct signum for THETA and rotate
493*
494 thsign = -sign( one, aapq )
495 t = one / ( theta+thsign*
496 $ sqrt( one+theta*theta ) )
497 cs = sqrt( one / ( one+t*t ) )
498 sn = t*cs
499*
500 mxsinj = max( mxsinj, abs( sn ) )
501 sva( q ) = aaqq*sqrt( max( zero,
502 $ one+t*apoaq*aapq ) )
503 aapp = aapp*sqrt( max( zero,
504 $ one-t*aqoap*aapq ) )
505*
506 apoaq = d( p ) / d( q )
507 aqoap = d( q ) / d( p )
508 IF( d( p ).GE.one ) THEN
509 IF( d( q ).GE.one ) THEN
510 fastr( 3 ) = t*apoaq
511 fastr( 4 ) = -t*aqoap
512 d( p ) = d( p )*cs
513 d( q ) = d( q )*cs
514 CALL srotm( m, a( 1, p ), 1,
515 $ a( 1, q ), 1,
516 $ fastr )
517 IF( rsvec )CALL srotm( mvl,
518 $ v( 1, p ), 1, v( 1, q ),
519 $ 1, fastr )
520 ELSE
521 CALL saxpy( m, -t*aqoap,
522 $ a( 1, q ), 1,
523 $ a( 1, p ), 1 )
524 CALL saxpy( m, cs*sn*apoaq,
525 $ a( 1, p ), 1,
526 $ a( 1, q ), 1 )
527 d( p ) = d( p )*cs
528 d( q ) = d( q ) / cs
529 IF( rsvec ) THEN
530 CALL saxpy( mvl, -t*aqoap,
531 $ v( 1, q ), 1,
532 $ v( 1, p ), 1 )
533 CALL saxpy( mvl,
534 $ cs*sn*apoaq,
535 $ v( 1, p ), 1,
536 $ v( 1, q ), 1 )
537 END IF
538 END IF
539 ELSE
540 IF( d( q ).GE.one ) THEN
541 CALL saxpy( m, t*apoaq,
542 $ a( 1, p ), 1,
543 $ a( 1, q ), 1 )
544 CALL saxpy( m, -cs*sn*aqoap,
545 $ a( 1, q ), 1,
546 $ a( 1, p ), 1 )
547 d( p ) = d( p ) / cs
548 d( q ) = d( q )*cs
549 IF( rsvec ) THEN
550 CALL saxpy( mvl, t*apoaq,
551 $ v( 1, p ), 1,
552 $ v( 1, q ), 1 )
553 CALL saxpy( mvl,
554 $ -cs*sn*aqoap,
555 $ v( 1, q ), 1,
556 $ v( 1, p ), 1 )
557 END IF
558 ELSE
559 IF( d( p ).GE.d( q ) ) THEN
560 CALL saxpy( m, -t*aqoap,
561 $ a( 1, q ), 1,
562 $ a( 1, p ), 1 )
563 CALL saxpy( m, cs*sn*apoaq,
564 $ a( 1, p ), 1,
565 $ a( 1, q ), 1 )
566 d( p ) = d( p )*cs
567 d( q ) = d( q ) / cs
568 IF( rsvec ) THEN
569 CALL saxpy( mvl,
570 $ -t*aqoap,
571 $ v( 1, q ), 1,
572 $ v( 1, p ), 1 )
573 CALL saxpy( mvl,
574 $ cs*sn*apoaq,
575 $ v( 1, p ), 1,
576 $ v( 1, q ), 1 )
577 END IF
578 ELSE
579 CALL saxpy( m, t*apoaq,
580 $ a( 1, p ), 1,
581 $ a( 1, q ), 1 )
582 CALL saxpy( m,
583 $ -cs*sn*aqoap,
584 $ a( 1, q ), 1,
585 $ a( 1, p ), 1 )
586 d( p ) = d( p ) / cs
587 d( q ) = d( q )*cs
588 IF( rsvec ) THEN
589 CALL saxpy( mvl,
590 $ t*apoaq, v( 1, p ),
591 $ 1, v( 1, q ), 1 )
592 CALL saxpy( mvl,
593 $ -cs*sn*aqoap,
594 $ v( 1, q ), 1,
595 $ v( 1, p ), 1 )
596 END IF
597 END IF
598 END IF
599 END IF
600 END IF
601*
602 ELSE
603* .. have to use modified Gram-Schmidt like transformation
604 CALL scopy( m, a( 1, p ), 1, work, 1 )
605 CALL slascl( 'G', 0, 0, aapp, one, m,
606 $ 1, work, lda, ierr )
607 CALL slascl( 'G', 0, 0, aaqq, one, m,
608 $ 1, a( 1, q ), lda, ierr )
609 temp1 = -aapq*d( p ) / d( q )
610 CALL saxpy( m, temp1, work, 1,
611 $ a( 1, q ), 1 )
612 CALL slascl( 'G', 0, 0, one, aaqq, m,
613 $ 1, a( 1, q ), lda, ierr )
614 sva( q ) = aaqq*sqrt( max( zero,
615 $ one-aapq*aapq ) )
616 mxsinj = max( mxsinj, sfmin )
617 END IF
618* END IF ROTOK THEN ... ELSE
619*
620* In the case of cancellation in updating SVA(q), SVA(p)
621* recompute SVA(q), SVA(p).
622 IF( ( sva( q ) / aaqq )**2.LE.rooteps )
623 $ THEN
624 IF( ( aaqq.LT.rootbig ) .AND.
625 $ ( aaqq.GT.rootsfmin ) ) THEN
626 sva( q ) = snrm2( m, a( 1, q ), 1 )*
627 $ d( q )
628 ELSE
629 t = zero
630 aaqq = one
631 CALL slassq( m, a( 1, q ), 1, t,
632 $ aaqq )
633 sva( q ) = t*sqrt( aaqq )*d( q )
634 END IF
635 END IF
636 IF( ( aapp / aapp0 ).LE.rooteps ) THEN
637 IF( ( aapp.LT.rootbig ) .AND.
638 $ ( aapp.GT.rootsfmin ) ) THEN
639 aapp = snrm2( m, a( 1, p ), 1 )*
640 $ d( p )
641 ELSE
642 t = zero
643 aapp = one
644 CALL slassq( m, a( 1, p ), 1, t,
645 $ aapp )
646 aapp = t*sqrt( aapp )*d( p )
647 END IF
648 sva( p ) = aapp
649 END IF
650*
651 ELSE
652* A(:,p) and A(:,q) already numerically orthogonal
653 IF( ir1.EQ.0 )notrot = notrot + 1
654 pskipped = pskipped + 1
655 END IF
656 ELSE
657* A(:,q) is zero column
658 IF( ir1.EQ.0 )notrot = notrot + 1
659 pskipped = pskipped + 1
660 END IF
661*
662 IF( ( i.LE.swband ) .AND.
663 $ ( pskipped.GT.rowskip ) ) THEN
664 IF( ir1.EQ.0 )aapp = -aapp
665 notrot = 0
666 GO TO 2103
667 END IF
668*
669 2002 CONTINUE
670* END q-LOOP
671*
672 2103 CONTINUE
673* bailed out of q-loop
674
675 sva( p ) = aapp
676
677 ELSE
678 sva( p ) = aapp
679 IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
680 $ notrot = notrot + min( igl+kbl-1, n ) - p
681 END IF
682*
683 2001 CONTINUE
684* end of the p-loop
685* end of doing the block ( ibr, ibr )
686 1002 CONTINUE
687* end of ir1-loop
688*
689*........................................................
690* ... go to the off diagonal blocks
691*
692 igl = ( ibr-1 )*kbl + 1
693*
694 DO 2010 jbc = ibr + 1, nbl
695*
696 jgl = ( jbc-1 )*kbl + 1
697*
698* doing the block at ( ibr, jbc )
699*
700 ijblsk = 0
701 DO 2100 p = igl, min( igl+kbl-1, n )
702*
703 aapp = sva( p )
704*
705 IF( aapp.GT.zero ) THEN
706*
707 pskipped = 0
708*
709 DO 2200 q = jgl, min( jgl+kbl-1, n )
710*
711 aaqq = sva( q )
712*
713 IF( aaqq.GT.zero ) THEN
714 aapp0 = aapp
715*
716* .. M x 2 Jacobi SVD ..
717*
718* .. Safe Gram matrix computation ..
719*
720 IF( aaqq.GE.one ) THEN
721 IF( aapp.GE.aaqq ) THEN
722 rotok = ( small*aapp ).LE.aaqq
723 ELSE
724 rotok = ( small*aaqq ).LE.aapp
725 END IF
726 IF( aapp.LT.( big / aaqq ) ) THEN
727 aapq = ( sdot( m, a( 1, p ), 1, a( 1,
728 $ q ), 1 )*d( p )*d( q ) / aaqq )
729 $ / aapp
730 ELSE
731 CALL scopy( m, a( 1, p ), 1, work, 1 )
732 CALL slascl( 'G', 0, 0, aapp, d( p ),
733 $ m, 1, work, lda, ierr )
734 aapq = sdot( m, work, 1, a( 1, q ),
735 $ 1 )*d( q ) / aaqq
736 END IF
737 ELSE
738 IF( aapp.GE.aaqq ) THEN
739 rotok = aapp.LE.( aaqq / small )
740 ELSE
741 rotok = aaqq.LE.( aapp / small )
742 END IF
743 IF( aapp.GT.( small / aaqq ) ) THEN
744 aapq = ( sdot( m, a( 1, p ), 1, a( 1,
745 $ q ), 1 )*d( p )*d( q ) / aaqq )
746 $ / aapp
747 ELSE
748 CALL scopy( m, a( 1, q ), 1, work, 1 )
749 CALL slascl( 'G', 0, 0, aaqq, d( q ),
750 $ m, 1, work, lda, ierr )
751 aapq = sdot( m, work, 1, a( 1, p ),
752 $ 1 )*d( p ) / aapp
753 END IF
754 END IF
755*
756 mxaapq = max( mxaapq, abs( aapq ) )
757*
758* TO rotate or NOT to rotate, THAT is the question ...
759*
760 IF( abs( aapq ).GT.tol ) THEN
761 notrot = 0
762* ROTATED = ROTATED + 1
763 pskipped = 0
764 iswrot = iswrot + 1
765*
766 IF( rotok ) THEN
767*
768 aqoap = aaqq / aapp
769 apoaq = aapp / aaqq
770 theta = -half*abs( aqoap-apoaq ) / aapq
771 IF( aaqq.GT.aapp0 )theta = -theta
772*
773 IF( abs( theta ).GT.bigtheta ) THEN
774 t = half / theta
775 fastr( 3 ) = t*d( p ) / d( q )
776 fastr( 4 ) = -t*d( q ) / d( p )
777 CALL srotm( m, a( 1, p ), 1,
778 $ a( 1, q ), 1, fastr )
779 IF( rsvec )CALL srotm( mvl,
780 $ v( 1, p ), 1,
781 $ v( 1, q ), 1,
782 $ fastr )
783 sva( q ) = aaqq*sqrt( max( zero,
784 $ one+t*apoaq*aapq ) )
785 aapp = aapp*sqrt( max( zero,
786 $ one-t*aqoap*aapq ) )
787 mxsinj = max( mxsinj, abs( t ) )
788 ELSE
789*
790* .. choose correct signum for THETA and rotate
791*
792 thsign = -sign( one, aapq )
793 IF( aaqq.GT.aapp0 )thsign = -thsign
794 t = one / ( theta+thsign*
795 $ sqrt( one+theta*theta ) )
796 cs = sqrt( one / ( one+t*t ) )
797 sn = t*cs
798 mxsinj = max( mxsinj, abs( sn ) )
799 sva( q ) = aaqq*sqrt( max( zero,
800 $ one+t*apoaq*aapq ) )
801 aapp = aapp*sqrt( max( zero,
802 $ one-t*aqoap*aapq ) )
803*
804 apoaq = d( p ) / d( q )
805 aqoap = d( q ) / d( p )
806 IF( d( p ).GE.one ) THEN
807*
808 IF( d( q ).GE.one ) THEN
809 fastr( 3 ) = t*apoaq
810 fastr( 4 ) = -t*aqoap
811 d( p ) = d( p )*cs
812 d( q ) = d( q )*cs
813 CALL srotm( m, a( 1, p ), 1,
814 $ a( 1, q ), 1,
815 $ fastr )
816 IF( rsvec )CALL srotm( mvl,
817 $ v( 1, p ), 1, v( 1, q ),
818 $ 1, fastr )
819 ELSE
820 CALL saxpy( m, -t*aqoap,
821 $ a( 1, q ), 1,
822 $ a( 1, p ), 1 )
823 CALL saxpy( m, cs*sn*apoaq,
824 $ a( 1, p ), 1,
825 $ a( 1, q ), 1 )
826 IF( rsvec ) THEN
827 CALL saxpy( mvl, -t*aqoap,
828 $ v( 1, q ), 1,
829 $ v( 1, p ), 1 )
830 CALL saxpy( mvl,
831 $ cs*sn*apoaq,
832 $ v( 1, p ), 1,
833 $ v( 1, q ), 1 )
834 END IF
835 d( p ) = d( p )*cs
836 d( q ) = d( q ) / cs
837 END IF
838 ELSE
839 IF( d( q ).GE.one ) THEN
840 CALL saxpy( m, t*apoaq,
841 $ a( 1, p ), 1,
842 $ a( 1, q ), 1 )
843 CALL saxpy( m, -cs*sn*aqoap,
844 $ a( 1, q ), 1,
845 $ a( 1, p ), 1 )
846 IF( rsvec ) THEN
847 CALL saxpy( mvl, t*apoaq,
848 $ v( 1, p ), 1,
849 $ v( 1, q ), 1 )
850 CALL saxpy( mvl,
851 $ -cs*sn*aqoap,
852 $ v( 1, q ), 1,
853 $ v( 1, p ), 1 )
854 END IF
855 d( p ) = d( p ) / cs
856 d( q ) = d( q )*cs
857 ELSE
858 IF( d( p ).GE.d( q ) ) THEN
859 CALL saxpy( m, -t*aqoap,
860 $ a( 1, q ), 1,
861 $ a( 1, p ), 1 )
862 CALL saxpy( m, cs*sn*apoaq,
863 $ a( 1, p ), 1,
864 $ a( 1, q ), 1 )
865 d( p ) = d( p )*cs
866 d( q ) = d( q ) / cs
867 IF( rsvec ) THEN
868 CALL saxpy( mvl,
869 $ -t*aqoap,
870 $ v( 1, q ), 1,
871 $ v( 1, p ), 1 )
872 CALL saxpy( mvl,
873 $ cs*sn*apoaq,
874 $ v( 1, p ), 1,
875 $ v( 1, q ), 1 )
876 END IF
877 ELSE
878 CALL saxpy( m, t*apoaq,
879 $ a( 1, p ), 1,
880 $ a( 1, q ), 1 )
881 CALL saxpy( m,
882 $ -cs*sn*aqoap,
883 $ a( 1, q ), 1,
884 $ a( 1, p ), 1 )
885 d( p ) = d( p ) / cs
886 d( q ) = d( q )*cs
887 IF( rsvec ) THEN
888 CALL saxpy( mvl,
889 $ t*apoaq, v( 1, p ),
890 $ 1, v( 1, q ), 1 )
891 CALL saxpy( mvl,
892 $ -cs*sn*aqoap,
893 $ v( 1, q ), 1,
894 $ v( 1, p ), 1 )
895 END IF
896 END IF
897 END IF
898 END IF
899 END IF
900*
901 ELSE
902 IF( aapp.GT.aaqq ) THEN
903 CALL scopy( m, a( 1, p ), 1, work,
904 $ 1 )
905 CALL slascl( 'G', 0, 0, aapp, one,
906 $ m, 1, work, lda, ierr )
907 CALL slascl( 'G', 0, 0, aaqq, one,
908 $ m, 1, a( 1, q ), lda,
909 $ ierr )
910 temp1 = -aapq*d( p ) / d( q )
911 CALL saxpy( m, temp1, work, 1,
912 $ a( 1, q ), 1 )
913 CALL slascl( 'G', 0, 0, one, aaqq,
914 $ m, 1, a( 1, q ), lda,
915 $ ierr )
916 sva( q ) = aaqq*sqrt( max( zero,
917 $ one-aapq*aapq ) )
918 mxsinj = max( mxsinj, sfmin )
919 ELSE
920 CALL scopy( m, a( 1, q ), 1, work,
921 $ 1 )
922 CALL slascl( 'G', 0, 0, aaqq, one,
923 $ m, 1, work, lda, ierr )
924 CALL slascl( 'G', 0, 0, aapp, one,
925 $ m, 1, a( 1, p ), lda,
926 $ ierr )
927 temp1 = -aapq*d( q ) / d( p )
928 CALL saxpy( m, temp1, work, 1,
929 $ a( 1, p ), 1 )
930 CALL slascl( 'G', 0, 0, one, aapp,
931 $ m, 1, a( 1, p ), lda,
932 $ ierr )
933 sva( p ) = aapp*sqrt( max( zero,
934 $ one-aapq*aapq ) )
935 mxsinj = max( mxsinj, sfmin )
936 END IF
937 END IF
938* END IF ROTOK THEN ... ELSE
939*
940* In the case of cancellation in updating SVA(q)
941* .. recompute SVA(q)
942 IF( ( sva( q ) / aaqq )**2.LE.rooteps )
943 $ THEN
944 IF( ( aaqq.LT.rootbig ) .AND.
945 $ ( aaqq.GT.rootsfmin ) ) THEN
946 sva( q ) = snrm2( m, a( 1, q ), 1 )*
947 $ d( q )
948 ELSE
949 t = zero
950 aaqq = one
951 CALL slassq( m, a( 1, q ), 1, t,
952 $ aaqq )
953 sva( q ) = t*sqrt( aaqq )*d( q )
954 END IF
955 END IF
956 IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
957 IF( ( aapp.LT.rootbig ) .AND.
958 $ ( aapp.GT.rootsfmin ) ) THEN
959 aapp = snrm2( m, a( 1, p ), 1 )*
960 $ d( p )
961 ELSE
962 t = zero
963 aapp = one
964 CALL slassq( m, a( 1, p ), 1, t,
965 $ aapp )
966 aapp = t*sqrt( aapp )*d( p )
967 END IF
968 sva( p ) = aapp
969 END IF
970* end of OK rotation
971 ELSE
972 notrot = notrot + 1
973 pskipped = pskipped + 1
974 ijblsk = ijblsk + 1
975 END IF
976 ELSE
977 notrot = notrot + 1
978 pskipped = pskipped + 1
979 ijblsk = ijblsk + 1
980 END IF
981*
982 IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
983 $ THEN
984 sva( p ) = aapp
985 notrot = 0
986 GO TO 2011
987 END IF
988 IF( ( i.LE.swband ) .AND.
989 $ ( pskipped.GT.rowskip ) ) THEN
990 aapp = -aapp
991 notrot = 0
992 GO TO 2203
993 END IF
994*
995 2200 CONTINUE
996* end of the q-loop
997 2203 CONTINUE
998*
999 sva( p ) = aapp
1000*
1001 ELSE
1002 IF( aapp.EQ.zero )notrot = notrot +
1003 $ min( jgl+kbl-1, n ) - jgl + 1
1004 IF( aapp.LT.zero )notrot = 0
1005 END IF
1006
1007 2100 CONTINUE
1008* end of the p-loop
1009 2010 CONTINUE
1010* end of the jbc-loop
1011 2011 CONTINUE
1012*2011 bailed out of the jbc-loop
1013 DO 2012 p = igl, min( igl+kbl-1, n )
1014 sva( p ) = abs( sva( p ) )
1015 2012 CONTINUE
1016*
1017 2000 CONTINUE
1018*2000 :: end of the ibr-loop
1019*
1020* .. update SVA(N)
1021 IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1022 $ THEN
1023 sva( n ) = snrm2( m, a( 1, n ), 1 )*d( n )
1024 ELSE
1025 t = zero
1026 aapp = one
1027 CALL slassq( m, a( 1, n ), 1, t, aapp )
1028 sva( n ) = t*sqrt( aapp )*d( n )
1029 END IF
1030*
1031* Additional steering devices
1032*
1033 IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1034 $ ( iswrot.LE.n ) ) )swband = i
1035*
1036 IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.float( n )*tol ) .AND.
1037 $ ( float( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1038 GO TO 1994
1039 END IF
1040*
1041 IF( notrot.GE.emptsw )GO TO 1994
1042
1043 1993 CONTINUE
1044* end i=1:NSWEEP loop
1045* #:) Reaching this point means that the procedure has completed the given
1046* number of iterations.
1047 info = nsweep - 1
1048 GO TO 1995
1049 1994 CONTINUE
1050* #:) Reaching this point means that during the i-th sweep all pivots were
1051* below the given tolerance, causing early exit.
1052*
1053 info = 0
1054* #:) INFO = 0 confirms successful iterations.
1055 1995 CONTINUE
1056*
1057* Sort the vector D.
1058 DO 5991 p = 1, n - 1
1059 q = isamax( n-p+1, sva( p ), 1 ) + p - 1
1060 IF( p.NE.q ) THEN
1061 temp1 = sva( p )
1062 sva( p ) = sva( q )
1063 sva( q ) = temp1
1064 temp1 = d( p )
1065 d( p ) = d( q )
1066 d( q ) = temp1
1067 CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1068 IF( rsvec )CALL sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1069 END IF
1070 5991 CONTINUE
1071*
1072 RETURN
1073* ..
1074* .. END OF SGSVJ0
1075* ..
1076 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine saxpy(n, sa, sx, incx, sy, incy)
SAXPY
Definition saxpy.f:89
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82
subroutine sgsvj0(jobv, m, n, a, lda, d, sva, mv, v, ldv, eps, sfmin, tol, nsweep, work, lwork, info)
SGSVJ0 pre-processor for the routine sgesvj.
Definition sgsvj0.f:218
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:143
subroutine slassq(n, x, incx, scale, sumsq)
SLASSQ updates a sum of squares represented in scaled form.
Definition slassq.f90:124
subroutine srotm(n, sx, incx, sy, incy, sparam)
SROTM
Definition srotm.f:97
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
Definition sswap.f:82