001:       SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
002:      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
003:      $                   LDT, NV, WV, LDWV, WORK, LWORK )
004: *
005: *  -- LAPACK auxiliary routine (version 3.2) --
006: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
007: *     November 2006
008: *
009: *     .. Scalar Arguments ..
010:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
011:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
012:       LOGICAL            WANTT, WANTZ
013: *     ..
014: *     .. Array Arguments ..
015:       REAL               H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
016:      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
017:      $                   Z( LDZ, * )
018: *     ..
019: *
020: *     This subroutine is identical to SLAQR3 except that it avoids
021: *     recursion by calling SLAHQR instead of SLAQR4.
022: *
023: *
024: *     ******************************************************************
025: *     Aggressive early deflation:
026: *
027: *     This subroutine accepts as input an upper Hessenberg matrix
028: *     H and performs an orthogonal similarity transformation
029: *     designed to detect and deflate fully converged eigenvalues from
030: *     a trailing principal submatrix.  On output H has been over-
031: *     written by a new Hessenberg matrix that is a perturbation of
032: *     an orthogonal similarity transformation of H.  It is to be
033: *     hoped that the final version of H has many zero subdiagonal
034: *     entries.
035: *
036: *     ******************************************************************
037: *     WANTT   (input) LOGICAL
038: *          If .TRUE., then the Hessenberg matrix H is fully updated
039: *          so that the quasi-triangular Schur factor may be
040: *          computed (in cooperation with the calling subroutine).
041: *          If .FALSE., then only enough of H is updated to preserve
042: *          the eigenvalues.
043: *
044: *     WANTZ   (input) LOGICAL
045: *          If .TRUE., then the orthogonal matrix Z is updated so
046: *          so that the orthogonal Schur factor may be computed
047: *          (in cooperation with the calling subroutine).
048: *          If .FALSE., then Z is not referenced.
049: *
050: *     N       (input) INTEGER
051: *          The order of the matrix H and (if WANTZ is .TRUE.) the
052: *          order of the orthogonal matrix Z.
053: *
054: *     KTOP    (input) INTEGER
055: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
056: *          KBOT and KTOP together determine an isolated block
057: *          along the diagonal of the Hessenberg matrix.
058: *
059: *     KBOT    (input) INTEGER
060: *          It is assumed without a check that either
061: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
062: *          determine an isolated block along the diagonal of the
063: *          Hessenberg matrix.
064: *
065: *     NW      (input) INTEGER
066: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
067: *
068: *     H       (input/output) REAL array, dimension (LDH,N)
069: *          On input the initial N-by-N section of H stores the
070: *          Hessenberg matrix undergoing aggressive early deflation.
071: *          On output H has been transformed by an orthogonal
072: *          similarity transformation, perturbed, and the returned
073: *          to Hessenberg form that (it is to be hoped) has some
074: *          zero subdiagonal entries.
075: *
076: *     LDH     (input) integer
077: *          Leading dimension of H just as declared in the calling
078: *          subroutine.  N .LE. LDH
079: *
080: *     ILOZ    (input) INTEGER
081: *     IHIZ    (input) INTEGER
082: *          Specify the rows of Z to which transformations must be
083: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
084: *
085: *     Z       (input/output) REAL array, dimension (LDZ,N)
086: *          IF WANTZ is .TRUE., then on output, the orthogonal
087: *          similarity transformation mentioned above has been
088: *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
089: *          If WANTZ is .FALSE., then Z is unreferenced.
090: *
091: *     LDZ     (input) integer
092: *          The leading dimension of Z just as declared in the
093: *          calling subroutine.  1 .LE. LDZ.
094: *
095: *     NS      (output) integer
096: *          The number of unconverged (ie approximate) eigenvalues
097: *          returned in SR and SI that may be used as shifts by the
098: *          calling subroutine.
099: *
100: *     ND      (output) integer
101: *          The number of converged eigenvalues uncovered by this
102: *          subroutine.
103: *
104: *     SR      (output) REAL array, dimension KBOT
105: *     SI      (output) REAL array, dimension KBOT
106: *          On output, the real and imaginary parts of approximate
107: *          eigenvalues that may be used for shifts are stored in
108: *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109: *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110: *          The real and imaginary parts of converged eigenvalues
111: *          are stored in SR(KBOT-ND+1) through SR(KBOT) and
112: *          SI(KBOT-ND+1) through SI(KBOT), respectively.
113: *
114: *     V       (workspace) REAL array, dimension (LDV,NW)
115: *          An NW-by-NW work array.
116: *
117: *     LDV     (input) integer scalar
118: *          The leading dimension of V just as declared in the
119: *          calling subroutine.  NW .LE. LDV
120: *
121: *     NH      (input) integer scalar
122: *          The number of columns of T.  NH.GE.NW.
123: *
124: *     T       (workspace) REAL array, dimension (LDT,NW)
125: *
126: *     LDT     (input) integer
127: *          The leading dimension of T just as declared in the
128: *          calling subroutine.  NW .LE. LDT
129: *
130: *     NV      (input) integer
131: *          The number of rows of work array WV available for
132: *          workspace.  NV.GE.NW.
133: *
134: *     WV      (workspace) REAL array, dimension (LDWV,NW)
135: *
136: *     LDWV    (input) integer
137: *          The leading dimension of W just as declared in the
138: *          calling subroutine.  NW .LE. LDV
139: *
140: *     WORK    (workspace) REAL array, dimension LWORK.
141: *          On exit, WORK(1) is set to an estimate of the optimal value
142: *          of LWORK for the given values of N, NW, KTOP and KBOT.
143: *
144: *     LWORK   (input) integer
145: *          The dimension of the work array WORK.  LWORK = 2*NW
146: *          suffices, but greater efficiency may result from larger
147: *          values of LWORK.
148: *
149: *          If LWORK = -1, then a workspace query is assumed; SLAQR2
150: *          only estimates the optimal workspace size for the given
151: *          values of N, NW, KTOP and KBOT.  The estimate is returned
152: *          in WORK(1).  No error message related to LWORK is issued
153: *          by XERBLA.  Neither H nor Z are accessed.
154: *
155: *     ================================================================
156: *     Based on contributions by
157: *        Karen Braman and Ralph Byers, Department of Mathematics,
158: *        University of Kansas, USA
159: *
160: *     ================================================================
161: *     .. Parameters ..
162:       REAL               ZERO, ONE
163:       PARAMETER          ( ZERO = 0.0e0, ONE = 1.0e0 )
164: *     ..
165: *     .. Local Scalars ..
166:       REAL               AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167:      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169:      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170:      $                   LWKOPT
171:       LOGICAL            BULGE, SORTED
172: *     ..
173: *     .. External Functions ..
174:       REAL               SLAMCH
175:       EXTERNAL           SLAMCH
176: *     ..
177: *     .. External Subroutines ..
178:       EXTERNAL           SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR,
179:      $                   SLANV2, SLARF, SLARFG, SLASET, SORMHR, STREXC
180: *     ..
181: *     .. Intrinsic Functions ..
182:       INTRINSIC          ABS, INT, MAX, MIN, REAL, SQRT
183: *     ..
184: *     .. Executable Statements ..
185: *
186: *     ==== Estimate optimal workspace. ====
187: *
188:       JW = MIN( NW, KBOT-KTOP+1 )
189:       IF( JW.LE.2 ) THEN
190:          LWKOPT = 1
191:       ELSE
192: *
193: *        ==== Workspace query call to SGEHRD ====
194: *
195:          CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196:          LWK1 = INT( WORK( 1 ) )
197: *
198: *        ==== Workspace query call to SORMHR ====
199: *
200:          CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201:      $                WORK( JW+1 ), -1, INFO )
202:          LWK2 = INT( WORK( JW+1 ) )
203: *
204: *        ==== Optimal workspace ====
205: *
206:          LWKOPT = JW + MAX( LWK1, LWK2 )
207:       END IF
208: *
209: *     ==== Quick return in case of workspace query. ====
210: *
211:       IF( LWORK.EQ.-1 ) THEN
212:          WORK( 1 ) = REAL( LWKOPT )
213:          RETURN
214:       END IF
215: *
216: *     ==== Nothing to do ...
217: *     ... for an empty active block ... ====
218:       NS = 0
219:       ND = 0
220:       WORK( 1 ) = ONE
221:       IF( KTOP.GT.KBOT )
222:      $   RETURN
223: *     ... nor for an empty deflation window. ====
224:       IF( NW.LT.1 )
225:      $   RETURN
226: *
227: *     ==== Machine constants ====
228: *
229:       SAFMIN = SLAMCH( 'SAFE MINIMUM' )
230:       SAFMAX = ONE / SAFMIN
231:       CALL SLABAD( SAFMIN, SAFMAX )
232:       ULP = SLAMCH( 'PRECISION' )
233:       SMLNUM = SAFMIN*( REAL( N ) / ULP )
234: *
235: *     ==== Setup deflation window ====
236: *
237:       JW = MIN( NW, KBOT-KTOP+1 )
238:       KWTOP = KBOT - JW + 1
239:       IF( KWTOP.EQ.KTOP ) THEN
240:          S = ZERO
241:       ELSE
242:          S = H( KWTOP, KWTOP-1 )
243:       END IF
244: *
245:       IF( KBOT.EQ.KWTOP ) THEN
246: *
247: *        ==== 1-by-1 deflation window: not much to do ====
248: *
249:          SR( KWTOP ) = H( KWTOP, KWTOP )
250:          SI( KWTOP ) = ZERO
251:          NS = 1
252:          ND = 0
253:          IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
254:      $        THEN
255:             NS = 0
256:             ND = 1
257:             IF( KWTOP.GT.KTOP )
258:      $         H( KWTOP, KWTOP-1 ) = ZERO
259:          END IF
260:          WORK( 1 ) = ONE
261:          RETURN
262:       END IF
263: *
264: *     ==== Convert to spike-triangular form.  (In case of a
265: *     .    rare QR failure, this routine continues to do
266: *     .    aggressive early deflation using that part of
267: *     .    the deflation window that converged using INFQR
268: *     .    here and there to keep track.) ====
269: *
270:       CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
271:       CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
272: *
273:       CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
274:       CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
275:      $             SI( KWTOP ), 1, JW, V, LDV, INFQR )
276: *
277: *     ==== STREXC needs a clean margin near the diagonal ====
278: *
279:       DO 10 J = 1, JW - 3
280:          T( J+2, J ) = ZERO
281:          T( J+3, J ) = ZERO
282:    10 CONTINUE
283:       IF( JW.GT.2 )
284:      $   T( JW, JW-2 ) = ZERO
285: *
286: *     ==== Deflation detection loop ====
287: *
288:       NS = JW
289:       ILST = INFQR + 1
290:    20 CONTINUE
291:       IF( ILST.LE.NS ) THEN
292:          IF( NS.EQ.1 ) THEN
293:             BULGE = .FALSE.
294:          ELSE
295:             BULGE = T( NS, NS-1 ).NE.ZERO
296:          END IF
297: *
298: *        ==== Small spike tip test for deflation ====
299: *
300:          IF( .NOT.BULGE ) THEN
301: *
302: *           ==== Real eigenvalue ====
303: *
304:             FOO = ABS( T( NS, NS ) )
305:             IF( FOO.EQ.ZERO )
306:      $         FOO = ABS( S )
307:             IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
308: *
309: *              ==== Deflatable ====
310: *
311:                NS = NS - 1
312:             ELSE
313: *
314: *              ==== Undeflatable.   Move it up out of the way.
315: *              .    (STREXC can not fail in this case.) ====
316: *
317:                IFST = NS
318:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
319:      $                      INFO )
320:                ILST = ILST + 1
321:             END IF
322:          ELSE
323: *
324: *           ==== Complex conjugate pair ====
325: *
326:             FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
327:      $            SQRT( ABS( T( NS-1, NS ) ) )
328:             IF( FOO.EQ.ZERO )
329:      $         FOO = ABS( S )
330:             IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
331:      $          MAX( SMLNUM, ULP*FOO ) ) THEN
332: *
333: *              ==== Deflatable ====
334: *
335:                NS = NS - 2
336:             ELSE
337: *
338: *              ==== Undeflatable. Move them up out of the way.
339: *              .    Fortunately, STREXC does the right thing with
340: *              .    ILST in case of a rare exchange failure. ====
341: *
342:                IFST = NS
343:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
344:      $                      INFO )
345:                ILST = ILST + 2
346:             END IF
347:          END IF
348: *
349: *        ==== End deflation detection loop ====
350: *
351:          GO TO 20
352:       END IF
353: *
354: *        ==== Return to Hessenberg form ====
355: *
356:       IF( NS.EQ.0 )
357:      $   S = ZERO
358: *
359:       IF( NS.LT.JW ) THEN
360: *
361: *        ==== sorting diagonal blocks of T improves accuracy for
362: *        .    graded matrices.  Bubble sort deals well with
363: *        .    exchange failures. ====
364: *
365:          SORTED = .false.
366:          I = NS + 1
367:    30    CONTINUE
368:          IF( SORTED )
369:      $      GO TO 50
370:          SORTED = .true.
371: *
372:          KEND = I - 1
373:          I = INFQR + 1
374:          IF( I.EQ.NS ) THEN
375:             K = I + 1
376:          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
377:             K = I + 1
378:          ELSE
379:             K = I + 2
380:          END IF
381:    40    CONTINUE
382:          IF( K.LE.KEND ) THEN
383:             IF( K.EQ.I+1 ) THEN
384:                EVI = ABS( T( I, I ) )
385:             ELSE
386:                EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
387:      $               SQRT( ABS( T( I, I+1 ) ) )
388:             END IF
389: *
390:             IF( K.EQ.KEND ) THEN
391:                EVK = ABS( T( K, K ) )
392:             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
393:                EVK = ABS( T( K, K ) )
394:             ELSE
395:                EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
396:      $               SQRT( ABS( T( K, K+1 ) ) )
397:             END IF
398: *
399:             IF( EVI.GE.EVK ) THEN
400:                I = K
401:             ELSE
402:                SORTED = .false.
403:                IFST = I
404:                ILST = K
405:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
406:      $                      INFO )
407:                IF( INFO.EQ.0 ) THEN
408:                   I = ILST
409:                ELSE
410:                   I = K
411:                END IF
412:             END IF
413:             IF( I.EQ.KEND ) THEN
414:                K = I + 1
415:             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
416:                K = I + 1
417:             ELSE
418:                K = I + 2
419:             END IF
420:             GO TO 40
421:          END IF
422:          GO TO 30
423:    50    CONTINUE
424:       END IF
425: *
426: *     ==== Restore shift/eigenvalue array from T ====
427: *
428:       I = JW
429:    60 CONTINUE
430:       IF( I.GE.INFQR+1 ) THEN
431:          IF( I.EQ.INFQR+1 ) THEN
432:             SR( KWTOP+I-1 ) = T( I, I )
433:             SI( KWTOP+I-1 ) = ZERO
434:             I = I - 1
435:          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
436:             SR( KWTOP+I-1 ) = T( I, I )
437:             SI( KWTOP+I-1 ) = ZERO
438:             I = I - 1
439:          ELSE
440:             AA = T( I-1, I-1 )
441:             CC = T( I, I-1 )
442:             BB = T( I-1, I )
443:             DD = T( I, I )
444:             CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
445:      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
446:      $                   SI( KWTOP+I-1 ), CS, SN )
447:             I = I - 2
448:          END IF
449:          GO TO 60
450:       END IF
451: *
452:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
453:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
454: *
455: *           ==== Reflect spike back into lower triangle ====
456: *
457:             CALL SCOPY( NS, V, LDV, WORK, 1 )
458:             BETA = WORK( 1 )
459:             CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU )
460:             WORK( 1 ) = ONE
461: *
462:             CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
463: *
464:             CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
465:      $                  WORK( JW+1 ) )
466:             CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
467:      $                  WORK( JW+1 ) )
468:             CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
469:      $                  WORK( JW+1 ) )
470: *
471:             CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
472:      $                   LWORK-JW, INFO )
473:          END IF
474: *
475: *        ==== Copy updated reduced window into place ====
476: *
477:          IF( KWTOP.GT.1 )
478:      $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
479:          CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
480:          CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
481:      $               LDH+1 )
482: *
483: *        ==== Accumulate orthogonal matrix in order update
484: *        .    H and Z, if requested.  ====
485: *
486:          IF( NS.GT.1 .AND. S.NE.ZERO )
487:      $      CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
488:      $                   WORK( JW+1 ), LWORK-JW, INFO )
489: *
490: *        ==== Update vertical slab in H ====
491: *
492:          IF( WANTT ) THEN
493:             LTOP = 1
494:          ELSE
495:             LTOP = KTOP
496:          END IF
497:          DO 70 KROW = LTOP, KWTOP - 1, NV
498:             KLN = MIN( NV, KWTOP-KROW )
499:             CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
500:      $                  LDH, V, LDV, ZERO, WV, LDWV )
501:             CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
502:    70    CONTINUE
503: *
504: *        ==== Update horizontal slab in H ====
505: *
506:          IF( WANTT ) THEN
507:             DO 80 KCOL = KBOT + 1, N, NH
508:                KLN = MIN( NH, N-KCOL+1 )
509:                CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
510:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
511:                CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
512:      $                      LDH )
513:    80       CONTINUE
514:          END IF
515: *
516: *        ==== Update vertical slab in Z ====
517: *
518:          IF( WANTZ ) THEN
519:             DO 90 KROW = ILOZ, IHIZ, NV
520:                KLN = MIN( NV, IHIZ-KROW+1 )
521:                CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
522:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
523:                CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
524:      $                      LDZ )
525:    90       CONTINUE
526:          END IF
527:       END IF
528: *
529: *     ==== Return the number of deflations ... ====
530: *
531:       ND = JW - NS
532: *
533: *     ==== ... and the number of shifts. (Subtracting
534: *     .    INFQR from the spike length takes care
535: *     .    of the case of a rare QR failure while
536: *     .    calculating eigenvalues of the deflation
537: *     .    window.)  ====
538: *
539:       NS = NS - INFQR
540: *
541: *      ==== Return optimal workspace. ====
542: *
543:       WORK( 1 ) = REAL( LWKOPT )
544: *
545: *     ==== End of SLAQR2 ====
546: *
547:       END
548: