LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ slaqr5()

subroutine slaqr5 ( logical  WANTT,
logical  WANTZ,
integer  KACC22,
integer  N,
integer  KTOP,
integer  KBOT,
integer  NSHFTS,
real, dimension( * )  SR,
real, dimension( * )  SI,
real, dimension( ldh, * )  H,
integer  LDH,
integer  ILOZ,
integer  IHIZ,
real, dimension( ldz, * )  Z,
integer  LDZ,
real, dimension( ldv, * )  V,
integer  LDV,
real, dimension( ldu, * )  U,
integer  LDU,
integer  NV,
real, dimension( ldwv, * )  WV,
integer  LDWV,
integer  NH,
real, dimension( ldwh, * )  WH,
integer  LDWH 
)

SLAQR5 performs a single small-bulge multi-shift QR sweep.

Download SLAQR5 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
    SLAQR5, called by SLAQR0, performs a
    single small-bulge multi-shift QR sweep.
Parameters
[in]WANTT
          WANTT is LOGICAL
             WANTT = .true. if the quasi-triangular Schur factor
             is being computed.  WANTT is set to .false. otherwise.
[in]WANTZ
          WANTZ is LOGICAL
             WANTZ = .true. if the orthogonal Schur factor is being
             computed.  WANTZ is set to .false. otherwise.
[in]KACC22
          KACC22 is INTEGER with value 0, 1, or 2.
             Specifies the computation mode of far-from-diagonal
             orthogonal updates.
        = 0: SLAQR5 does not accumulate reflections and does not
             use matrix-matrix multiply to update far-from-diagonal
             matrix entries.
        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries.
        = 2: Same as KACC22 = 1. This option used to enable exploiting
             the 2-by-2 structure during matrix multiplications, but
             this is no longer supported.
[in]N
          N is INTEGER
             N is the order of the Hessenberg matrix H upon which this
             subroutine operates.
[in]KTOP
          KTOP is INTEGER
[in]KBOT
          KBOT is INTEGER
             These are the first and last rows and columns of an
             isolated diagonal block upon which the QR sweep is to be
             applied. It is assumed without a check that
                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
             and
                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
[in]NSHFTS
          NSHFTS is INTEGER
             NSHFTS gives the number of simultaneous shifts.  NSHFTS
             must be positive and even.
[in,out]SR
          SR is REAL array, dimension (NSHFTS)
[in,out]SI
          SI is REAL array, dimension (NSHFTS)
             SR contains the real parts and SI contains the imaginary
             parts of the NSHFTS shifts of origin that define the
             multi-shift QR sweep.  On output SR and SI may be
             reordered.
[in,out]H
          H is REAL array, dimension (LDH,N)
             On input H contains a Hessenberg matrix.  On output a
             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
             to the isolated diagonal block in rows and columns KTOP
             through KBOT.
[in]LDH
          LDH is INTEGER
             LDH is the leading dimension of H just as declared in the
             calling procedure.  LDH >= MAX(1,N).
[in]ILOZ
          ILOZ is INTEGER
[in]IHIZ
          IHIZ is INTEGER
             Specify the rows of Z to which transformations must be
             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
[in,out]Z
          Z is REAL array, dimension (LDZ,IHIZ)
             If WANTZ = .TRUE., then the QR Sweep orthogonal
             similarity transformation is accumulated into
             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
             If WANTZ = .FALSE., then Z is unreferenced.
[in]LDZ
          LDZ is INTEGER
             LDA is the leading dimension of Z just as declared in
             the calling procedure. LDZ >= N.
[out]V
          V is REAL array, dimension (LDV,NSHFTS/2)
[in]LDV
          LDV is INTEGER
             LDV is the leading dimension of V as declared in the
             calling procedure.  LDV >= 3.
[out]U
          U is REAL array, dimension (LDU,2*NSHFTS)
[in]LDU
          LDU is INTEGER
             LDU is the leading dimension of U just as declared in the
             in the calling subroutine.  LDU >= 2*NSHFTS.
[in]NV
          NV is INTEGER
             NV is the number of rows in WV agailable for workspace.
             NV >= 1.
[out]WV
          WV is REAL array, dimension (LDWV,2*NSHFTS)
[in]LDWV
          LDWV is INTEGER
             LDWV is the leading dimension of WV as declared in the
             in the calling subroutine.  LDWV >= NV.
[in]NH
          NH is INTEGER
             NH is the number of columns in array WH available for
             workspace. NH >= 1.
[out]WH
          WH is REAL array, dimension (LDWH,NH)
[in]LDWH
          LDWH is INTEGER
             Leading dimension of WH just as declared in the
             calling procedure.  LDWH >= 2*NSHFTS.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Karen Braman and Ralph Byers, Department of Mathematics, University of Kansas, USA

Lars Karlsson, Daniel Kressner, and Bruno Lang

Thijs Steel, Department of Computer science, KU Leuven, Belgium

References:
K. Braman, R. Byers and R. Mathias, The Multi-Shift QR Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 Performance, SIAM Journal of Matrix Analysis, volume 23, pages 929–947, 2002.

Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).

Definition at line 262 of file slaqr5.f.

265  IMPLICIT NONE
266 *
267 * -- LAPACK auxiliary routine --
268 * -- LAPACK is a software package provided by Univ. of Tennessee, --
269 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270 *
271 * .. Scalar Arguments ..
272  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273  $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274  LOGICAL WANTT, WANTZ
275 * ..
276 * .. Array Arguments ..
277  REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278  $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279  $ Z( LDZ, * )
280 * ..
281 *
282 * ================================================================
283 * .. Parameters ..
284  REAL ZERO, ONE
285  parameter( zero = 0.0e0, one = 1.0e0 )
286 * ..
287 * .. Local Scalars ..
288  REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
290  $ ULP
291  INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292  $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293  $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
294  $ NS, NU
295  LOGICAL ACCUM, BMP22
296 * ..
297 * .. External Functions ..
298  REAL SLAMCH
299  EXTERNAL slamch
300 * ..
301 * .. Intrinsic Functions ..
302 *
303  INTRINSIC abs, max, min, mod, real
304 * ..
305 * .. Local Arrays ..
306  REAL VT( 3 )
307 * ..
308 * .. External Subroutines ..
309  EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
310  $ strmm
311 * ..
312 * .. Executable Statements ..
313 *
314 * ==== If there are no shifts, then there is nothing to do. ====
315 *
316  IF( nshfts.LT.2 )
317  $ RETURN
318 *
319 * ==== If the active block is empty or 1-by-1, then there
320 * . is nothing to do. ====
321 *
322  IF( ktop.GE.kbot )
323  $ RETURN
324 *
325 * ==== Shuffle shifts into pairs of real shifts and pairs
326 * . of complex conjugate shifts assuming complex
327 * . conjugate shifts are already adjacent to one
328 * . another. ====
329 *
330  DO 10 i = 1, nshfts - 2, 2
331  IF( si( i ).NE.-si( i+1 ) ) THEN
332 *
333  swap = sr( i )
334  sr( i ) = sr( i+1 )
335  sr( i+1 ) = sr( i+2 )
336  sr( i+2 ) = swap
337 *
338  swap = si( i )
339  si( i ) = si( i+1 )
340  si( i+1 ) = si( i+2 )
341  si( i+2 ) = swap
342  END IF
343  10 CONTINUE
344 *
345 * ==== NSHFTS is supposed to be even, but if it is odd,
346 * . then simply reduce it by one. The shuffle above
347 * . ensures that the dropped shift is real and that
348 * . the remaining shifts are paired. ====
349 *
350  ns = nshfts - mod( nshfts, 2 )
351 *
352 * ==== Machine constants for deflation ====
353 *
354  safmin = slamch( 'SAFE MINIMUM' )
355  safmax = one / safmin
356  CALL slabad( safmin, safmax )
357  ulp = slamch( 'PRECISION' )
358  smlnum = safmin*( real( n ) / ulp )
359 *
360 * ==== Use accumulated reflections to update far-from-diagonal
361 * . entries ? ====
362 *
363  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364 *
365 * ==== clear trash ====
366 *
367  IF( ktop+2.LE.kbot )
368  $ h( ktop+2, ktop ) = zero
369 *
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
371 *
372  nbmps = ns / 2
373 *
374 * ==== KDU = width of slab ====
375 *
376  kdu = 4*nbmps
377 *
378 * ==== Create and chase chains of NBMPS bulges ====
379 *
380  DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381 *
382 * JTOP = Index from which updates from the right start.
383 *
384  IF( accum ) THEN
385  jtop = max( ktop, incol )
386  ELSE IF( wantt ) THEN
387  jtop = 1
388  ELSE
389  jtop = ktop
390  END IF
391 *
392  ndcol = incol + kdu
393  IF( accum )
394  $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395 *
396 * ==== Near-the-diagonal bulge chase. The following loop
397 * . performs the near-the-diagonal part of a small bulge
398 * . multi-shift QR sweep. Each 4*NBMPS column diagonal
399 * . chunk extends from column INCOL to column NDCOL
400 * . (including both column INCOL and column NDCOL). The
401 * . following loop chases a 2*NBMPS+1 column long chain of
402 * . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL
403 * . may be less than KTOP and and NDCOL may be greater than
404 * . KBOT indicating phantom columns from which to chase
405 * . bulges before they are actually introduced or to which
406 * . to chase bulges beyond column KBOT.) ====
407 *
408  DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409 *
410 * ==== Bulges number MTOP to MBOT are active double implicit
411 * . shift bulges. There may or may not also be small
412 * . 2-by-2 bulge, if there is room. The inactive bulges
413 * . (if any) must wait until the active bulges have moved
414 * . down the diagonal to make room. The phantom matrix
415 * . paradigm described above helps keep track. ====
416 *
417  mtop = max( 1, ( ktop-krcol ) / 2+1 )
418  mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419  m22 = mbot + 1
420  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421  $ ( kbot-2 )
422 *
423 * ==== Generate reflections to chase the chain right
424 * . one column. (The minimum value of K is KTOP-1.) ====
425 *
426  IF ( bmp22 ) THEN
427 *
428 * ==== Special case: 2-by-2 reflection at bottom treated
429 * . separately ====
430 *
431  k = krcol + 2*( m22-1 )
432  IF( k.EQ.ktop-1 ) THEN
433  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435  $ v( 1, m22 ) )
436  beta = v( 1, m22 )
437  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438  ELSE
439  beta = h( k+1, k )
440  v( 2, m22 ) = h( k+2, k )
441  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442  h( k+1, k ) = beta
443  h( k+2, k ) = zero
444  END IF
445 
446 *
447 * ==== Perform update from right within
448 * . computational window. ====
449 *
450  DO 30 j = jtop, min( kbot, k+3 )
451  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
452  $ h( j, k+2 ) )
453  h( j, k+1 ) = h( j, k+1 ) - refsum
454  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
455  30 CONTINUE
456 *
457 * ==== Perform update from left within
458 * . computational window. ====
459 *
460  IF( accum ) THEN
461  jbot = min( ndcol, kbot )
462  ELSE IF( wantt ) THEN
463  jbot = n
464  ELSE
465  jbot = kbot
466  END IF
467  DO 40 j = k+1, jbot
468  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
469  $ h( k+2, j ) )
470  h( k+1, j ) = h( k+1, j ) - refsum
471  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
472  40 CONTINUE
473 *
474 * ==== The following convergence test requires that
475 * . the tradition small-compared-to-nearby-diagonals
476 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
477 * . criteria both be satisfied. The latter improves
478 * . accuracy in some examples. Falling back on an
479 * . alternate convergence criterion when TST1 or TST2
480 * . is zero (as done here) is traditional but probably
481 * . unnecessary. ====
482 *
483  IF( k.GE.ktop ) THEN
484  IF( h( k+1, k ).NE.zero ) THEN
485  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
486  IF( tst1.EQ.zero ) THEN
487  IF( k.GE.ktop+1 )
488  $ tst1 = tst1 + abs( h( k, k-1 ) )
489  IF( k.GE.ktop+2 )
490  $ tst1 = tst1 + abs( h( k, k-2 ) )
491  IF( k.GE.ktop+3 )
492  $ tst1 = tst1 + abs( h( k, k-3 ) )
493  IF( k.LE.kbot-2 )
494  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
495  IF( k.LE.kbot-3 )
496  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
497  IF( k.LE.kbot-4 )
498  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
499  END IF
500  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
501  $ THEN
502  h12 = max( abs( h( k+1, k ) ),
503  $ abs( h( k, k+1 ) ) )
504  h21 = min( abs( h( k+1, k ) ),
505  $ abs( h( k, k+1 ) ) )
506  h11 = max( abs( h( k+1, k+1 ) ),
507  $ abs( h( k, k )-h( k+1, k+1 ) ) )
508  h22 = min( abs( h( k+1, k+1 ) ),
509  $ abs( h( k, k )-h( k+1, k+1 ) ) )
510  scl = h11 + h12
511  tst2 = h22*( h11 / scl )
512 *
513  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
514  $ max( smlnum, ulp*tst2 ) ) THEN
515  h( k+1, k ) = zero
516  END IF
517  END IF
518  END IF
519  END IF
520 *
521 * ==== Accumulate orthogonal transformations. ====
522 *
523  IF( accum ) THEN
524  kms = k - incol
525  DO 50 j = max( 1, ktop-incol ), kdu
526  refsum = v( 1, m22 )*( u( j, kms+1 )+
527  $ v( 2, m22 )*u( j, kms+2 ) )
528  u( j, kms+1 ) = u( j, kms+1 ) - refsum
529  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 )
530  50 CONTINUE
531  ELSE IF( wantz ) THEN
532  DO 60 j = iloz, ihiz
533  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
534  $ z( j, k+2 ) )
535  z( j, k+1 ) = z( j, k+1 ) - refsum
536  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
537  60 CONTINUE
538  END IF
539  END IF
540 *
541 * ==== Normal case: Chain of 3-by-3 reflections ====
542 *
543  DO 80 m = mbot, mtop, -1
544  k = krcol + 2*( m-1 )
545  IF( k.EQ.ktop-1 ) THEN
546  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
547  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
548  $ v( 1, m ) )
549  alpha = v( 1, m )
550  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
551  ELSE
552 *
553 * ==== Perform delayed transformation of row below
554 * . Mth bulge. Exploit fact that first two elements
555 * . of row are actually zero. ====
556 *
557  refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
558  h( k+3, k ) = -refsum
559  h( k+3, k+1 ) = -refsum*v( 2, m )
560  h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
561 *
562 * ==== Calculate reflection to move
563 * . Mth bulge one step. ====
564 *
565  beta = h( k+1, k )
566  v( 2, m ) = h( k+2, k )
567  v( 3, m ) = h( k+3, k )
568  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
569 *
570 * ==== A Bulge may collapse because of vigilant
571 * . deflation or destructive underflow. In the
572 * . underflow case, try the two-small-subdiagonals
573 * . trick to try to reinflate the bulge. ====
574 *
575  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
576  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
577 *
578 * ==== Typical case: not collapsed (yet). ====
579 *
580  h( k+1, k ) = beta
581  h( k+2, k ) = zero
582  h( k+3, k ) = zero
583  ELSE
584 *
585 * ==== Atypical case: collapsed. Attempt to
586 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
587 * . If the fill resulting from the new
588 * . reflector is too large, then abandon it.
589 * . Otherwise, use the new one. ====
590 *
591  CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
592  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
593  $ vt )
594  alpha = vt( 1 )
595  CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
596  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
597  $ h( k+2, k ) )
598 *
599  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
600  $ abs( refsum*vt( 3 ) ).GT.ulp*
601  $ ( abs( h( k, k ) )+abs( h( k+1,
602  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
603 *
604 * ==== Starting a new bulge here would
605 * . create non-negligible fill. Use
606 * . the old one with trepidation. ====
607 *
608  h( k+1, k ) = beta
609  h( k+2, k ) = zero
610  h( k+3, k ) = zero
611  ELSE
612 *
613 * ==== Starting a new bulge here would
614 * . create only negligible fill.
615 * . Replace the old reflector with
616 * . the new one. ====
617 *
618  h( k+1, k ) = h( k+1, k ) - refsum
619  h( k+2, k ) = zero
620  h( k+3, k ) = zero
621  v( 1, m ) = vt( 1 )
622  v( 2, m ) = vt( 2 )
623  v( 3, m ) = vt( 3 )
624  END IF
625  END IF
626  END IF
627 *
628 * ==== Apply reflection from the right and
629 * . the first column of update from the left.
630 * . These updates are required for the vigilant
631 * . deflation check. We still delay most of the
632 * . updates from the left for efficiency. ====
633 *
634  DO 70 j = jtop, min( kbot, k+3 )
635  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
636  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
637  h( j, k+1 ) = h( j, k+1 ) - refsum
638  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
639  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
640  70 CONTINUE
641 *
642 * ==== Perform update from left for subsequent
643 * . column. ====
644 *
645  refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*
646  $ h( k+2, k+1 )+v( 3, m )*h( k+3, k+1 ) )
647  h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
648  h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m )
649  h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m )
650 *
651 * ==== The following convergence test requires that
652 * . the tradition small-compared-to-nearby-diagonals
653 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
654 * . criteria both be satisfied. The latter improves
655 * . accuracy in some examples. Falling back on an
656 * . alternate convergence criterion when TST1 or TST2
657 * . is zero (as done here) is traditional but probably
658 * . unnecessary. ====
659 *
660  IF( k.LT.ktop)
661  $ cycle
662  IF( h( k+1, k ).NE.zero ) THEN
663  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
664  IF( tst1.EQ.zero ) THEN
665  IF( k.GE.ktop+1 )
666  $ tst1 = tst1 + abs( h( k, k-1 ) )
667  IF( k.GE.ktop+2 )
668  $ tst1 = tst1 + abs( h( k, k-2 ) )
669  IF( k.GE.ktop+3 )
670  $ tst1 = tst1 + abs( h( k, k-3 ) )
671  IF( k.LE.kbot-2 )
672  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
673  IF( k.LE.kbot-3 )
674  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
675  IF( k.LE.kbot-4 )
676  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
677  END IF
678  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
679  $ THEN
680  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
681  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
682  h11 = max( abs( h( k+1, k+1 ) ),
683  $ abs( h( k, k )-h( k+1, k+1 ) ) )
684  h22 = min( abs( h( k+1, k+1 ) ),
685  $ abs( h( k, k )-h( k+1, k+1 ) ) )
686  scl = h11 + h12
687  tst2 = h22*( h11 / scl )
688 *
689  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
690  $ max( smlnum, ulp*tst2 ) ) THEN
691  h( k+1, k ) = zero
692  END IF
693  END IF
694  END IF
695  80 CONTINUE
696 *
697 * ==== Multiply H by reflections from the left ====
698 *
699  IF( accum ) THEN
700  jbot = min( ndcol, kbot )
701  ELSE IF( wantt ) THEN
702  jbot = n
703  ELSE
704  jbot = kbot
705  END IF
706 *
707  DO 100 m = mbot, mtop, -1
708  k = krcol + 2*( m-1 )
709  DO 90 j = max( ktop, krcol + 2*m ), jbot
710  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
711  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
712  h( k+1, j ) = h( k+1, j ) - refsum
713  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
714  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
715  90 CONTINUE
716  100 CONTINUE
717 *
718 * ==== Accumulate orthogonal transformations. ====
719 *
720  IF( accum ) THEN
721 *
722 * ==== Accumulate U. (If needed, update Z later
723 * . with an efficient matrix-matrix
724 * . multiply.) ====
725 *
726  DO 120 m = mbot, mtop, -1
727  k = krcol + 2*( m-1 )
728  kms = k - incol
729  i2 = max( 1, ktop-incol )
730  i2 = max( i2, kms-(krcol-incol)+1 )
731  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
732  DO 110 j = i2, i4
733  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
734  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
735  u( j, kms+1 ) = u( j, kms+1 ) - refsum
736  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
737  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
738  110 CONTINUE
739  120 CONTINUE
740  ELSE IF( wantz ) THEN
741 *
742 * ==== U is not accumulated, so update Z
743 * . now by multiplying by reflections
744 * . from the right. ====
745 *
746  DO 140 m = mbot, mtop, -1
747  k = krcol + 2*( m-1 )
748  DO 130 j = iloz, ihiz
749  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
750  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
751  z( j, k+1 ) = z( j, k+1 ) - refsum
752  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
753  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
754  130 CONTINUE
755  140 CONTINUE
756  END IF
757 *
758 * ==== End of near-the-diagonal bulge chase. ====
759 *
760  145 CONTINUE
761 *
762 * ==== Use U (if accumulated) to update far-from-diagonal
763 * . entries in H. If required, use U to update Z as
764 * . well. ====
765 *
766  IF( accum ) THEN
767  IF( wantt ) THEN
768  jtop = 1
769  jbot = n
770  ELSE
771  jtop = ktop
772  jbot = kbot
773  END IF
774  k1 = max( 1, ktop-incol )
775  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
776 *
777 * ==== Horizontal Multiply ====
778 *
779  DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
780  jlen = min( nh, jbot-jcol+1 )
781  CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
782  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
783  $ ldwh )
784  CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
785  $ h( incol+k1, jcol ), ldh )
786  150 CONTINUE
787 *
788 * ==== Vertical multiply ====
789 *
790  DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
791  jlen = min( nv, max( ktop, incol )-jrow )
792  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
793  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
794  $ ldu, zero, wv, ldwv )
795  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
796  $ h( jrow, incol+k1 ), ldh )
797  160 CONTINUE
798 *
799 * ==== Z multiply (also vertical) ====
800 *
801  IF( wantz ) THEN
802  DO 170 jrow = iloz, ihiz, nv
803  jlen = min( nv, ihiz-jrow+1 )
804  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
805  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
806  $ ldu, zero, wv, ldwv )
807  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
808  $ z( jrow, incol+k1 ), ldz )
809  170 CONTINUE
810  END IF
811  END IF
812  180 CONTINUE
813 *
814 * ==== End of SLAQR5 ====
815 *
subroutine slabad(SMALL, LARGE)
SLABAD
Definition: slabad.f:74
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:106
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: slaqr1.f:121
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:177
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:68
Here is the call graph for this function:
Here is the caller graph for this function: