LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
dlaqr5.f
Go to the documentation of this file.
1 *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23 * LDU, NV, WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33 * $ Z( LDZ, * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> DLAQR5, called by DLAQR0, performs a
43 *> single small-bulge multi-shift QR sweep.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] WANTT
50 *> \verbatim
51 *> WANTT is LOGICAL
52 *> WANTT = .true. if the quasi-triangular Schur factor
53 *> is being computed. WANTT is set to .false. otherwise.
54 *> \endverbatim
55 *>
56 *> \param[in] WANTZ
57 *> \verbatim
58 *> WANTZ is LOGICAL
59 *> WANTZ = .true. if the orthogonal Schur factor is being
60 *> computed. WANTZ is set to .false. otherwise.
61 *> \endverbatim
62 *>
63 *> \param[in] KACC22
64 *> \verbatim
65 *> KACC22 is INTEGER with value 0, 1, or 2.
66 *> Specifies the computation mode of far-from-diagonal
67 *> orthogonal updates.
68 *> = 0: DLAQR5 does not accumulate reflections and does not
69 *> use matrix-matrix multiply to update far-from-diagonal
70 *> matrix entries.
71 *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72 *> multiply to update the far-from-diagonal matrix entries.
73 *> = 2: Same as KACC22 = 1. This option used to enable exploiting
74 *> the 2-by-2 structure during matrix multiplications, but
75 *> this is no longer supported.
76 *> \endverbatim
77 *>
78 *> \param[in] N
79 *> \verbatim
80 *> N is INTEGER
81 *> N is the order of the Hessenberg matrix H upon which this
82 *> subroutine operates.
83 *> \endverbatim
84 *>
85 *> \param[in] KTOP
86 *> \verbatim
87 *> KTOP is INTEGER
88 *> \endverbatim
89 *>
90 *> \param[in] KBOT
91 *> \verbatim
92 *> KBOT is INTEGER
93 *> These are the first and last rows and columns of an
94 *> isolated diagonal block upon which the QR sweep is to be
95 *> applied. It is assumed without a check that
96 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97 *> and
98 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
99 *> \endverbatim
100 *>
101 *> \param[in] NSHFTS
102 *> \verbatim
103 *> NSHFTS is INTEGER
104 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
105 *> must be positive and even.
106 *> \endverbatim
107 *>
108 *> \param[in,out] SR
109 *> \verbatim
110 *> SR is DOUBLE PRECISION array, dimension (NSHFTS)
111 *> \endverbatim
112 *>
113 *> \param[in,out] SI
114 *> \verbatim
115 *> SI is DOUBLE PRECISION array, dimension (NSHFTS)
116 *> SR contains the real parts and SI contains the imaginary
117 *> parts of the NSHFTS shifts of origin that define the
118 *> multi-shift QR sweep. On output SR and SI may be
119 *> reordered.
120 *> \endverbatim
121 *>
122 *> \param[in,out] H
123 *> \verbatim
124 *> H is DOUBLE PRECISION array, dimension (LDH,N)
125 *> On input H contains a Hessenberg matrix. On output a
126 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
127 *> to the isolated diagonal block in rows and columns KTOP
128 *> through KBOT.
129 *> \endverbatim
130 *>
131 *> \param[in] LDH
132 *> \verbatim
133 *> LDH is INTEGER
134 *> LDH is the leading dimension of H just as declared in the
135 *> calling procedure. LDH >= MAX(1,N).
136 *> \endverbatim
137 *>
138 *> \param[in] ILOZ
139 *> \verbatim
140 *> ILOZ is INTEGER
141 *> \endverbatim
142 *>
143 *> \param[in] IHIZ
144 *> \verbatim
145 *> IHIZ is INTEGER
146 *> Specify the rows of Z to which transformations must be
147 *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
148 *> \endverbatim
149 *>
150 *> \param[in,out] Z
151 *> \verbatim
152 *> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
153 *> If WANTZ = .TRUE., then the QR Sweep orthogonal
154 *> similarity transformation is accumulated into
155 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
156 *> If WANTZ = .FALSE., then Z is unreferenced.
157 *> \endverbatim
158 *>
159 *> \param[in] LDZ
160 *> \verbatim
161 *> LDZ is INTEGER
162 *> LDA is the leading dimension of Z just as declared in
163 *> the calling procedure. LDZ >= N.
164 *> \endverbatim
165 *>
166 *> \param[out] V
167 *> \verbatim
168 *> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
169 *> \endverbatim
170 *>
171 *> \param[in] LDV
172 *> \verbatim
173 *> LDV is INTEGER
174 *> LDV is the leading dimension of V as declared in the
175 *> calling procedure. LDV >= 3.
176 *> \endverbatim
177 *>
178 *> \param[out] U
179 *> \verbatim
180 *> U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
181 *> \endverbatim
182 *>
183 *> \param[in] LDU
184 *> \verbatim
185 *> LDU is INTEGER
186 *> LDU is the leading dimension of U just as declared in the
187 *> in the calling subroutine. LDU >= 2*NSHFTS.
188 *> \endverbatim
189 *>
190 *> \param[in] NV
191 *> \verbatim
192 *> NV is INTEGER
193 *> NV is the number of rows in WV agailable for workspace.
194 *> NV >= 1.
195 *> \endverbatim
196 *>
197 *> \param[out] WV
198 *> \verbatim
199 *> WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
200 *> \endverbatim
201 *>
202 *> \param[in] LDWV
203 *> \verbatim
204 *> LDWV is INTEGER
205 *> LDWV is the leading dimension of WV as declared in the
206 *> in the calling subroutine. LDWV >= NV.
207 *> \endverbatim
208 *
209 *> \param[in] NH
210 *> \verbatim
211 *> NH is INTEGER
212 *> NH is the number of columns in array WH available for
213 *> workspace. NH >= 1.
214 *> \endverbatim
215 *>
216 *> \param[out] WH
217 *> \verbatim
218 *> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
219 *> \endverbatim
220 *>
221 *> \param[in] LDWH
222 *> \verbatim
223 *> LDWH is INTEGER
224 *> Leading dimension of WH just as declared in the
225 *> calling procedure. LDWH >= 2*NSHFTS.
226 *> \endverbatim
227 *>
228 * Authors:
229 * ========
230 *
231 *> \author Univ. of Tennessee
232 *> \author Univ. of California Berkeley
233 *> \author Univ. of Colorado Denver
234 *> \author NAG Ltd.
235 *
236 *> \ingroup doubleOTHERauxiliary
237 *
238 *> \par Contributors:
239 * ==================
240 *>
241 *> Karen Braman and Ralph Byers, Department of Mathematics,
242 *> University of Kansas, USA
243 *>
244 *> Lars Karlsson, Daniel Kressner, and Bruno Lang
245 *>
246 *> Thijs Steel, Department of Computer science,
247 *> KU Leuven, Belgium
248 *
249 *> \par References:
250 * ================
251 *>
252 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255 *> 929--947, 2002.
256 *>
257 *> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
258 *> chains of bulges in multishift QR algorithms.
259 *> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
260 *>
261 * =====================================================================
262  SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
263  $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
264  $ LDU, NV, WV, LDWV, NH, WH, LDWH )
265  IMPLICIT NONE
266 *
267 * -- LAPACK auxiliary routine --
268 * -- LAPACK is a software package provided by Univ. of Tennessee, --
269 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270 *
271 * .. Scalar Arguments ..
272  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273  $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274  LOGICAL WANTT, WANTZ
275 * ..
276 * .. Array Arguments ..
277  DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278  $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279  $ z( ldz, * )
280 * ..
281 *
282 * ================================================================
283 * .. Parameters ..
284  DOUBLE PRECISION ZERO, ONE
285  PARAMETER ( ZERO = 0.0d0, one = 1.0d0 )
286 * ..
287 * .. Local Scalars ..
288  DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
290  $ ulp
291  INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292  $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293  $ m, m22, mbot, mtop, nbmps, ndcol,
294  $ ns, nu
295  LOGICAL ACCUM, BMP22
296 * ..
297 * .. External Functions ..
298  DOUBLE PRECISION DLAMCH
299  EXTERNAL DLAMCH
300 * ..
301 * .. Intrinsic Functions ..
302 *
303  INTRINSIC abs, dble, max, min, mod
304 * ..
305 * .. Local Arrays ..
306  DOUBLE PRECISION VT( 3 )
307 * ..
308 * .. External Subroutines ..
309  EXTERNAL dgemm, dlabad, dlacpy, dlaqr1, dlarfg, dlaset,
310  $ dtrmm
311 * ..
312 * .. Executable Statements ..
313 *
314 * ==== If there are no shifts, then there is nothing to do. ====
315 *
316  IF( nshfts.LT.2 )
317  $ RETURN
318 *
319 * ==== If the active block is empty or 1-by-1, then there
320 * . is nothing to do. ====
321 *
322  IF( ktop.GE.kbot )
323  $ RETURN
324 *
325 * ==== Shuffle shifts into pairs of real shifts and pairs
326 * . of complex conjugate shifts assuming complex
327 * . conjugate shifts are already adjacent to one
328 * . another. ====
329 *
330  DO 10 i = 1, nshfts - 2, 2
331  IF( si( i ).NE.-si( i+1 ) ) THEN
332 *
333  swap = sr( i )
334  sr( i ) = sr( i+1 )
335  sr( i+1 ) = sr( i+2 )
336  sr( i+2 ) = swap
337 *
338  swap = si( i )
339  si( i ) = si( i+1 )
340  si( i+1 ) = si( i+2 )
341  si( i+2 ) = swap
342  END IF
343  10 CONTINUE
344 *
345 * ==== NSHFTS is supposed to be even, but if it is odd,
346 * . then simply reduce it by one. The shuffle above
347 * . ensures that the dropped shift is real and that
348 * . the remaining shifts are paired. ====
349 *
350  ns = nshfts - mod( nshfts, 2 )
351 *
352 * ==== Machine constants for deflation ====
353 *
354  safmin = dlamch( 'SAFE MINIMUM' )
355  safmax = one / safmin
356  CALL dlabad( safmin, safmax )
357  ulp = dlamch( 'PRECISION' )
358  smlnum = safmin*( dble( n ) / ulp )
359 *
360 * ==== Use accumulated reflections to update far-from-diagonal
361 * . entries ? ====
362 *
363  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364 *
365 * ==== clear trash ====
366 *
367  IF( ktop+2.LE.kbot )
368  $ h( ktop+2, ktop ) = zero
369 *
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
371 *
372  nbmps = ns / 2
373 *
374 * ==== KDU = width of slab ====
375 *
376  kdu = 4*nbmps
377 *
378 * ==== Create and chase chains of NBMPS bulges ====
379 *
380  DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381 *
382 * JTOP = Index from which updates from the right start.
383 *
384  IF( accum ) THEN
385  jtop = max( ktop, incol )
386  ELSE IF( wantt ) THEN
387  jtop = 1
388  ELSE
389  jtop = ktop
390  END IF
391 *
392  ndcol = incol + kdu
393  IF( accum )
394  $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395 *
396 * ==== Near-the-diagonal bulge chase. The following loop
397 * . performs the near-the-diagonal part of a small bulge
398 * . multi-shift QR sweep. Each 4*NBMPS column diagonal
399 * . chunk extends from column INCOL to column NDCOL
400 * . (including both column INCOL and column NDCOL). The
401 * . following loop chases a 2*NBMPS+1 column long chain of
402 * . NBMPS bulges 2*NBMPS columns to the right. (INCOL
403 * . may be less than KTOP and and NDCOL may be greater than
404 * . KBOT indicating phantom columns from which to chase
405 * . bulges before they are actually introduced or to which
406 * . to chase bulges beyond column KBOT.) ====
407 *
408  DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409 *
410 * ==== Bulges number MTOP to MBOT are active double implicit
411 * . shift bulges. There may or may not also be small
412 * . 2-by-2 bulge, if there is room. The inactive bulges
413 * . (if any) must wait until the active bulges have moved
414 * . down the diagonal to make room. The phantom matrix
415 * . paradigm described above helps keep track. ====
416 *
417  mtop = max( 1, ( ktop-krcol ) / 2+1 )
418  mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419  m22 = mbot + 1
420  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421  $ ( kbot-2 )
422 *
423 * ==== Generate reflections to chase the chain right
424 * . one column. (The minimum value of K is KTOP-1.) ====
425 *
426  IF ( bmp22 ) THEN
427 *
428 * ==== Special case: 2-by-2 reflection at bottom treated
429 * . separately ====
430 *
431  k = krcol + 2*( m22-1 )
432  IF( k.EQ.ktop-1 ) THEN
433  CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435  $ v( 1, m22 ) )
436  beta = v( 1, m22 )
437  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438  ELSE
439  beta = h( k+1, k )
440  v( 2, m22 ) = h( k+2, k )
441  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442  h( k+1, k ) = beta
443  h( k+2, k ) = zero
444  END IF
445 
446 *
447 * ==== Perform update from right within
448 * . computational window. ====
449 *
450  DO 30 j = jtop, min( kbot, k+3 )
451  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
452  $ h( j, k+2 ) )
453  h( j, k+1 ) = h( j, k+1 ) - refsum
454  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
455  30 CONTINUE
456 *
457 * ==== Perform update from left within
458 * . computational window. ====
459 *
460  IF( accum ) THEN
461  jbot = min( ndcol, kbot )
462  ELSE IF( wantt ) THEN
463  jbot = n
464  ELSE
465  jbot = kbot
466  END IF
467  DO 40 j = k+1, jbot
468  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
469  $ h( k+2, j ) )
470  h( k+1, j ) = h( k+1, j ) - refsum
471  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
472  40 CONTINUE
473 *
474 * ==== The following convergence test requires that
475 * . the tradition small-compared-to-nearby-diagonals
476 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
477 * . criteria both be satisfied. The latter improves
478 * . accuracy in some examples. Falling back on an
479 * . alternate convergence criterion when TST1 or TST2
480 * . is zero (as done here) is traditional but probably
481 * . unnecessary. ====
482 *
483  IF( k.GE.ktop ) THEN
484  IF( h( k+1, k ).NE.zero ) THEN
485  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
486  IF( tst1.EQ.zero ) THEN
487  IF( k.GE.ktop+1 )
488  $ tst1 = tst1 + abs( h( k, k-1 ) )
489  IF( k.GE.ktop+2 )
490  $ tst1 = tst1 + abs( h( k, k-2 ) )
491  IF( k.GE.ktop+3 )
492  $ tst1 = tst1 + abs( h( k, k-3 ) )
493  IF( k.LE.kbot-2 )
494  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
495  IF( k.LE.kbot-3 )
496  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
497  IF( k.LE.kbot-4 )
498  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
499  END IF
500  IF( abs( h( k+1, k ) )
501  $ .LE.max( smlnum, ulp*tst1 ) ) THEN
502  h12 = max( abs( h( k+1, k ) ),
503  $ abs( h( k, k+1 ) ) )
504  h21 = min( abs( h( k+1, k ) ),
505  $ abs( h( k, k+1 ) ) )
506  h11 = max( abs( h( k+1, k+1 ) ),
507  $ abs( h( k, k )-h( k+1, k+1 ) ) )
508  h22 = min( abs( h( k+1, k+1 ) ),
509  $ abs( h( k, k )-h( k+1, k+1 ) ) )
510  scl = h11 + h12
511  tst2 = h22*( h11 / scl )
512 *
513  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
514  $ max( smlnum, ulp*tst2 ) ) THEN
515  h( k+1, k ) = zero
516  END IF
517  END IF
518  END IF
519  END IF
520 *
521 * ==== Accumulate orthogonal transformations. ====
522 *
523  IF( accum ) THEN
524  kms = k - incol
525  DO 50 j = max( 1, ktop-incol ), kdu
526  refsum = v( 1, m22 )*( u( j, kms+1 )+
527  $ v( 2, m22 )*u( j, kms+2 ) )
528  u( j, kms+1 ) = u( j, kms+1 ) - refsum
529  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 )
530  50 CONTINUE
531  ELSE IF( wantz ) THEN
532  DO 60 j = iloz, ihiz
533  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
534  $ z( j, k+2 ) )
535  z( j, k+1 ) = z( j, k+1 ) - refsum
536  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
537  60 CONTINUE
538  END IF
539  END IF
540 *
541 * ==== Normal case: Chain of 3-by-3 reflections ====
542 *
543  DO 80 m = mbot, mtop, -1
544  k = krcol + 2*( m-1 )
545  IF( k.EQ.ktop-1 ) THEN
546  CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
547  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
548  $ v( 1, m ) )
549  alpha = v( 1, m )
550  CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
551  ELSE
552 *
553 * ==== Perform delayed transformation of row below
554 * . Mth bulge. Exploit fact that first two elements
555 * . of row are actually zero. ====
556 *
557  refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
558  h( k+3, k ) = -refsum
559  h( k+3, k+1 ) = -refsum*v( 2, m )
560  h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
561 *
562 * ==== Calculate reflection to move
563 * . Mth bulge one step. ====
564 *
565  beta = h( k+1, k )
566  v( 2, m ) = h( k+2, k )
567  v( 3, m ) = h( k+3, k )
568  CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
569 *
570 * ==== A Bulge may collapse because of vigilant
571 * . deflation or destructive underflow. In the
572 * . underflow case, try the two-small-subdiagonals
573 * . trick to try to reinflate the bulge. ====
574 *
575  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
576  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
577 *
578 * ==== Typical case: not collapsed (yet). ====
579 *
580  h( k+1, k ) = beta
581  h( k+2, k ) = zero
582  h( k+3, k ) = zero
583  ELSE
584 *
585 * ==== Atypical case: collapsed. Attempt to
586 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
587 * . If the fill resulting from the new
588 * . reflector is too large, then abandon it.
589 * . Otherwise, use the new one. ====
590 *
591  CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
592  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
593  $ vt )
594  alpha = vt( 1 )
595  CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
596  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
597  $ h( k+2, k ) )
598 *
599  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
600  $ abs( refsum*vt( 3 ) ).GT.ulp*
601  $ ( abs( h( k, k ) )+abs( h( k+1,
602  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
603 *
604 * ==== Starting a new bulge here would
605 * . create non-negligible fill. Use
606 * . the old one with trepidation. ====
607 *
608  h( k+1, k ) = beta
609  h( k+2, k ) = zero
610  h( k+3, k ) = zero
611  ELSE
612 *
613 * ==== Starting a new bulge here would
614 * . create only negligible fill.
615 * . Replace the old reflector with
616 * . the new one. ====
617 *
618  h( k+1, k ) = h( k+1, k ) - refsum
619  h( k+2, k ) = zero
620  h( k+3, k ) = zero
621  v( 1, m ) = vt( 1 )
622  v( 2, m ) = vt( 2 )
623  v( 3, m ) = vt( 3 )
624  END IF
625  END IF
626  END IF
627 *
628 * ==== Apply reflection from the right and
629 * . the first column of update from the left.
630 * . These updates are required for the vigilant
631 * . deflation check. We still delay most of the
632 * . updates from the left for efficiency. ====
633 *
634  DO 70 j = jtop, min( kbot, k+3 )
635  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
636  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
637  h( j, k+1 ) = h( j, k+1 ) - refsum
638  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
639  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
640  70 CONTINUE
641 *
642 * ==== Perform update from left for subsequent
643 * . column. ====
644 *
645  refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*
646  $ h( k+2, k+1 )+v( 3, m )*h( k+3, k+1 ) )
647  h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
648  h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m )
649  h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m )
650 *
651 * ==== The following convergence test requires that
652 * . the tradition small-compared-to-nearby-diagonals
653 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
654 * . criteria both be satisfied. The latter improves
655 * . accuracy in some examples. Falling back on an
656 * . alternate convergence criterion when TST1 or TST2
657 * . is zero (as done here) is traditional but probably
658 * . unnecessary. ====
659 *
660  IF( k.LT.ktop)
661  $ cycle
662  IF( h( k+1, k ).NE.zero ) THEN
663  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
664  IF( tst1.EQ.zero ) THEN
665  IF( k.GE.ktop+1 )
666  $ tst1 = tst1 + abs( h( k, k-1 ) )
667  IF( k.GE.ktop+2 )
668  $ tst1 = tst1 + abs( h( k, k-2 ) )
669  IF( k.GE.ktop+3 )
670  $ tst1 = tst1 + abs( h( k, k-3 ) )
671  IF( k.LE.kbot-2 )
672  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
673  IF( k.LE.kbot-3 )
674  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
675  IF( k.LE.kbot-4 )
676  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
677  END IF
678  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
679  $ THEN
680  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
681  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
682  h11 = max( abs( h( k+1, k+1 ) ),
683  $ abs( h( k, k )-h( k+1, k+1 ) ) )
684  h22 = min( abs( h( k+1, k+1 ) ),
685  $ abs( h( k, k )-h( k+1, k+1 ) ) )
686  scl = h11 + h12
687  tst2 = h22*( h11 / scl )
688 *
689  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
690  $ max( smlnum, ulp*tst2 ) ) THEN
691  h( k+1, k ) = zero
692  END IF
693  END IF
694  END IF
695  80 CONTINUE
696 *
697 * ==== Multiply H by reflections from the left ====
698 *
699  IF( accum ) THEN
700  jbot = min( ndcol, kbot )
701  ELSE IF( wantt ) THEN
702  jbot = n
703  ELSE
704  jbot = kbot
705  END IF
706 *
707  DO 100 m = mbot, mtop, -1
708  k = krcol + 2*( m-1 )
709  DO 90 j = max( ktop, krcol + 2*m ), jbot
710  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
711  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
712  h( k+1, j ) = h( k+1, j ) - refsum
713  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
714  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
715  90 CONTINUE
716  100 CONTINUE
717 *
718 * ==== Accumulate orthogonal transformations. ====
719 *
720  IF( accum ) THEN
721 *
722 * ==== Accumulate U. (If needed, update Z later
723 * . with an efficient matrix-matrix
724 * . multiply.) ====
725 *
726  DO 120 m = mbot, mtop, -1
727  k = krcol + 2*( m-1 )
728  kms = k - incol
729  i2 = max( 1, ktop-incol )
730  i2 = max( i2, kms-(krcol-incol)+1 )
731  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
732  DO 110 j = i2, i4
733  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
734  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
735  u( j, kms+1 ) = u( j, kms+1 ) - refsum
736  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
737  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
738  110 CONTINUE
739  120 CONTINUE
740  ELSE IF( wantz ) THEN
741 *
742 * ==== U is not accumulated, so update Z
743 * . now by multiplying by reflections
744 * . from the right. ====
745 *
746  DO 140 m = mbot, mtop, -1
747  k = krcol + 2*( m-1 )
748  DO 130 j = iloz, ihiz
749  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
750  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
751  z( j, k+1 ) = z( j, k+1 ) - refsum
752  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
753  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
754  130 CONTINUE
755  140 CONTINUE
756  END IF
757 *
758 * ==== End of near-the-diagonal bulge chase. ====
759 *
760  145 CONTINUE
761 *
762 * ==== Use U (if accumulated) to update far-from-diagonal
763 * . entries in H. If required, use U to update Z as
764 * . well. ====
765 *
766  IF( accum ) THEN
767  IF( wantt ) THEN
768  jtop = 1
769  jbot = n
770  ELSE
771  jtop = ktop
772  jbot = kbot
773  END IF
774  k1 = max( 1, ktop-incol )
775  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
776 *
777 * ==== Horizontal Multiply ====
778 *
779  DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
780  jlen = min( nh, jbot-jcol+1 )
781  CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
782  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
783  $ ldwh )
784  CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
785  $ h( incol+k1, jcol ), ldh )
786  150 CONTINUE
787 *
788 * ==== Vertical multiply ====
789 *
790  DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
791  jlen = min( nv, max( ktop, incol )-jrow )
792  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
793  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
794  $ ldu, zero, wv, ldwv )
795  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
796  $ h( jrow, incol+k1 ), ldh )
797  160 CONTINUE
798 *
799 * ==== Z multiply (also vertical) ====
800 *
801  IF( wantz ) THEN
802  DO 170 jrow = iloz, ihiz, nv
803  jlen = min( nv, ihiz-jrow+1 )
804  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
805  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
806  $ ldu, zero, wv, ldwv )
807  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
808  $ z( jrow, incol+k1 ), ldz )
809  170 CONTINUE
810  END IF
811  END IF
812  180 CONTINUE
813 *
814 * ==== End of DLAQR5 ====
815 *
816  END
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:74
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:110
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dtrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
DTRMM
Definition: dtrmm.f:177
subroutine dlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: dlaqr5.f:265
subroutine dlaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: dlaqr1.f:121
subroutine dlarfg(N, ALPHA, X, INCX, TAU)
DLARFG generates an elementary reflector (Householder matrix).
Definition: dlarfg.f:106