LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
zlaqr5.f
Go to the documentation of this file.
1 *> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22 * H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23 * WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32 * $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33 * ..
34 *
35 *
36 *> \par Purpose:
37 * =============
38 *>
39 *> \verbatim
40 *>
41 *> ZLAQR5, called by ZLAQR0, performs a
42 *> single small-bulge multi-shift QR sweep.
43 *> \endverbatim
44 *
45 * Arguments:
46 * ==========
47 *
48 *> \param[in] WANTT
49 *> \verbatim
50 *> WANTT is LOGICAL
51 *> WANTT = .true. if the triangular Schur factor
52 *> is being computed. WANTT is set to .false. otherwise.
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is LOGICAL
58 *> WANTZ = .true. if the unitary Schur factor is being
59 *> computed. WANTZ is set to .false. otherwise.
60 *> \endverbatim
61 *>
62 *> \param[in] KACC22
63 *> \verbatim
64 *> KACC22 is INTEGER with value 0, 1, or 2.
65 *> Specifies the computation mode of far-from-diagonal
66 *> orthogonal updates.
67 *> = 0: ZLAQR5 does not accumulate reflections and does not
68 *> use matrix-matrix multiply to update far-from-diagonal
69 *> matrix entries.
70 *> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71 *> multiply to update the far-from-diagonal matrix entries.
72 *> = 2: Same as KACC22 = 1. This option used to enable exploiting
73 *> the 2-by-2 structure during matrix multiplications, but
74 *> this is no longer supported.
75 *> \endverbatim
76 *>
77 *> \param[in] N
78 *> \verbatim
79 *> N is INTEGER
80 *> N is the order of the Hessenberg matrix H upon which this
81 *> subroutine operates.
82 *> \endverbatim
83 *>
84 *> \param[in] KTOP
85 *> \verbatim
86 *> KTOP is INTEGER
87 *> \endverbatim
88 *>
89 *> \param[in] KBOT
90 *> \verbatim
91 *> KBOT is INTEGER
92 *> These are the first and last rows and columns of an
93 *> isolated diagonal block upon which the QR sweep is to be
94 *> applied. It is assumed without a check that
95 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
96 *> and
97 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
98 *> \endverbatim
99 *>
100 *> \param[in] NSHFTS
101 *> \verbatim
102 *> NSHFTS is INTEGER
103 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
104 *> must be positive and even.
105 *> \endverbatim
106 *>
107 *> \param[in,out] S
108 *> \verbatim
109 *> S is COMPLEX*16 array, dimension (NSHFTS)
110 *> S contains the shifts of origin that define the multi-
111 *> shift QR sweep. On output S may be reordered.
112 *> \endverbatim
113 *>
114 *> \param[in,out] H
115 *> \verbatim
116 *> H is COMPLEX*16 array, dimension (LDH,N)
117 *> On input H contains a Hessenberg matrix. On output a
118 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
119 *> to the isolated diagonal block in rows and columns KTOP
120 *> through KBOT.
121 *> \endverbatim
122 *>
123 *> \param[in] LDH
124 *> \verbatim
125 *> LDH is INTEGER
126 *> LDH is the leading dimension of H just as declared in the
127 *> calling procedure. LDH >= MAX(1,N).
128 *> \endverbatim
129 *>
130 *> \param[in] ILOZ
131 *> \verbatim
132 *> ILOZ is INTEGER
133 *> \endverbatim
134 *>
135 *> \param[in] IHIZ
136 *> \verbatim
137 *> IHIZ is INTEGER
138 *> Specify the rows of Z to which transformations must be
139 *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
140 *> \endverbatim
141 *>
142 *> \param[in,out] Z
143 *> \verbatim
144 *> Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
145 *> If WANTZ = .TRUE., then the QR Sweep unitary
146 *> similarity transformation is accumulated into
147 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
148 *> If WANTZ = .FALSE., then Z is unreferenced.
149 *> \endverbatim
150 *>
151 *> \param[in] LDZ
152 *> \verbatim
153 *> LDZ is INTEGER
154 *> LDA is the leading dimension of Z just as declared in
155 *> the calling procedure. LDZ >= N.
156 *> \endverbatim
157 *>
158 *> \param[out] V
159 *> \verbatim
160 *> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
161 *> \endverbatim
162 *>
163 *> \param[in] LDV
164 *> \verbatim
165 *> LDV is INTEGER
166 *> LDV is the leading dimension of V as declared in the
167 *> calling procedure. LDV >= 3.
168 *> \endverbatim
169 *>
170 *> \param[out] U
171 *> \verbatim
172 *> U is COMPLEX*16 array, dimension (LDU,2*NSHFTS)
173 *> \endverbatim
174 *>
175 *> \param[in] LDU
176 *> \verbatim
177 *> LDU is INTEGER
178 *> LDU is the leading dimension of U just as declared in the
179 *> in the calling subroutine. LDU >= 2*NSHFTS.
180 *> \endverbatim
181 *>
182 *> \param[in] NV
183 *> \verbatim
184 *> NV is INTEGER
185 *> NV is the number of rows in WV agailable for workspace.
186 *> NV >= 1.
187 *> \endverbatim
188 *>
189 *> \param[out] WV
190 *> \verbatim
191 *> WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS)
192 *> \endverbatim
193 *>
194 *> \param[in] LDWV
195 *> \verbatim
196 *> LDWV is INTEGER
197 *> LDWV is the leading dimension of WV as declared in the
198 *> in the calling subroutine. LDWV >= NV.
199 *> \endverbatim
200 *
201 *> \param[in] NH
202 *> \verbatim
203 *> NH is INTEGER
204 *> NH is the number of columns in array WH available for
205 *> workspace. NH >= 1.
206 *> \endverbatim
207 *>
208 *> \param[out] WH
209 *> \verbatim
210 *> WH is COMPLEX*16 array, dimension (LDWH,NH)
211 *> \endverbatim
212 *>
213 *> \param[in] LDWH
214 *> \verbatim
215 *> LDWH is INTEGER
216 *> Leading dimension of WH just as declared in the
217 *> calling procedure. LDWH >= 2*NSHFTS.
218 *> \endverbatim
219 *>
220 * Authors:
221 * ========
222 *
223 *> \author Univ. of Tennessee
224 *> \author Univ. of California Berkeley
225 *> \author Univ. of Colorado Denver
226 *> \author NAG Ltd.
227 *
228 *> \ingroup complex16OTHERauxiliary
229 *
230 *> \par Contributors:
231 * ==================
232 *>
233 *> Karen Braman and Ralph Byers, Department of Mathematics,
234 *> University of Kansas, USA
235 *>
236 *> Lars Karlsson, Daniel Kressner, and Bruno Lang
237 *>
238 *> Thijs Steel, Department of Computer science,
239 *> KU Leuven, Belgium
240 *
241 *> \par References:
242 * ================
243 *>
244 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247 *> 929--947, 2002.
248 *>
249 *> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
250 *> chains of bulges in multishift QR algorithms.
251 *> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
252 *>
253 * =====================================================================
254  SUBROUTINE zlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
255  $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
256  $ WV, LDWV, NH, WH, LDWH )
257  IMPLICIT NONE
258 *
259 * -- LAPACK auxiliary routine --
260 * -- LAPACK is a software package provided by Univ. of Tennessee, --
261 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
262 *
263 * .. Scalar Arguments ..
264  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
265  $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
266  LOGICAL WANTT, WANTZ
267 * ..
268 * .. Array Arguments ..
269  COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
270  $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
271 * ..
272 *
273 * ================================================================
274 * .. Parameters ..
275  COMPLEX*16 ZERO, ONE
276  PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
277  $ one = ( 1.0d0, 0.0d0 ) )
278  DOUBLE PRECISION RZERO, RONE
279  PARAMETER ( RZERO = 0.0d0, rone = 1.0d0 )
280 * ..
281 * .. Local Scalars ..
282  COMPLEX*16 ALPHA, BETA, CDUM, REFSUM
283  DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
284  $ smlnum, tst1, tst2, ulp
285  INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
286  $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
287  $ m, m22, mbot, mtop, nbmps, ndcol,
288  $ ns, nu
289  LOGICAL ACCUM, BMP22
290 * ..
291 * .. External Functions ..
292  DOUBLE PRECISION DLAMCH
293  EXTERNAL DLAMCH
294 * ..
295 * .. Intrinsic Functions ..
296 *
297  INTRINSIC abs, dble, dconjg, dimag, max, min, mod
298 * ..
299 * .. Local Arrays ..
300  COMPLEX*16 VT( 3 )
301 * ..
302 * .. External Subroutines ..
303  EXTERNAL dlabad, zgemm, zlacpy, zlaqr1, zlarfg, zlaset,
304  $ ztrmm
305 * ..
306 * .. Statement Functions ..
307  DOUBLE PRECISION CABS1
308 * ..
309 * .. Statement Function definitions ..
310  cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
311 * ..
312 * .. Executable Statements ..
313 *
314 * ==== If there are no shifts, then there is nothing to do. ====
315 *
316  IF( nshfts.LT.2 )
317  $ RETURN
318 *
319 * ==== If the active block is empty or 1-by-1, then there
320 * . is nothing to do. ====
321 *
322  IF( ktop.GE.kbot )
323  $ RETURN
324 *
325 * ==== NSHFTS is supposed to be even, but if it is odd,
326 * . then simply reduce it by one. ====
327 *
328  ns = nshfts - mod( nshfts, 2 )
329 *
330 * ==== Machine constants for deflation ====
331 *
332  safmin = dlamch( 'SAFE MINIMUM' )
333  safmax = rone / safmin
334  CALL dlabad( safmin, safmax )
335  ulp = dlamch( 'PRECISION' )
336  smlnum = safmin*( dble( n ) / ulp )
337 *
338 * ==== Use accumulated reflections to update far-from-diagonal
339 * . entries ? ====
340 *
341  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
342 *
343 * ==== clear trash ====
344 *
345  IF( ktop+2.LE.kbot )
346  $ h( ktop+2, ktop ) = zero
347 *
348 * ==== NBMPS = number of 2-shift bulges in the chain ====
349 *
350  nbmps = ns / 2
351 *
352 * ==== KDU = width of slab ====
353 *
354  kdu = 4*nbmps
355 *
356 * ==== Create and chase chains of NBMPS bulges ====
357 *
358  DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
359 *
360 * JTOP = Index from which updates from the right start.
361 *
362  IF( accum ) THEN
363  jtop = max( ktop, incol )
364  ELSE IF( wantt ) THEN
365  jtop = 1
366  ELSE
367  jtop = ktop
368  END IF
369 *
370  ndcol = incol + kdu
371  IF( accum )
372  $ CALL zlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
373 *
374 * ==== Near-the-diagonal bulge chase. The following loop
375 * . performs the near-the-diagonal part of a small bulge
376 * . multi-shift QR sweep. Each 4*NBMPS column diagonal
377 * . chunk extends from column INCOL to column NDCOL
378 * . (including both column INCOL and column NDCOL). The
379 * . following loop chases a 2*NBMPS+1 column long chain of
380 * . NBMPS bulges 2*NBMPS columns to the right. (INCOL
381 * . may be less than KTOP and and NDCOL may be greater than
382 * . KBOT indicating phantom columns from which to chase
383 * . bulges before they are actually introduced or to which
384 * . to chase bulges beyond column KBOT.) ====
385 *
386  DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
387 *
388 * ==== Bulges number MTOP to MBOT are active double implicit
389 * . shift bulges. There may or may not also be small
390 * . 2-by-2 bulge, if there is room. The inactive bulges
391 * . (if any) must wait until the active bulges have moved
392 * . down the diagonal to make room. The phantom matrix
393 * . paradigm described above helps keep track. ====
394 *
395  mtop = max( 1, ( ktop-krcol ) / 2+1 )
396  mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
397  m22 = mbot + 1
398  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
399  $ ( kbot-2 )
400 *
401 * ==== Generate reflections to chase the chain right
402 * . one column. (The minimum value of K is KTOP-1.) ====
403 *
404  IF ( bmp22 ) THEN
405 *
406 * ==== Special case: 2-by-2 reflection at bottom treated
407 * . separately ====
408 *
409  k = krcol + 2*( m22-1 )
410  IF( k.EQ.ktop-1 ) THEN
411  CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
412  $ s( 2*m22 ), v( 1, m22 ) )
413  beta = v( 1, m22 )
414  CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
415  ELSE
416  beta = h( k+1, k )
417  v( 2, m22 ) = h( k+2, k )
418  CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
419  h( k+1, k ) = beta
420  h( k+2, k ) = zero
421  END IF
422 
423 *
424 * ==== Perform update from right within
425 * . computational window. ====
426 *
427  DO 30 j = jtop, min( kbot, k+3 )
428  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
429  $ h( j, k+2 ) )
430  h( j, k+1 ) = h( j, k+1 ) - refsum
431  h( j, k+2 ) = h( j, k+2 ) -
432  $ refsum*dconjg( v( 2, m22 ) )
433  30 CONTINUE
434 *
435 * ==== Perform update from left within
436 * . computational window. ====
437 *
438  IF( accum ) THEN
439  jbot = min( ndcol, kbot )
440  ELSE IF( wantt ) THEN
441  jbot = n
442  ELSE
443  jbot = kbot
444  END IF
445  DO 40 j = k+1, jbot
446  refsum = dconjg( v( 1, m22 ) )*
447  $ ( h( k+1, j )+dconjg( v( 2, m22 ) )*
448  $ h( k+2, j ) )
449  h( k+1, j ) = h( k+1, j ) - refsum
450  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
451  40 CONTINUE
452 *
453 * ==== The following convergence test requires that
454 * . the tradition small-compared-to-nearby-diagonals
455 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
456 * . criteria both be satisfied. The latter improves
457 * . accuracy in some examples. Falling back on an
458 * . alternate convergence criterion when TST1 or TST2
459 * . is zero (as done here) is traditional but probably
460 * . unnecessary. ====
461 *
462  IF( k.GE.ktop ) THEN
463  IF( h( k+1, k ).NE.zero ) THEN
464  tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
465  IF( tst1.EQ.rzero ) THEN
466  IF( k.GE.ktop+1 )
467  $ tst1 = tst1 + cabs1( h( k, k-1 ) )
468  IF( k.GE.ktop+2 )
469  $ tst1 = tst1 + cabs1( h( k, k-2 ) )
470  IF( k.GE.ktop+3 )
471  $ tst1 = tst1 + cabs1( h( k, k-3 ) )
472  IF( k.LE.kbot-2 )
473  $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
474  IF( k.LE.kbot-3 )
475  $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
476  IF( k.LE.kbot-4 )
477  $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
478  END IF
479  IF( cabs1( h( k+1, k ) )
480  $ .LE.max( smlnum, ulp*tst1 ) ) THEN
481  h12 = max( cabs1( h( k+1, k ) ),
482  $ cabs1( h( k, k+1 ) ) )
483  h21 = min( cabs1( h( k+1, k ) ),
484  $ cabs1( h( k, k+1 ) ) )
485  h11 = max( cabs1( h( k+1, k+1 ) ),
486  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
487  h22 = min( cabs1( h( k+1, k+1 ) ),
488  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
489  scl = h11 + h12
490  tst2 = h22*( h11 / scl )
491 *
492  IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
493  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
494  END IF
495  END IF
496  END IF
497 *
498 * ==== Accumulate orthogonal transformations. ====
499 *
500  IF( accum ) THEN
501  kms = k - incol
502  DO 50 j = max( 1, ktop-incol ), kdu
503  refsum = v( 1, m22 )*( u( j, kms+1 )+
504  $ v( 2, m22 )*u( j, kms+2 ) )
505  u( j, kms+1 ) = u( j, kms+1 ) - refsum
506  u( j, kms+2 ) = u( j, kms+2 ) -
507  $ refsum*dconjg( v( 2, m22 ) )
508  50 CONTINUE
509  ELSE IF( wantz ) THEN
510  DO 60 j = iloz, ihiz
511  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
512  $ z( j, k+2 ) )
513  z( j, k+1 ) = z( j, k+1 ) - refsum
514  z( j, k+2 ) = z( j, k+2 ) -
515  $ refsum*dconjg( v( 2, m22 ) )
516  60 CONTINUE
517  END IF
518  END IF
519 *
520 * ==== Normal case: Chain of 3-by-3 reflections ====
521 *
522  DO 80 m = mbot, mtop, -1
523  k = krcol + 2*( m-1 )
524  IF( k.EQ.ktop-1 ) THEN
525  CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
526  $ s( 2*m ), v( 1, m ) )
527  alpha = v( 1, m )
528  CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
529  ELSE
530 *
531 * ==== Perform delayed transformation of row below
532 * . Mth bulge. Exploit fact that first two elements
533 * . of row are actually zero. ====
534 *
535  refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
536  h( k+3, k ) = -refsum
537  h( k+3, k+1 ) = -refsum*dconjg( v( 2, m ) )
538  h( k+3, k+2 ) = h( k+3, k+2 ) -
539  $ refsum*dconjg( v( 3, m ) )
540 *
541 * ==== Calculate reflection to move
542 * . Mth bulge one step. ====
543 *
544  beta = h( k+1, k )
545  v( 2, m ) = h( k+2, k )
546  v( 3, m ) = h( k+3, k )
547  CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
548 *
549 * ==== A Bulge may collapse because of vigilant
550 * . deflation or destructive underflow. In the
551 * . underflow case, try the two-small-subdiagonals
552 * . trick to try to reinflate the bulge. ====
553 *
554  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
555  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
556 *
557 * ==== Typical case: not collapsed (yet). ====
558 *
559  h( k+1, k ) = beta
560  h( k+2, k ) = zero
561  h( k+3, k ) = zero
562  ELSE
563 *
564 * ==== Atypical case: collapsed. Attempt to
565 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
566 * . If the fill resulting from the new
567 * . reflector is too large, then abandon it.
568 * . Otherwise, use the new one. ====
569 *
570  CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
571  $ s( 2*m ), vt )
572  alpha = vt( 1 )
573  CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
574  refsum = dconjg( vt( 1 ) )*
575  $ ( h( k+1, k )+dconjg( vt( 2 ) )*
576  $ h( k+2, k ) )
577 *
578  IF( cabs1( h( k+2, k )-refsum*vt( 2 ) )+
579  $ cabs1( refsum*vt( 3 ) ).GT.ulp*
580  $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
581  $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
582 *
583 * ==== Starting a new bulge here would
584 * . create non-negligible fill. Use
585 * . the old one with trepidation. ====
586 *
587  h( k+1, k ) = beta
588  h( k+2, k ) = zero
589  h( k+3, k ) = zero
590  ELSE
591 *
592 * ==== Starting a new bulge here would
593 * . create only negligible fill.
594 * . Replace the old reflector with
595 * . the new one. ====
596 *
597  h( k+1, k ) = h( k+1, k ) - refsum
598  h( k+2, k ) = zero
599  h( k+3, k ) = zero
600  v( 1, m ) = vt( 1 )
601  v( 2, m ) = vt( 2 )
602  v( 3, m ) = vt( 3 )
603  END IF
604  END IF
605  END IF
606 *
607 * ==== Apply reflection from the right and
608 * . the first column of update from the left.
609 * . These updates are required for the vigilant
610 * . deflation check. We still delay most of the
611 * . updates from the left for efficiency. ====
612 *
613  DO 70 j = jtop, min( kbot, k+3 )
614  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
615  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
616  h( j, k+1 ) = h( j, k+1 ) - refsum
617  h( j, k+2 ) = h( j, k+2 ) -
618  $ refsum*dconjg( v( 2, m ) )
619  h( j, k+3 ) = h( j, k+3 ) -
620  $ refsum*dconjg( v( 3, m ) )
621  70 CONTINUE
622 *
623 * ==== Perform update from left for subsequent
624 * . column. ====
625 *
626  refsum = dconjg( v( 1, m ) )*( h( k+1, k+1 )
627  $ +dconjg( v( 2, m ) )*h( k+2, k+1 )
628  $ +dconjg( v( 3, m ) )*h( k+3, k+1 ) )
629  h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
630  h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m )
631  h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m )
632 *
633 * ==== The following convergence test requires that
634 * . the tradition small-compared-to-nearby-diagonals
635 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
636 * . criteria both be satisfied. The latter improves
637 * . accuracy in some examples. Falling back on an
638 * . alternate convergence criterion when TST1 or TST2
639 * . is zero (as done here) is traditional but probably
640 * . unnecessary. ====
641 *
642  IF( k.LT.ktop)
643  $ cycle
644  IF( h( k+1, k ).NE.zero ) THEN
645  tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
646  IF( tst1.EQ.rzero ) THEN
647  IF( k.GE.ktop+1 )
648  $ tst1 = tst1 + cabs1( h( k, k-1 ) )
649  IF( k.GE.ktop+2 )
650  $ tst1 = tst1 + cabs1( h( k, k-2 ) )
651  IF( k.GE.ktop+3 )
652  $ tst1 = tst1 + cabs1( h( k, k-3 ) )
653  IF( k.LE.kbot-2 )
654  $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
655  IF( k.LE.kbot-3 )
656  $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
657  IF( k.LE.kbot-4 )
658  $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
659  END IF
660  IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
661  $ THEN
662  h12 = max( cabs1( h( k+1, k ) ),
663  $ cabs1( h( k, k+1 ) ) )
664  h21 = min( cabs1( h( k+1, k ) ),
665  $ cabs1( h( k, k+1 ) ) )
666  h11 = max( cabs1( h( k+1, k+1 ) ),
667  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
668  h22 = min( cabs1( h( k+1, k+1 ) ),
669  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
670  scl = h11 + h12
671  tst2 = h22*( h11 / scl )
672 *
673  IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
674  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
675  END IF
676  END IF
677  80 CONTINUE
678 *
679 * ==== Multiply H by reflections from the left ====
680 *
681  IF( accum ) THEN
682  jbot = min( ndcol, kbot )
683  ELSE IF( wantt ) THEN
684  jbot = n
685  ELSE
686  jbot = kbot
687  END IF
688 *
689  DO 100 m = mbot, mtop, -1
690  k = krcol + 2*( m-1 )
691  DO 90 j = max( ktop, krcol + 2*m ), jbot
692  refsum = dconjg( v( 1, m ) )*
693  $ ( h( k+1, j )+dconjg( v( 2, m ) )*
694  $ h( k+2, j )+dconjg( v( 3, m ) )*h( k+3, j ) )
695  h( k+1, j ) = h( k+1, j ) - refsum
696  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
697  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
698  90 CONTINUE
699  100 CONTINUE
700 *
701 * ==== Accumulate orthogonal transformations. ====
702 *
703  IF( accum ) THEN
704 *
705 * ==== Accumulate U. (If needed, update Z later
706 * . with an efficient matrix-matrix
707 * . multiply.) ====
708 *
709  DO 120 m = mbot, mtop, -1
710  k = krcol + 2*( m-1 )
711  kms = k - incol
712  i2 = max( 1, ktop-incol )
713  i2 = max( i2, kms-(krcol-incol)+1 )
714  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
715  DO 110 j = i2, i4
716  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
717  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
718  u( j, kms+1 ) = u( j, kms+1 ) - refsum
719  u( j, kms+2 ) = u( j, kms+2 ) -
720  $ refsum*dconjg( v( 2, m ) )
721  u( j, kms+3 ) = u( j, kms+3 ) -
722  $ refsum*dconjg( v( 3, m ) )
723  110 CONTINUE
724  120 CONTINUE
725  ELSE IF( wantz ) THEN
726 *
727 * ==== U is not accumulated, so update Z
728 * . now by multiplying by reflections
729 * . from the right. ====
730 *
731  DO 140 m = mbot, mtop, -1
732  k = krcol + 2*( m-1 )
733  DO 130 j = iloz, ihiz
734  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
735  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
736  z( j, k+1 ) = z( j, k+1 ) - refsum
737  z( j, k+2 ) = z( j, k+2 ) -
738  $ refsum*dconjg( v( 2, m ) )
739  z( j, k+3 ) = z( j, k+3 ) -
740  $ refsum*dconjg( v( 3, m ) )
741  130 CONTINUE
742  140 CONTINUE
743  END IF
744 *
745 * ==== End of near-the-diagonal bulge chase. ====
746 *
747  145 CONTINUE
748 *
749 * ==== Use U (if accumulated) to update far-from-diagonal
750 * . entries in H. If required, use U to update Z as
751 * . well. ====
752 *
753  IF( accum ) THEN
754  IF( wantt ) THEN
755  jtop = 1
756  jbot = n
757  ELSE
758  jtop = ktop
759  jbot = kbot
760  END IF
761  k1 = max( 1, ktop-incol )
762  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
763 *
764 * ==== Horizontal Multiply ====
765 *
766  DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
767  jlen = min( nh, jbot-jcol+1 )
768  CALL zgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
769  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
770  $ ldwh )
771  CALL zlacpy( 'ALL', nu, jlen, wh, ldwh,
772  $ h( incol+k1, jcol ), ldh )
773  150 CONTINUE
774 *
775 * ==== Vertical multiply ====
776 *
777  DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
778  jlen = min( nv, max( ktop, incol )-jrow )
779  CALL zgemm( 'N', 'N', jlen, nu, nu, one,
780  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
781  $ ldu, zero, wv, ldwv )
782  CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
783  $ h( jrow, incol+k1 ), ldh )
784  160 CONTINUE
785 *
786 * ==== Z multiply (also vertical) ====
787 *
788  IF( wantz ) THEN
789  DO 170 jrow = iloz, ihiz, nv
790  jlen = min( nv, ihiz-jrow+1 )
791  CALL zgemm( 'N', 'N', jlen, nu, nu, one,
792  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
793  $ ldu, zero, wv, ldwv )
794  CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
795  $ z( jrow, incol+k1 ), ldz )
796  170 CONTINUE
797  END IF
798  END IF
799  180 CONTINUE
800 *
801 * ==== End of ZLAQR5 ====
802 *
803  END
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:74
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:187
subroutine ztrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRMM
Definition: ztrmm.f:177
subroutine zlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
ZLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: zlaqr5.f:257
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:103
subroutine zlaqr1(N, H, LDH, S1, S2, V)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: zlaqr1.f:107
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: zlaset.f:106
subroutine zlarfg(N, ALPHA, X, INCX, TAU)
ZLARFG generates an elementary reflector (Householder matrix).
Definition: zlarfg.f:106