LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
zlaqr5.f
Go to the documentation of this file.
1 *> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22 * H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23 * WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32 * $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33 * ..
34 *
35 *
36 *> \par Purpose:
37 * =============
38 *>
39 *> \verbatim
40 *>
41 *> ZLAQR5, called by ZLAQR0, performs a
42 *> single small-bulge multi-shift QR sweep.
43 *> \endverbatim
44 *
45 * Arguments:
46 * ==========
47 *
48 *> \param[in] WANTT
49 *> \verbatim
50 *> WANTT is logical scalar
51 *> WANTT = .true. if the triangular Schur factor
52 *> is being computed. WANTT is set to .false. otherwise.
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is logical scalar
58 *> WANTZ = .true. if the unitary Schur factor is being
59 *> computed. WANTZ is set to .false. otherwise.
60 *> \endverbatim
61 *>
62 *> \param[in] KACC22
63 *> \verbatim
64 *> KACC22 is integer with value 0, 1, or 2.
65 *> Specifies the computation mode of far-from-diagonal
66 *> orthogonal updates.
67 *> = 0: ZLAQR5 does not accumulate reflections and does not
68 *> use matrix-matrix multiply to update far-from-diagonal
69 *> matrix entries.
70 *> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71 *> multiply to update the far-from-diagonal matrix entries.
72 *> = 2: ZLAQR5 accumulates reflections, uses matrix-matrix
73 *> multiply to update the far-from-diagonal matrix entries,
74 *> and takes advantage of 2-by-2 block structure during
75 *> matrix multiplies.
76 *> \endverbatim
77 *>
78 *> \param[in] N
79 *> \verbatim
80 *> N is integer scalar
81 *> N is the order of the Hessenberg matrix H upon which this
82 *> subroutine operates.
83 *> \endverbatim
84 *>
85 *> \param[in] KTOP
86 *> \verbatim
87 *> KTOP is integer scalar
88 *> \endverbatim
89 *>
90 *> \param[in] KBOT
91 *> \verbatim
92 *> KBOT is integer scalar
93 *> These are the first and last rows and columns of an
94 *> isolated diagonal block upon which the QR sweep is to be
95 *> applied. It is assumed without a check that
96 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97 *> and
98 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
99 *> \endverbatim
100 *>
101 *> \param[in] NSHFTS
102 *> \verbatim
103 *> NSHFTS is integer scalar
104 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
105 *> must be positive and even.
106 *> \endverbatim
107 *>
108 *> \param[in,out] S
109 *> \verbatim
110 *> S is COMPLEX*16 array of size (NSHFTS)
111 *> S contains the shifts of origin that define the multi-
112 *> shift QR sweep. On output S may be reordered.
113 *> \endverbatim
114 *>
115 *> \param[in,out] H
116 *> \verbatim
117 *> H is COMPLEX*16 array of size (LDH,N)
118 *> On input H contains a Hessenberg matrix. On output a
119 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
120 *> to the isolated diagonal block in rows and columns KTOP
121 *> through KBOT.
122 *> \endverbatim
123 *>
124 *> \param[in] LDH
125 *> \verbatim
126 *> LDH is integer scalar
127 *> LDH is the leading dimension of H just as declared in the
128 *> calling procedure. LDH.GE.MAX(1,N).
129 *> \endverbatim
130 *>
131 *> \param[in] ILOZ
132 *> \verbatim
133 *> ILOZ is INTEGER
134 *> \endverbatim
135 *>
136 *> \param[in] IHIZ
137 *> \verbatim
138 *> IHIZ is INTEGER
139 *> Specify the rows of Z to which transformations must be
140 *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
141 *> \endverbatim
142 *>
143 *> \param[in,out] Z
144 *> \verbatim
145 *> Z is COMPLEX*16 array of size (LDZ,IHIZ)
146 *> If WANTZ = .TRUE., then the QR Sweep unitary
147 *> similarity transformation is accumulated into
148 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
149 *> If WANTZ = .FALSE., then Z is unreferenced.
150 *> \endverbatim
151 *>
152 *> \param[in] LDZ
153 *> \verbatim
154 *> LDZ is integer scalar
155 *> LDA is the leading dimension of Z just as declared in
156 *> the calling procedure. LDZ.GE.N.
157 *> \endverbatim
158 *>
159 *> \param[out] V
160 *> \verbatim
161 *> V is COMPLEX*16 array of size (LDV,NSHFTS/2)
162 *> \endverbatim
163 *>
164 *> \param[in] LDV
165 *> \verbatim
166 *> LDV is integer scalar
167 *> LDV is the leading dimension of V as declared in the
168 *> calling procedure. LDV.GE.3.
169 *> \endverbatim
170 *>
171 *> \param[out] U
172 *> \verbatim
173 *> U is COMPLEX*16 array of size
174 *> (LDU,3*NSHFTS-3)
175 *> \endverbatim
176 *>
177 *> \param[in] LDU
178 *> \verbatim
179 *> LDU is integer scalar
180 *> LDU is the leading dimension of U just as declared in the
181 *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
182 *> \endverbatim
183 *>
184 *> \param[in] NH
185 *> \verbatim
186 *> NH is integer scalar
187 *> NH is the number of columns in array WH available for
188 *> workspace. NH.GE.1.
189 *> \endverbatim
190 *>
191 *> \param[out] WH
192 *> \verbatim
193 *> WH is COMPLEX*16 array of size (LDWH,NH)
194 *> \endverbatim
195 *>
196 *> \param[in] LDWH
197 *> \verbatim
198 *> LDWH is integer scalar
199 *> Leading dimension of WH just as declared in the
200 *> calling procedure. LDWH.GE.3*NSHFTS-3.
201 *> \endverbatim
202 *>
203 *> \param[in] NV
204 *> \verbatim
205 *> NV is integer scalar
206 *> NV is the number of rows in WV agailable for workspace.
207 *> NV.GE.1.
208 *> \endverbatim
209 *>
210 *> \param[out] WV
211 *> \verbatim
212 *> WV is COMPLEX*16 array of size
213 *> (LDWV,3*NSHFTS-3)
214 *> \endverbatim
215 *>
216 *> \param[in] LDWV
217 *> \verbatim
218 *> LDWV is integer scalar
219 *> LDWV is the leading dimension of WV as declared in the
220 *> in the calling subroutine. LDWV.GE.NV.
221 *> \endverbatim
222 *
223 * Authors:
224 * ========
225 *
226 *> \author Univ. of Tennessee
227 *> \author Univ. of California Berkeley
228 *> \author Univ. of Colorado Denver
229 *> \author NAG Ltd.
230 *
231 *> \date June 2016
232 *
233 *> \ingroup complex16OTHERauxiliary
234 *
235 *> \par Contributors:
236 * ==================
237 *>
238 *> Karen Braman and Ralph Byers, Department of Mathematics,
239 *> University of Kansas, USA
240 *
241 *> \par References:
242 * ================
243 *>
244 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247 *> 929--947, 2002.
248 *>
249 * =====================================================================
250  SUBROUTINE zlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
251  $ h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv,
252  $ wv, ldwv, nh, wh, ldwh )
253 *
254 * -- LAPACK auxiliary routine (version 3.6.1) --
255 * -- LAPACK is a software package provided by Univ. of Tennessee, --
256 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
257 * June 2016
258 *
259 * .. Scalar Arguments ..
260  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
261  $ ldwh, ldwv, ldz, n, nh, nshfts, nv
262  LOGICAL WANTT, WANTZ
263 * ..
264 * .. Array Arguments ..
265  COMPLEX*16 H( ldh, * ), S( * ), U( ldu, * ), V( ldv, * ),
266  $ wh( ldwh, * ), wv( ldwv, * ), z( ldz, * )
267 * ..
268 *
269 * ================================================================
270 * .. Parameters ..
271  COMPLEX*16 ZERO, ONE
272  parameter ( zero = ( 0.0d0, 0.0d0 ),
273  $ one = ( 1.0d0, 0.0d0 ) )
274  DOUBLE PRECISION RZERO, RONE
275  parameter ( rzero = 0.0d0, rone = 1.0d0 )
276 * ..
277 * .. Local Scalars ..
278  COMPLEX*16 ALPHA, BETA, CDUM, REFSUM
279  DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
280  $ smlnum, tst1, tst2, ulp
281  INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
282  $ jrow, jtop, k, k1, kdu, kms, knz, krcol, kzs,
283  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
284  $ ns, nu
285  LOGICAL ACCUM, BLK22, BMP22
286 * ..
287 * .. External Functions ..
288  DOUBLE PRECISION DLAMCH
289  EXTERNAL dlamch
290 * ..
291 * .. Intrinsic Functions ..
292 *
293  INTRINSIC abs, dble, dconjg, dimag, max, min, mod
294 * ..
295 * .. Local Arrays ..
296  COMPLEX*16 VT( 3 )
297 * ..
298 * .. External Subroutines ..
299  EXTERNAL dlabad, zgemm, zlacpy, zlaqr1, zlarfg, zlaset,
300  $ ztrmm
301 * ..
302 * .. Statement Functions ..
303  DOUBLE PRECISION CABS1
304 * ..
305 * .. Statement Function definitions ..
306  cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
307 * ..
308 * .. Executable Statements ..
309 *
310 * ==== If there are no shifts, then there is nothing to do. ====
311 *
312  IF( nshfts.LT.2 )
313  $ RETURN
314 *
315 * ==== If the active block is empty or 1-by-1, then there
316 * . is nothing to do. ====
317 *
318  IF( ktop.GE.kbot )
319  $ RETURN
320 *
321 * ==== NSHFTS is supposed to be even, but if it is odd,
322 * . then simply reduce it by one. ====
323 *
324  ns = nshfts - mod( nshfts, 2 )
325 *
326 * ==== Machine constants for deflation ====
327 *
328  safmin = dlamch( 'SAFE MINIMUM' )
329  safmax = rone / safmin
330  CALL dlabad( safmin, safmax )
331  ulp = dlamch( 'PRECISION' )
332  smlnum = safmin*( dble( n ) / ulp )
333 *
334 * ==== Use accumulated reflections to update far-from-diagonal
335 * . entries ? ====
336 *
337  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
338 *
339 * ==== If so, exploit the 2-by-2 block structure? ====
340 *
341  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
342 *
343 * ==== clear trash ====
344 *
345  IF( ktop+2.LE.kbot )
346  $ h( ktop+2, ktop ) = zero
347 *
348 * ==== NBMPS = number of 2-shift bulges in the chain ====
349 *
350  nbmps = ns / 2
351 *
352 * ==== KDU = width of slab ====
353 *
354  kdu = 6*nbmps - 3
355 *
356 * ==== Create and chase chains of NBMPS bulges ====
357 *
358  DO 210 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2
359  ndcol = incol + kdu
360  IF( accum )
361  $ CALL zlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
362 *
363 * ==== Near-the-diagonal bulge chase. The following loop
364 * . performs the near-the-diagonal part of a small bulge
365 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
366 * . chunk extends from column INCOL to column NDCOL
367 * . (including both column INCOL and column NDCOL). The
368 * . following loop chases a 3*NBMPS column long chain of
369 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
370 * . may be less than KTOP and and NDCOL may be greater than
371 * . KBOT indicating phantom columns from which to chase
372 * . bulges before they are actually introduced or to which
373 * . to chase bulges beyond column KBOT.) ====
374 *
375  DO 140 krcol = incol, min( incol+3*nbmps-3, kbot-2 )
376 *
377 * ==== Bulges number MTOP to MBOT are active double implicit
378 * . shift bulges. There may or may not also be small
379 * . 2-by-2 bulge, if there is room. The inactive bulges
380 * . (if any) must wait until the active bulges have moved
381 * . down the diagonal to make room. The phantom matrix
382 * . paradigm described above helps keep track. ====
383 *
384  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
385  mbot = min( nbmps, ( kbot-krcol ) / 3 )
386  m22 = mbot + 1
387  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
388  $ ( kbot-2 )
389 *
390 * ==== Generate reflections to chase the chain right
391 * . one column. (The minimum value of K is KTOP-1.) ====
392 *
393  DO 10 m = mtop, mbot
394  k = krcol + 3*( m-1 )
395  IF( k.EQ.ktop-1 ) THEN
396  CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
397  $ s( 2*m ), v( 1, m ) )
398  alpha = v( 1, m )
399  CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
400  ELSE
401  beta = h( k+1, k )
402  v( 2, m ) = h( k+2, k )
403  v( 3, m ) = h( k+3, k )
404  CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
405 *
406 * ==== A Bulge may collapse because of vigilant
407 * . deflation or destructive underflow. In the
408 * . underflow case, try the two-small-subdiagonals
409 * . trick to try to reinflate the bulge. ====
410 *
411  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
412  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
413 *
414 * ==== Typical case: not collapsed (yet). ====
415 *
416  h( k+1, k ) = beta
417  h( k+2, k ) = zero
418  h( k+3, k ) = zero
419  ELSE
420 *
421 * ==== Atypical case: collapsed. Attempt to
422 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
423 * . If the fill resulting from the new
424 * . reflector is too large, then abandon it.
425 * . Otherwise, use the new one. ====
426 *
427  CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
428  $ s( 2*m ), vt )
429  alpha = vt( 1 )
430  CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
431  refsum = dconjg( vt( 1 ) )*
432  $ ( h( k+1, k )+dconjg( vt( 2 ) )*
433  $ h( k+2, k ) )
434 *
435  IF( cabs1( h( k+2, k )-refsum*vt( 2 ) )+
436  $ cabs1( refsum*vt( 3 ) ).GT.ulp*
437  $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
438  $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
439 *
440 * ==== Starting a new bulge here would
441 * . create non-negligible fill. Use
442 * . the old one with trepidation. ====
443 *
444  h( k+1, k ) = beta
445  h( k+2, k ) = zero
446  h( k+3, k ) = zero
447  ELSE
448 *
449 * ==== Stating a new bulge here would
450 * . create only negligible fill.
451 * . Replace the old reflector with
452 * . the new one. ====
453 *
454  h( k+1, k ) = h( k+1, k ) - refsum
455  h( k+2, k ) = zero
456  h( k+3, k ) = zero
457  v( 1, m ) = vt( 1 )
458  v( 2, m ) = vt( 2 )
459  v( 3, m ) = vt( 3 )
460  END IF
461  END IF
462  END IF
463  10 CONTINUE
464 *
465 * ==== Generate a 2-by-2 reflection, if needed. ====
466 *
467  k = krcol + 3*( m22-1 )
468  IF( bmp22 ) THEN
469  IF( k.EQ.ktop-1 ) THEN
470  CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
471  $ s( 2*m22 ), v( 1, m22 ) )
472  beta = v( 1, m22 )
473  CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
474  ELSE
475  beta = h( k+1, k )
476  v( 2, m22 ) = h( k+2, k )
477  CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
478  h( k+1, k ) = beta
479  h( k+2, k ) = zero
480  END IF
481  END IF
482 *
483 * ==== Multiply H by reflections from the left ====
484 *
485  IF( accum ) THEN
486  jbot = min( ndcol, kbot )
487  ELSE IF( wantt ) THEN
488  jbot = n
489  ELSE
490  jbot = kbot
491  END IF
492  DO 30 j = max( ktop, krcol ), jbot
493  mend = min( mbot, ( j-krcol+2 ) / 3 )
494  DO 20 m = mtop, mend
495  k = krcol + 3*( m-1 )
496  refsum = dconjg( v( 1, m ) )*
497  $ ( h( k+1, j )+dconjg( v( 2, m ) )*
498  $ h( k+2, j )+dconjg( v( 3, m ) )*h( k+3, j ) )
499  h( k+1, j ) = h( k+1, j ) - refsum
500  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
501  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
502  20 CONTINUE
503  30 CONTINUE
504  IF( bmp22 ) THEN
505  k = krcol + 3*( m22-1 )
506  DO 40 j = max( k+1, ktop ), jbot
507  refsum = dconjg( v( 1, m22 ) )*
508  $ ( h( k+1, j )+dconjg( v( 2, m22 ) )*
509  $ h( k+2, j ) )
510  h( k+1, j ) = h( k+1, j ) - refsum
511  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
512  40 CONTINUE
513  END IF
514 *
515 * ==== Multiply H by reflections from the right.
516 * . Delay filling in the last row until the
517 * . vigilant deflation check is complete. ====
518 *
519  IF( accum ) THEN
520  jtop = max( ktop, incol )
521  ELSE IF( wantt ) THEN
522  jtop = 1
523  ELSE
524  jtop = ktop
525  END IF
526  DO 80 m = mtop, mbot
527  IF( v( 1, m ).NE.zero ) THEN
528  k = krcol + 3*( m-1 )
529  DO 50 j = jtop, min( kbot, k+3 )
530  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
531  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
532  h( j, k+1 ) = h( j, k+1 ) - refsum
533  h( j, k+2 ) = h( j, k+2 ) -
534  $ refsum*dconjg( v( 2, m ) )
535  h( j, k+3 ) = h( j, k+3 ) -
536  $ refsum*dconjg( v( 3, m ) )
537  50 CONTINUE
538 *
539  IF( accum ) THEN
540 *
541 * ==== Accumulate U. (If necessary, update Z later
542 * . with with an efficient matrix-matrix
543 * . multiply.) ====
544 *
545  kms = k - incol
546  DO 60 j = max( 1, ktop-incol ), kdu
547  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
548  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
549  u( j, kms+1 ) = u( j, kms+1 ) - refsum
550  u( j, kms+2 ) = u( j, kms+2 ) -
551  $ refsum*dconjg( v( 2, m ) )
552  u( j, kms+3 ) = u( j, kms+3 ) -
553  $ refsum*dconjg( v( 3, m ) )
554  60 CONTINUE
555  ELSE IF( wantz ) THEN
556 *
557 * ==== U is not accumulated, so update Z
558 * . now by multiplying by reflections
559 * . from the right. ====
560 *
561  DO 70 j = iloz, ihiz
562  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
563  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
564  z( j, k+1 ) = z( j, k+1 ) - refsum
565  z( j, k+2 ) = z( j, k+2 ) -
566  $ refsum*dconjg( v( 2, m ) )
567  z( j, k+3 ) = z( j, k+3 ) -
568  $ refsum*dconjg( v( 3, m ) )
569  70 CONTINUE
570  END IF
571  END IF
572  80 CONTINUE
573 *
574 * ==== Special case: 2-by-2 reflection (if needed) ====
575 *
576  k = krcol + 3*( m22-1 )
577  IF( bmp22 ) THEN
578  IF ( v( 1, m22 ).NE.zero ) THEN
579  DO 90 j = jtop, min( kbot, k+3 )
580  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
581  $ h( j, k+2 ) )
582  h( j, k+1 ) = h( j, k+1 ) - refsum
583  h( j, k+2 ) = h( j, k+2 ) -
584  $ refsum*dconjg( v( 2, m22 ) )
585  90 CONTINUE
586 *
587  IF( accum ) THEN
588  kms = k - incol
589  DO 100 j = max( 1, ktop-incol ), kdu
590  refsum = v( 1, m22 )*( u( j, kms+1 )+
591  $ v( 2, m22 )*u( j, kms+2 ) )
592  u( j, kms+1 ) = u( j, kms+1 ) - refsum
593  u( j, kms+2 ) = u( j, kms+2 ) -
594  $ refsum*dconjg( v( 2, m22 ) )
595  100 CONTINUE
596  ELSE IF( wantz ) THEN
597  DO 110 j = iloz, ihiz
598  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
599  $ z( j, k+2 ) )
600  z( j, k+1 ) = z( j, k+1 ) - refsum
601  z( j, k+2 ) = z( j, k+2 ) -
602  $ refsum*dconjg( v( 2, m22 ) )
603  110 CONTINUE
604  END IF
605  END IF
606  END IF
607 *
608 * ==== Vigilant deflation check ====
609 *
610  mstart = mtop
611  IF( krcol+3*( mstart-1 ).LT.ktop )
612  $ mstart = mstart + 1
613  mend = mbot
614  IF( bmp22 )
615  $ mend = mend + 1
616  IF( krcol.EQ.kbot-2 )
617  $ mend = mend + 1
618  DO 120 m = mstart, mend
619  k = min( kbot-1, krcol+3*( m-1 ) )
620 *
621 * ==== The following convergence test requires that
622 * . the tradition small-compared-to-nearby-diagonals
623 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
624 * . criteria both be satisfied. The latter improves
625 * . accuracy in some examples. Falling back on an
626 * . alternate convergence criterion when TST1 or TST2
627 * . is zero (as done here) is traditional but probably
628 * . unnecessary. ====
629 *
630  IF( h( k+1, k ).NE.zero ) THEN
631  tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
632  IF( tst1.EQ.rzero ) THEN
633  IF( k.GE.ktop+1 )
634  $ tst1 = tst1 + cabs1( h( k, k-1 ) )
635  IF( k.GE.ktop+2 )
636  $ tst1 = tst1 + cabs1( h( k, k-2 ) )
637  IF( k.GE.ktop+3 )
638  $ tst1 = tst1 + cabs1( h( k, k-3 ) )
639  IF( k.LE.kbot-2 )
640  $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
641  IF( k.LE.kbot-3 )
642  $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
643  IF( k.LE.kbot-4 )
644  $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
645  END IF
646  IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
647  $ THEN
648  h12 = max( cabs1( h( k+1, k ) ),
649  $ cabs1( h( k, k+1 ) ) )
650  h21 = min( cabs1( h( k+1, k ) ),
651  $ cabs1( h( k, k+1 ) ) )
652  h11 = max( cabs1( h( k+1, k+1 ) ),
653  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
654  h22 = min( cabs1( h( k+1, k+1 ) ),
655  $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
656  scl = h11 + h12
657  tst2 = h22*( h11 / scl )
658 *
659  IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
660  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
661  END IF
662  END IF
663  120 CONTINUE
664 *
665 * ==== Fill in the last row of each bulge. ====
666 *
667  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
668  DO 130 m = mtop, mend
669  k = krcol + 3*( m-1 )
670  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
671  h( k+4, k+1 ) = -refsum
672  h( k+4, k+2 ) = -refsum*dconjg( v( 2, m ) )
673  h( k+4, k+3 ) = h( k+4, k+3 ) -
674  $ refsum*dconjg( v( 3, m ) )
675  130 CONTINUE
676 *
677 * ==== End of near-the-diagonal bulge chase. ====
678 *
679  140 CONTINUE
680 *
681 * ==== Use U (if accumulated) to update far-from-diagonal
682 * . entries in H. If required, use U to update Z as
683 * . well. ====
684 *
685  IF( accum ) THEN
686  IF( wantt ) THEN
687  jtop = 1
688  jbot = n
689  ELSE
690  jtop = ktop
691  jbot = kbot
692  END IF
693  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
694  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN
695 *
696 * ==== Updates not exploiting the 2-by-2 block
697 * . structure of U. K1 and NU keep track of
698 * . the location and size of U in the special
699 * . cases of introducing bulges and chasing
700 * . bulges off the bottom. In these special
701 * . cases and in case the number of shifts
702 * . is NS = 2, there is no 2-by-2 block
703 * . structure to exploit. ====
704 *
705  k1 = max( 1, ktop-incol )
706  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
707 *
708 * ==== Horizontal Multiply ====
709 *
710  DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
711  jlen = min( nh, jbot-jcol+1 )
712  CALL zgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
713  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
714  $ ldwh )
715  CALL zlacpy( 'ALL', nu, jlen, wh, ldwh,
716  $ h( incol+k1, jcol ), ldh )
717  150 CONTINUE
718 *
719 * ==== Vertical multiply ====
720 *
721  DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
722  jlen = min( nv, max( ktop, incol )-jrow )
723  CALL zgemm( 'N', 'N', jlen, nu, nu, one,
724  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
725  $ ldu, zero, wv, ldwv )
726  CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
727  $ h( jrow, incol+k1 ), ldh )
728  160 CONTINUE
729 *
730 * ==== Z multiply (also vertical) ====
731 *
732  IF( wantz ) THEN
733  DO 170 jrow = iloz, ihiz, nv
734  jlen = min( nv, ihiz-jrow+1 )
735  CALL zgemm( 'N', 'N', jlen, nu, nu, one,
736  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
737  $ ldu, zero, wv, ldwv )
738  CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
739  $ z( jrow, incol+k1 ), ldz )
740  170 CONTINUE
741  END IF
742  ELSE
743 *
744 * ==== Updates exploiting U's 2-by-2 block structure.
745 * . (I2, I4, J2, J4 are the last rows and columns
746 * . of the blocks.) ====
747 *
748  i2 = ( kdu+1 ) / 2
749  i4 = kdu
750  j2 = i4 - i2
751  j4 = kdu
752 *
753 * ==== KZS and KNZ deal with the band of zeros
754 * . along the diagonal of one of the triangular
755 * . blocks. ====
756 *
757  kzs = ( j4-j2 ) - ( ns+1 )
758  knz = ns + 1
759 *
760 * ==== Horizontal multiply ====
761 *
762  DO 180 jcol = min( ndcol, kbot ) + 1, jbot, nh
763  jlen = min( nh, jbot-jcol+1 )
764 *
765 * ==== Copy bottom of H to top+KZS of scratch ====
766 * (The first KZS rows get multiplied by zero.) ====
767 *
768  CALL zlacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
769  $ ldh, wh( kzs+1, 1 ), ldwh )
770 *
771 * ==== Multiply by U21**H ====
772 *
773  CALL zlaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
774  CALL ztrmm( 'L', 'U', 'C', 'N', knz, jlen, one,
775  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
776  $ ldwh )
777 *
778 * ==== Multiply top of H by U11**H ====
779 *
780  CALL zgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
781  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
782 *
783 * ==== Copy top of H to bottom of WH ====
784 *
785  CALL zlacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
786  $ wh( i2+1, 1 ), ldwh )
787 *
788 * ==== Multiply by U21**H ====
789 *
790  CALL ztrmm( 'L', 'L', 'C', 'N', j2, jlen, one,
791  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
792 *
793 * ==== Multiply by U22 ====
794 *
795  CALL zgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
796  $ u( j2+1, i2+1 ), ldu,
797  $ h( incol+1+j2, jcol ), ldh, one,
798  $ wh( i2+1, 1 ), ldwh )
799 *
800 * ==== Copy it back ====
801 *
802  CALL zlacpy( 'ALL', kdu, jlen, wh, ldwh,
803  $ h( incol+1, jcol ), ldh )
804  180 CONTINUE
805 *
806 * ==== Vertical multiply ====
807 *
808  DO 190 jrow = jtop, max( incol, ktop ) - 1, nv
809  jlen = min( nv, max( incol, ktop )-jrow )
810 *
811 * ==== Copy right of H to scratch (the first KZS
812 * . columns get multiplied by zero) ====
813 *
814  CALL zlacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
815  $ ldh, wv( 1, 1+kzs ), ldwv )
816 *
817 * ==== Multiply by U21 ====
818 *
819  CALL zlaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
820  CALL ztrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
821  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
822  $ ldwv )
823 *
824 * ==== Multiply by U11 ====
825 *
826  CALL zgemm( 'N', 'N', jlen, i2, j2, one,
827  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
828  $ ldwv )
829 *
830 * ==== Copy left of H to right of scratch ====
831 *
832  CALL zlacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
833  $ wv( 1, 1+i2 ), ldwv )
834 *
835 * ==== Multiply by U21 ====
836 *
837  CALL ztrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
838  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
839 *
840 * ==== Multiply by U22 ====
841 *
842  CALL zgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
843  $ h( jrow, incol+1+j2 ), ldh,
844  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
845  $ ldwv )
846 *
847 * ==== Copy it back ====
848 *
849  CALL zlacpy( 'ALL', jlen, kdu, wv, ldwv,
850  $ h( jrow, incol+1 ), ldh )
851  190 CONTINUE
852 *
853 * ==== Multiply Z (also vertical) ====
854 *
855  IF( wantz ) THEN
856  DO 200 jrow = iloz, ihiz, nv
857  jlen = min( nv, ihiz-jrow+1 )
858 *
859 * ==== Copy right of Z to left of scratch (first
860 * . KZS columns get multiplied by zero) ====
861 *
862  CALL zlacpy( 'ALL', jlen, knz,
863  $ z( jrow, incol+1+j2 ), ldz,
864  $ wv( 1, 1+kzs ), ldwv )
865 *
866 * ==== Multiply by U12 ====
867 *
868  CALL zlaset( 'ALL', jlen, kzs, zero, zero, wv,
869  $ ldwv )
870  CALL ztrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
871  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
872  $ ldwv )
873 *
874 * ==== Multiply by U11 ====
875 *
876  CALL zgemm( 'N', 'N', jlen, i2, j2, one,
877  $ z( jrow, incol+1 ), ldz, u, ldu, one,
878  $ wv, ldwv )
879 *
880 * ==== Copy left of Z to right of scratch ====
881 *
882  CALL zlacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),
883  $ ldz, wv( 1, 1+i2 ), ldwv )
884 *
885 * ==== Multiply by U21 ====
886 *
887  CALL ztrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
888  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
889  $ ldwv )
890 *
891 * ==== Multiply by U22 ====
892 *
893  CALL zgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
894  $ z( jrow, incol+1+j2 ), ldz,
895  $ u( j2+1, i2+1 ), ldu, one,
896  $ wv( 1, 1+i2 ), ldwv )
897 *
898 * ==== Copy the result back to Z ====
899 *
900  CALL zlacpy( 'ALL', jlen, kdu, wv, ldwv,
901  $ z( jrow, incol+1 ), ldz )
902  200 CONTINUE
903  END IF
904  END IF
905  END IF
906  210 CONTINUE
907 *
908 * ==== End of ZLAQR5 ====
909 *
910  END
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:105
subroutine zlaqr1(N, H, LDH, S1, S2, V)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: zlaqr1.f:109
subroutine zlarfg(N, ALPHA, X, INCX, TAU)
ZLARFG generates an elementary reflector (Householder matrix).
Definition: zlarfg.f:108
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: zlaset.f:108
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:76
subroutine ztrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRMM
Definition: ztrmm.f:179
subroutine zlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
ZLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: zlaqr5.f:253