ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaqr1.f
Go to the documentation of this file.
1  RECURSIVE SUBROUTINE pdlaqr1( WANTT, WANTZ, N, ILO, IHI, A,
2  $ DESCA, WR, WI, ILOZ, IHIZ, Z,
3  $ DESCZ, WORK, LWORK, IWORK,
4  $ ILWORK, INFO )
5 *
6 * Contribution from the Department of Computing Science and HPC2N,
7 * Umea University, Sweden
8 *
9 * -- ScaLAPACK auxiliary routine (version 2.0.1) --
10 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11 * Univ. of Colorado Denver and University of California, Berkeley.
12 * January, 2012
13 *
14  IMPLICIT NONE
15 *
16 * .. Scalar Arguments ..
17  LOGICAL wantt, wantz
18  INTEGER ihi, ihiz, ilo, iloz, ilwork, info, lwork, n
19 * ..
20 * .. Array Arguments ..
21  INTEGER desca( * ), descz( * ), iwork( * )
22  DOUBLE PRECISION a( * ), wi( * ), work( * ), wr( * ), z( * )
23 * ..
24 *
25 * Purpose
26 * =======
27 *
28 * PDLAQR1 is an auxiliary routine used to find the Schur decomposition
29 * and or eigenvalues of a matrix already in Hessenberg form from
30 * cols ILO to IHI.
31 *
32 * This is a modified version of PDLAHQR from ScaLAPACK version 1.7.3.
33 * The following modifications were made:
34 * o Recently removed workspace query functionality was added.
35 * o Aggressive early deflation is implemented.
36 * o Aggressive deflation (looking for two consecutive small
37 * subdiagonal elements by PDLACONSB) is abandoned.
38 * o The returned Schur form is now in canonical form, i.e., the
39 * returned 2-by-2 blocks really correspond to complex conjugate
40 * pairs of eigenvalues.
41 * o For some reason, the original version of PDLAHQR sometimes did
42 * not read out the converged eigenvalues correclty. This is now
43 * fixed.
44 *
45 * Notes
46 * =====
47 *
48 * Each global data object is described by an associated description
49 * vector. This vector stores the information required to establish
50 * the mapping between an object element and its corresponding process
51 * and memory location.
52 *
53 * Let A be a generic term for any 2D block cyclicly distributed array.
54 * Such a global array has an associated description vector DESCA.
55 * In the following comments, the character _ should be read as
56 * "of the global array".
57 *
58 * NOTATION STORED IN EXPLANATION
59 * --------------- -------------- --------------------------------------
60 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
61 * DTYPE_A = 1.
62 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
63 * the BLACS process grid A is distribu-
64 * ted over. The context itself is glo-
65 * bal, but the handle (the integer
66 * value) may vary.
67 * M_A (global) DESCA( M_ ) The number of rows in the global
68 * array A.
69 * N_A (global) DESCA( N_ ) The number of columns in the global
70 * array A.
71 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
72 * the rows of the array.
73 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
74 * the columns of the array.
75 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
76 * row of the array A is distributed.
77 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
78 * first column of the array A is
79 * distributed.
80 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
81 * array. LLD_A >= MAX(1,LOCr(M_A)).
82 *
83 * Let K be the number of rows or columns of a distributed matrix,
84 * and assume that its process grid has dimension p x q.
85 * LOCr( K ) denotes the number of elements of K that a process
86 * would receive if K were distributed over the p processes of its
87 * process column.
88 * Similarly, LOCc( K ) denotes the number of elements of K that a
89 * process would receive if K were distributed over the q processes of
90 * its process row.
91 * The values of LOCr() and LOCc() may be determined via a call to the
92 * ScaLAPACK tool function, NUMROC:
93 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
94 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
95 * An upper bound for these quantities may be computed by:
96 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
97 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
98 *
99 * Arguments
100 * =========
101 *
102 * WANTT (global input) LOGICAL
103 * = .TRUE. : the full Schur form T is required;
104 * = .FALSE.: only eigenvalues are required.
105 *
106 * WANTZ (global input) LOGICAL
107 * = .TRUE. : the matrix of Schur vectors Z is required;
108 * = .FALSE.: Schur vectors are not required.
109 *
110 * N (global input) INTEGER
111 * The order of the Hessenberg matrix A (and Z if WANTZ).
112 * N >= 0.
113 *
114 * ILO (global input) INTEGER
115 * IHI (global input) INTEGER
116 * It is assumed that A is already upper quasi-triangular in
117 * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
118 * ILO = 1). PDLAQR1 works primarily with the Hessenberg
119 * submatrix in rows and columns ILO to IHI, but applies
120 * transformations to all of H if WANTT is .TRUE..
121 * 1 <= ILO <= max(1,IHI); IHI <= N.
122 *
123 * A (global input/output) DOUBLE PRECISION array, dimension
124 * (DESCA(LLD_),*)
125 * On entry, the upper Hessenberg matrix A.
126 * On exit, if WANTT is .TRUE., A is upper quasi-triangular in
127 * rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
128 * blocks not yet in standard form. If WANTT is .FALSE., the
129 * contents of A are unspecified on exit.
130 *
131 * DESCA (global and local input) INTEGER array of dimension DLEN_.
132 * The array descriptor for the distributed matrix A.
133 *
134 * WR (global replicated output) DOUBLE PRECISION array,
135 * dimension (N)
136 * WI (global replicated output) DOUBLE PRECISION array,
137 * dimension (N)
138 * The real and imaginary parts, respectively, of the computed
139 * eigenvalues ILO to IHI are stored in the corresponding
140 * elements of WR and WI. If two eigenvalues are computed as a
141 * complex conjugate pair, they are stored in consecutive
142 * elements of WR and WI, say the i-th and (i+1)th, with
143 * WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
144 * eigenvalues are stored in the same order as on the diagonal
145 * of the Schur form returned in A. A may be returned with
146 * larger diagonal blocks until the next release.
147 *
148 * ILOZ (global input) INTEGER
149 * IHIZ (global input) INTEGER
150 * Specify the rows of Z to which transformations must be
151 * applied if WANTZ is .TRUE..
152 * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
153 *
154 * Z (global input/output) DOUBLE PRECISION array.
155 * If WANTZ is .TRUE., on entry Z must contain the current
156 * matrix Z of transformations accumulated by PDHSEQR, and on
157 * exit Z has been updated; transformations are applied only to
158 * the submatrix Z(ILOZ:IHIZ,ILO:IHI).
159 * If WANTZ is .FALSE., Z is not referenced.
160 *
161 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
162 * The array descriptor for the distributed matrix Z.
163 *
164 * WORK (local output) DOUBLE PRECISION array of size LWORK
165 *
166 * LWORK (local input) INTEGER
167 * WORK(LWORK) is a local array and LWORK is assumed big enough
168 * so that LWORK >= 6*N + 6*385*385 +
169 * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N),
170 * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) )
171 *
172 * IWORK (global and local input) INTEGER array of size ILWORK
173 *
174 * ILWORK (local input) INTEGER
175 * This holds the some of the IBLK integer arrays. This is held
176 * as a place holder for the next release.
177 *
178 * INFO (global output) INTEGER
179 * < 0: parameter number -INFO incorrect or inconsistent
180 * = 0: successful exit
181 * > 0: PDLAQR1 failed to compute all the eigenvalues ILO to IHI
182 * in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
183 * elements i+1:ihi of WR and WI contain those eigenvalues
184 * which have been successfully computed.
185 *
186 * Logic:
187 * This algorithm is very similar to _LAHQR. Unlike _LAHQR,
188 * instead of sending one double shift through the largest
189 * unreduced submatrix, this algorithm sends multiple double shifts
190 * and spaces them apart so that there can be parallelism across
191 * several processor row/columns. Another critical difference is
192 * that this algorithm aggregrates multiple transforms together in
193 * order to apply them in a block fashion.
194 *
195 * Important Local Variables:
196 * IBLK = The maximum number of bulges that can be computed.
197 * Currently fixed. Future releases this won't be fixed.
198 * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
199 * ROTN = The number of transforms to block together
200 * NBULGE = The number of bulges that will be attempted on the
201 * current submatrix.
202 * IBULGE = The current number of bulges started.
203 * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
204 *
205 * Subroutines:
206 * This routine calls:
207 * PDLAWIL -> Given the shift, get the transformation
208 * DLASORTE -> Pair up eigenvalues so that reals are paired.
209 * PDLACP3 -> Parallel array to local replicated array copy &
210 * back.
211 * DLAREF -> Row/column reflector applier. Core routine here.
212 * PDLASMSUB -> Finds negligible subdiagonal elements.
213 *
214 * Current Notes and/or Restrictions:
215 * 1.) This code requires the distributed block size to be square
216 * and at least six (6); unlike simpler codes like LU, this
217 * algorithm is extremely sensitive to block size. Unwise
218 * choices of too small a block size can lead to bad
219 * performance.
220 * 2.) This code requires A and Z to be distributed identically
221 * and have identical contxts.
222 * 3.) This release currently does not have a routine for
223 * resolving the Schur blocks into regular 2x2 form after
224 * this code is completed. Because of this, a significant
225 * performance impact is required while the deflation is done
226 * by sometimes a single column of processors.
227 * 4.) This code does not currently block the initial transforms
228 * so that none of the rows or columns for any bulge are
229 * completed until all are started. To offset pipeline
230 * start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
231 * bulges are used (if possible)
232 * 5.) The maximum number of bulges currently supported is fixed at
233 * 32. In future versions this will be limited only by the
234 * incoming WORK array.
235 * 6.) The matrix A must be in upper Hessenberg form. If elements
236 * below the subdiagonal are nonzero, the resulting transforms
237 * may be nonsimilar. This is also true with the LAPACK
238 * routine.
239 * 7.) For this release, it is assumed RSRC_=CSRC_=0
240 * 8.) Currently, all the eigenvalues are distributed to all the
241 * nodes. Future releases will probably distribute the
242 * eigenvalues by the column partitioning.
243 * 9.) The internals of this routine are subject to change.
244 *
245 * Implemented by: G. Henry, November 17, 1996
246 *
247 * Modified by Robert Granat and Meiyue Shao, Department of Computing
248 * Science and HPC2N, Umea University, Sweden
249 *
250 * =====================================================================
251 *
252 * .. Parameters ..
253  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
254  $ lld_, mb_, m_, nb_, n_, rsrc_
255  PARAMETER ( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
256  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
257  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
258  DOUBLE PRECISION zero, one, half
259  PARAMETER ( zero = 0.0d+0, one = 1.0d+0, half = 0.5d+0 )
260  DOUBLE PRECISION const
261  parameter( const = 1.50d+0 )
262  INTEGER iblk, lds
263  parameter( iblk = 32, lds = 12*iblk+1 )
264 * ..
265 * .. Local Scalars ..
266  INTEGER contxt, down, hbl, i, i1, i2, iafirst, ibulge,
267  $ icbuf, icol, icol1, icol2, ierr, ii,
268  $ irbuf, irow, irow1, irow2, ispec, istart,
269  $ istartcol, istartrow, istop, isub,
270  $ itermax, itmp1, itmp2, itn, its, j, jafirst,
271  $ jblk, jj, k, ki, l, lcmrc, lda, ldz, left,
272  $ lihih, lihiz, liloh, liloz, locali1, locali2,
273  $ localk, localm, m, modkm1, mycol, myrow,
274  $ nbulge, nh, node, npcol, nprow, nr, num, nz,
275  $ right, rotn, up, vecsidx, totit, totns, totsw,
276  $ dblk, nibble, nd, ns, ltop, lwkopt, s1, s2, s3
277  DOUBLE PRECISION ave, disc, h00, h10, h11, h12, h21, h22, h33,
278  $ h43h34, h44, ovfl, s, smlnum, sum, t1, t1copy,
279  $ t2, t3, ulp, unfl, v1save, v2, v2save, v3,
280  $ v3save, sn, cs, swap
281  LOGICAL aed
282 * ..
283 * .. Local Arrays ..
284  INTEGER icurcol( iblk ), icurrow( iblk ), k1( iblk ),
285  $ k2( iblk ), kcol( iblk ), kp2col( iblk ),
286  $ kp2row( iblk ), krow( iblk ), localk2( iblk )
287  DOUBLE PRECISION smalla( 6, 6, iblk ), vcopy( 3 )
288 * ..
289 * .. External Functions ..
290  INTEGER ilcm, numroc, ilaenv
291  DOUBLE PRECISION pdlamch
292  EXTERNAL ilcm, numroc, ilaenv, pdlamch
293 * ..
294 * .. External Subroutines ..
295  EXTERNAL blacs_gridinfo, dcopy, dgebr2d, dgebs2d,
296  $ dgerv2d, dgesd2d, dgsum2d, dlahqr, dlaref,
297  $ dlarfg, dlasorte, igamn2d, infog1l, infog2l,
299  $ pdlawil, pxerbla, dlanv2, pdlaqr2, pdlaqr4
300 * ..
301 * .. Intrinsic Functions ..
302  INTRINSIC abs, dble, max, min, mod, sign, sqrt
303 * ..
304 * .. Executable Statements ..
305 *
306  info = 0
307 *
308  itermax = 30*( ihi-ilo+1 )
309  IF( n.EQ.0 )
310  $ RETURN
311 *
312 * NODE (IAFIRST,JAFIRST) OWNS A(1,1)
313 *
314  hbl = desca( mb_ )
315  contxt = desca( ctxt_ )
316  lda = desca( lld_ )
317  iafirst = desca( rsrc_ )
318  jafirst = desca( csrc_ )
319  ldz = descz( lld_ )
320  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
321  node = myrow*npcol + mycol
322  num = nprow*npcol
323  left = mod( mycol+npcol-1, npcol )
324  right = mod( mycol+1, npcol )
325  up = mod( myrow+nprow-1, nprow )
326  down = mod( myrow+1, nprow )
327  lcmrc = ilcm( nprow, npcol )
328  totit = 0
329  totns = 0
330  totsw = 0
331 *
332 * Determine the number of columns we have so we can check workspace
333 *
334  localk = numroc( n, hbl, mycol, jafirst, npcol )
335  jj = n / hbl
336  IF( jj*hbl.LT.n )
337  $ jj = jj + 1
338  jj = 7*jj / lcmrc
339  lwkopt = int( 6*n+max( 3*max( lda, ldz )+2*localk, jj )
340  $ +6*lds*lds )
341  IF( lwork.EQ.-1 .OR. ilwork.EQ.-1 ) THEN
342  work( 1 ) = dble( lwkopt )
343  iwork( 1 ) = 3
344  RETURN
345  ELSEIF( lwork.LT.lwkopt ) THEN
346  info = -15
347  END IF
348  IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN
349  info = -( 1300+ctxt_ )
350  END IF
351  IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
352  info = -( 700+nb_ )
353  END IF
354  IF( descz( mb_ ).NE.descz( nb_ ) ) THEN
355  info = -( 1300+nb_ )
356  END IF
357  IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
358  info = -( 1300+mb_ )
359  END IF
360  IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN
361  info = -4
362  END IF
363  IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN
364  info = -5
365  END IF
366  IF( hbl.LT.5 ) THEN
367  info = -( 700+mb_ )
368  END IF
369  CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,
370  $ -1, -1 )
371  IF( info.LT.0 ) THEN
372  CALL pxerbla( contxt, 'PDLAQR1', -info )
373  work( 1 ) = dble( lwkopt )
374  RETURN
375  END IF
376 *
377 * Set work array indices
378 *
379  s1 = 0
380  s2 = s1+lds*lds
381  s3 = s2+lds*lds
382  vecsidx = s3+4*lds*lds
383  isub = vecsidx+3*n
384  irbuf = isub+n
385  icbuf = irbuf+n
386 *
387 * Find a value for ROTN
388 *
389  rotn = hbl / 3
390  rotn = max( rotn, hbl-2 )
391  rotn = min( rotn, 1 )
392 *
393  IF( ilo.EQ.ihi ) THEN
394  CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
395  $ irow, icol, ii, jj )
396  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
397  wr( ilo ) = a( ( icol-1 )*lda+irow )
398  ELSE
399  wr( ilo ) = zero
400  END IF
401  wi( ilo ) = zero
402  work( 1 ) = dble( lwkopt )
403  RETURN
404  END IF
405 *
406  nh = ihi - ilo + 1
407  nz = ihiz - iloz + 1
408 *
409 * If the diagonal block is small enough, copy it to local memory and
410 * call DLAHQR directly.
411 *
412  IF( nh .LE. lds ) THEN
413  CALL pdlaqr4( wantt, wantz, n, ilo, ihi, a, desca, wr, wi,
414  $ iloz, ihiz, z, descz, work( s1+1 ), nh,
415  $ work( s2+1 ), nh, work( s3+1 ), 4*lds*lds,
416  $ info )
417  work( 1 ) = dble( lwkopt )
418  RETURN
419  END IF
420 *
421  CALL infog1l( iloz, hbl, nprow, myrow, descz(rsrc_), liloz, lihiz)
422  lihiz = numroc( ihiz, hbl, myrow, descz(rsrc_), nprow )
423 *
424 * Set machine-dependent constants for the stopping criterion.
425 * If NORM(H) <= SQRT(OVFL), overflow should not occur.
426 *
427  unfl = pdlamch( contxt, 'SAFE MINIMUM' )
428  ovfl = one / unfl
429  CALL pdlabad( contxt, unfl, ovfl )
430  ulp = pdlamch( contxt, 'PRECISION' )
431  smlnum = unfl*( nh / ulp )
432 *
433 * I1 and I2 are the indices of the first row and last column of H
434 * to which transformations must be applied. If eigenvalues only are
435 * being computed, I1 and I2 are set inside the main loop.
436 *
437  IF( wantt ) THEN
438  i1 = 1
439  i2 = n
440  END IF
441 *
442 * ITN is the total number of QR iterations allowed.
443 *
444  itn = itermax
445 *
446 * The main loop begins here. I is the loop index and decreases from
447 * IHI to ILO in steps of our schur block size (<=2*IBLK). Each
448 * iteration of the loop works with the active submatrix in rows
449 * and columns L to I. Eigenvalues I+1 to IHI have already
450 * converged. Either L = ILO or the global A(L,L-1) is negligible
451 * so that the matrix splits.
452 *
453  i = ihi
454  10 CONTINUE
455  l = ilo
456  IF( i.LT.ilo )
457  $ GO TO 450
458 *
459 * Perform QR iterations on rows and columns ILO to I until a
460 * submatrix of order 1 or 2 splits off at the bottom because a
461 * subdiagonal element has become negligible.
462 *
463  DO 420 its = 0, itn
464  totit = totit + 1
465 *
466 * Look for a single small subdiagonal element.
467 *
468  CALL pdlasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),
469  $ lwork-irbuf )
470  l = k
471 *
472  IF( l.GT.ilo ) THEN
473 *
474 * H(L,L-1) is negligible
475 *
476  CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,
477  $ irow, icol, itmp1, itmp2 )
478  IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
479  a( ( icol-1 )*lda+irow ) = zero
480  END IF
481  work( isub+l-1 ) = zero
482  END IF
483 *
484 * Exit from loop if a small submatrix has split off.
485 *
486  m = l - 10
487  IF ( l .GT. i - lds )
488  $ GO TO 430
489 *
490 * Now the active submatrix is in rows and columns L to I. If
491 * eigenvalues only are being computed, only the active submatrix
492 * need be transformed.
493 *
494  IF( .NOT.wantt ) THEN
495  i1 = l
496  i2 = i
497  END IF
498 *
499 * Copy submatrix of size 2*JBLK and prepare to do generalized
500 * Wilkinson shift or an exceptional shift
501 *
502  nh = i-l+1
503  aed = .true.
504  jblk = min( iblk, ( nh / 2 )-1 )
505  IF( jblk.GT.lcmrc ) THEN
506 *
507 * Make sure it's divisible by LCM (we want even workloads!)
508 *
509  jblk = jblk - mod( jblk, lcmrc )
510  END IF
511  jblk = min( jblk, 2*lcmrc )
512  jblk = max( jblk, 1 )
513 *
514  IF( its.EQ.20 .OR. its.EQ.40 ) THEN
515 *
516 * Exceptional shift.
517 *
518  CALL pdlacp3( 2*jblk, i-2*jblk+1, a, desca, work( s1+1 ),
519  $ lds, -1, -1, 0 )
520  DO 20 ii = 2*jblk, 2, -1
521  work( s1+ii+(ii-1)*lds ) = const*(
522  $ abs( work( s1+ii+(ii-1)*lds ) )+
523  $ abs( work( s1+ii+(ii-2)*lds ) ) )
524  work( s1+ii+(ii-2)*lds ) = zero
525  work( s1+ii-1+(ii-1)*lds ) = zero
526  20 CONTINUE
527  work( s1+1 ) = const*abs( work( s1+1 ) )
528  ELSE
529 *
530 * Aggressive early deflation.
531 *
532  IF( aed ) THEN
533  dblk = ilaenv( 13, 'DLAQR0', 'SV', n, l, i, 4*lds*lds )
534  dblk = max( 2*jblk, dblk ) + 1
535  dblk = min( nh, lds, dblk )
536  CALL pdlaqr2( wantt, wantz, n, l, i, dblk, a, desca,
537  $ iloz, ihiz, z, descz, ns, nd, wr, wi,
538  $ work( s1+1 ), lds, work( s2+1 ), dblk,
539  $ work( irbuf+1 ), work( icbuf+1 ),
540  $ work( s3+1 ), 4*lds*lds )
541 *
542 * Skip a QR sweep if enough eigenvalues are deflated.
543 *
544  nibble = ilaenv( 14, 'DLAQR0', 'SV', n, l, i, 4*lds*lds )
545  nibble = max( 0, nibble )
546  i = i - nd
547  dblk = dblk - nd
548  IF( 100*nd .GT. nibble*nh .OR. dblk .LT. 2*jblk ) GOTO 10
549 *
550 * Use unconverged eigenvalues as shifts for the QR sweep.
551 * (This option is turned off because of the quality of
552 * these shifts are not so good.)
553 *
554 * IF( ND.GE.0 .AND. ND+DBLK.GE.64 ) THEN
555  IF( .false. ) THEN
556  CALL dlaset( 'L', dblk-1, dblk-1, zero, zero,
557  $ work( s1+2 ), lds )
558  work( irbuf+1 ) = work( s1+1 )
559  work( icbuf+1 ) = zero
560 *
561 * Shuffle shifts into pairs of real shifts and pairs of
562 * complex conjugate shifts assuming complex conjugate
563 * shifts are already adjacent to one another.
564 *
565  DO 21 ii = dblk, 3, -2
566  IF( work( icbuf+ii ).NE.-work( icbuf+ii-1 ) ) THEN
567  swap = work( irbuf+ii )
568  work( irbuf+ii ) = work( irbuf+ii-1 )
569  work( irbuf+ii-1 ) = work( irbuf+ii-2 )
570  work( irbuf+ii-2 ) = swap
571  swap = work( icbuf+ii )
572  work( icbuf+ii ) = work( icbuf+ii-1 )
573  work( icbuf+ii-1 ) = work( icbuf+ii-2 )
574  work( icbuf+ii-2 ) = swap
575  END IF
576  21 CONTINUE
577 *
578 * Copy undeflatable eigenvalues to the diagonal of S1.
579 *
580  ii = 2
581  22 CONTINUE
582  IF( work( icbuf+ii ) .EQ. zero ) THEN
583  work( s1+ii+(ii-1)*lds ) = work( irbuf+ii )
584  work( s1+ii+(ii-2)*lds ) = zero
585  ii = ii + 1
586  ELSE
587  work( s1+ii+(ii-1)*lds ) = work( irbuf+ii )
588  work( s1+ii+1+ii*lds ) = work( irbuf+ii )
589  work( s1+ii+1+(ii-1)*lds ) = work( icbuf+ii )
590  work( s1+ii+ii*lds ) = -work( icbuf+ii )
591  ii = ii + 2
592  END IF
593  IF( ii .LE. dblk ) GOTO 22
594  ELSE
595  CALL dlahqr( .false., .false., dblk, 1, dblk,
596  $ work( s1+1 ), lds, work( irbuf+1 ),
597  $ work( icbuf+1 ), 1, dblk, z, ldz, ierr )
598  END IF
599  ELSE
600  dblk = 2*jblk
601  CALL pdlacp3( dblk, i-dblk+1, a, desca, work( s1+1 ),
602  $ lds, -1, -1, 0 )
603  CALL dlahqr( .false., .false., dblk, 1, dblk,
604  $ work( s1+1 ), lds, work( irbuf+1 ),
605  $ work( icbuf+1 ), 1, dblk, z, ldz, ierr )
606  END IF
607  totsw = totsw + 1
608 *
609 * Prepare to use Wilkinson's double shift
610 *
611  h44 = work( s1+dblk+(dblk-1)*lds )
612  h33 = work( s1+dblk-1+(dblk-2)*lds )
613  h43h34 = work( s1+dblk-1+(dblk-1)*lds )*
614  $ work( s1+dblk+(dblk-2)*lds )
615  IF( ( jblk.GT.1 ) .AND. ( its.GT.30 ) ) THEN
616  s = work( s1+dblk-1+(dblk-3)*lds )
617  disc = ( h33-h44 )*half
618  disc = disc*disc + h43h34
619  IF( disc.GT.zero ) THEN
620 *
621 * Real roots: Use Wilkinson's shift twice
622 *
623  disc = sqrt( disc )
624  ave = half*( h33+h44 )
625  IF( abs( h33 )-abs( h44 ).GT.zero ) THEN
626  h33 = h33*h44 - h43h34
627  h44 = h33 / ( sign( disc, ave )+ave )
628  ELSE
629  h44 = sign( disc, ave ) + ave
630  END IF
631  h33 = h44
632  h43h34 = zero
633  END IF
634  END IF
635  END IF
636 *
637 * Look for two consecutive small subdiagonal elements:
638 * PDLACONSB is the routine that does this.
639 *
640 * CALL PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,
641 * $ WORK( IRBUF+1 ), LWORK-IRBUF )
642 *
643 * Skip small submatrices
644 *
645 * IF ( M .GE. I - 5 )
646 * $ GO TO 80
647 *
648 * In principle PDLACONSB needs to check all shifts to decide
649 * whether two consecutive small subdiagonal entries are suitable
650 * as the starting position of the bulge chasing phase. It can be
651 * dangerous to check the first pair of shifts only. Moreover it
652 * is quite rare to obtain an M which is much larger than L. This
653 * process is a bit expensive compared with the benefit.
654 * Therefore it is sensible to abandon this routine. Total amount
655 * of communications is saved in average.
656 *
657  m = l
658 *
659 * Double-shift QR step
660 *
661 * NBULGE is the number of bulges that will be attempted
662 *
663  istop = min( m+rotn-mod( m, rotn ), i-2 )
664  istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
665  istop = min( istop, i2-2 )
666  istop = max( istop, m )
667  nbulge = ( i-1-istop ) / hbl
668 *
669 * Do not exceed maximum determined.
670 *
671  nbulge = min( nbulge, jblk )
672  IF( nbulge.GT.lcmrc ) THEN
673 *
674 * Make sure it's divisible by LCM (we want even workloads!)
675 *
676  nbulge = nbulge - mod( nbulge, lcmrc )
677  END IF
678  nbulge = max( nbulge, 1 )
679 *
680  totns = totns + nbulge*2
681 *
682  IF( ( its.NE.20 ) .AND. ( its.NE.40 ) .AND. ( nbulge.GT.1 ) )
683  $ THEN
684 *
685 * sort the eigenpairs so that they are in twos for double
686 * shifts. only call if several need sorting
687 *
688 * CALL DLASORTE( S1( 2*( JBLK-NBULGE )+1,
689 * $ 2*( JBLK-NBULGE )+1 ), 3*IBLK, 2*NBULGE,
690 * $ WORK( IRBUF+1 ), IERR )
691  CALL dlasorte( work(s1+dblk-2*nbulge+1+(dblk-2*nbulge)*lds),
692  $ lds, 2*nbulge, work( irbuf+1 ), ierr )
693  END IF
694 *
695 * IBULGE is the number of bulges going so far
696 *
697  ibulge = 1
698 *
699 * "A" row defs : main row transforms from LOCALK to LOCALI2
700 *
701  CALL infog1l( m, hbl, npcol, mycol, desca(csrc_),itmp1,localk )
702  localk = numroc( n, hbl, mycol, desca(csrc_), npcol )
703  CALL infog1l( 1, hbl, npcol, mycol,desca(csrc_),icol1,locali2 )
704  locali2 = numroc( i2, hbl, mycol, desca(csrc_), npcol )
705 *
706 * "A" col defs : main col transforms from LOCALI1 to LOCALM
707 *
708  CALL infog1l( i1, hbl, nprow,myrow,desca(rsrc_),locali1,icol1 )
709  icol1 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
710  CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),localm,icol1 )
711  icol1 = numroc( min( m+3, i ), hbl, myrow, desca(rsrc_),nprow )
712 *
713 * Which row & column will start the bulges
714 *
715  istartrow = mod( ( m+1 ) / hbl + iafirst, nprow )
716  istartcol = mod( ( m+1 ) / hbl + jafirst, npcol )
717 *
718  CALL infog1l( m, hbl, nprow, myrow, desca(rsrc_), ii, itmp2 )
719  itmp2 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
720  CALL infog1l( m, hbl, npcol, mycol, desca(csrc_), jj, itmp2 )
721  itmp2 = numroc( n, hbl, mycol, desca(csrc_), npcol )
722  CALL infog1l(1,hbl,nprow,myrow,desca(rsrc_),istop,kp2row( 1 ) )
723  kp2row( 1 ) = numroc( m+2, hbl, myrow, desca(rsrc_), nprow )
724  CALL infog1l(1,hbl,npcol,mycol,desca(csrc_),istop,kp2col( 1 ) )
725  kp2col( 1 ) = numroc( m+2, hbl, mycol, desca(csrc_), npcol )
726 *
727 * Set all values for bulges. All bulges are stored in
728 * intermediate steps as loops over KI. Their current "task"
729 * over the global M to I-1 values is always K1(KI) to K2(KI).
730 * However, because there are many bulges, K1(KI) & K2(KI) might
731 * go past that range while later bulges (KI+1,KI+2,etc..) are
732 * finishing up.
733 *
734 * Rules:
735 * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
736 * If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2
737 * If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
738 * K2(KI)-K1(KI) <= ROTN
739 *
740 * We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
741 * it again when MOD(K1(KI)-1,HBL)=HBL-1.
742 *
743  DO 30 ki = 1, nbulge
744  k1( ki ) = m
745  istop = min( m+rotn-mod( m, rotn ), i-2 )
746  istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
747  istop = min( istop, i2-2 )
748  istop = max( istop, m )
749  k2( ki ) = istop
750  icurrow( ki ) = istartrow
751  icurcol( ki ) = istartcol
752  localk2( ki ) = itmp1
753  krow( ki ) = ii
754  kcol( ki ) = jj
755  IF( ki.GT.1 )
756  $ kp2row( ki ) = kp2row( 1 )
757  IF( ki.GT.1 )
758  $ kp2col( ki ) = kp2col( 1 )
759  30 CONTINUE
760 *
761 * Get first transform on node who owns M+2,M+2
762 *
763  DO 31 itmp1 = 1, 3
764  vcopy(itmp1) = zero
765  31 CONTINUE
766  itmp1 = istartrow
767  itmp2 = istartcol
768  CALL pdlawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,
769  $ vcopy )
770  v1save = vcopy( 1 )
771  v2save = vcopy( 2 )
772  v3save = vcopy( 3 )
773  IF( k2( ibulge ).LE.i-1 ) THEN
774  40 CONTINUE
775  IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )
776  $ THEN
777  IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+
778  $ 2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN
779  h44 = work( s1+dblk-2*ibulge+(dblk-2*ibulge-1)*lds )
780  h33 = work( s1+dblk-2*ibulge-1+(dblk-2*ibulge-2)*lds )
781  h43h34 = work( s1+dblk-2*ibulge-1+
782  $ (dblk-2*ibulge-1)*lds )
783  $ *work(s1+dblk-2*ibulge+(dblk-2*ibulge-2)*lds)
784  itmp1 = istartrow
785  itmp2 = istartcol
786  CALL pdlawil( itmp1, itmp2, m, a, desca, h44, h33,
787  $ h43h34, vcopy )
788  v1save = vcopy( 1 )
789  v2save = vcopy( 2 )
790  v3save = vcopy( 3 )
791  ibulge = ibulge + 1
792  END IF
793  END IF
794 *
795 * When we hit a border, there are row and column transforms that
796 * overlap over several processors and the code gets very
797 * "congested." As a remedy, when we first hit a border, a 6x6
798 * *local* matrix is generated on one node (called SMALLA) and
799 * work is done on that. At the end of the border, the data is
800 * passed back and everything stays a lot simpler.
801 *
802  DO 80 ki = 1, ibulge
803 *
804  istart = max( k1( ki ), m )
805  istop = min( k2( ki ), i-1 )
806  k = istart
807  modkm1 = mod( k-1, hbl )
808  IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
809  DO 81 itmp1 = 1, 6
810  DO 82 itmp2 = 1, 6
811  smalla(itmp1, itmp2, ki) = zero
812  82 CONTINUE
813  81 CONTINUE
814  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
815 *
816 * Copy 6 elements from global A(K-1:K+4,K-1:K+4)
817 *
818  CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,
819  $ mycol, irow1, icol1, itmp1, itmp2 )
820  CALL pdlacp3( min( 6, n-k+2 ), k-1, a, desca,
821  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
822  $ 0 )
823  END IF
824  IF( modkm1.EQ.hbl-1 ) THEN
825 *
826 * Copy 6 elements from global A(K-2:K+3,K-2:K+3)
827 *
828  CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
829  $ mycol, irow1, icol1, itmp1, itmp2 )
830  CALL pdlacp3( min( 6, n-k+3 ), k-2, a, desca,
831  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
832  $ 0 )
833  END IF
834  END IF
835 *
836 * DLAHQR used to have a single row application and a single
837 * column application to H. Here we do something a little
838 * more clever. We break each transformation down into 3
839 * parts:
840 * 1.) The minimum amount of work it takes to determine
841 * a group of ROTN transformations (this is on
842 * the critical path.) (Loops 130-180)
843 * 2.) The small work it takes so that each of the rows
844 * and columns is at the same place. For example,
845 * all ROTN row transforms are all complete
846 * through some column TMP. (Loops within 190)
847 * 3.) The majority of the row and column transforms
848 * are then applied in a block fashion.
849 * (Loops 290 on.)
850 *
851 * Each of these three parts are further subdivided into 3
852 * parts:
853 * A.) Work at the start of a border when
854 * MOD(ISTART-1,HBL) = HBL-2
855 * B.) Work at the end of a border when
856 * MOD(ISTART-1,HBL) = HBL-1
857 * C.) Work in the middle of the block when
858 * MOD(ISTART-1,HBL) < HBL-2
859 *
860  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
861  $ ( mycol.EQ.icurcol( ki ) ) .AND.
862  $ ( modkm1.EQ.hbl-2 ) .AND.
863  $ ( istart.LT.min( i-1, istop+1 ) ) ) THEN
864  k = istart
865  nr = min( 3, i-k+1 )
866  IF( k.GT.m ) THEN
867  CALL dcopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )
868  ELSE
869  vcopy( 1 ) = v1save
870  vcopy( 2 ) = v2save
871  vcopy( 3 ) = v3save
872  END IF
873  CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
874  IF( k.GT.m ) THEN
875  smalla( 2, 1, ki ) = vcopy( 1 )
876  smalla( 3, 1, ki ) = zero
877  IF( k.LT.i-1 )
878  $ smalla( 4, 1, ki ) = zero
879  ELSE IF( m.GT.l ) THEN
880  smalla( 2, 1, ki ) = -smalla( 2, 1, ki )
881  END IF
882  v2 = vcopy( 2 )
883  t2 = t1copy*v2
884  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
885  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
886  work( vecsidx+( k-1 )*3+3 ) = t1copy
887  END IF
888 *
889  IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.
890  $ ( myrow.EQ.icurrow( ki ) ) .AND.
891  $ ( mycol.EQ.icurcol( ki ) ) .AND.
892  $ ( istart.LE.min( i, istop ) ) ) THEN
893  k = istart
894  nr = min( 3, i-k+1 )
895  IF( k.GT.m ) THEN
896  CALL dcopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )
897  ELSE
898  vcopy( 1 ) = v1save
899  vcopy( 2 ) = v2save
900  vcopy( 3 ) = v3save
901  END IF
902  CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
903  IF( k.GT.m ) THEN
904  smalla( 3, 2, ki ) = vcopy( 1 )
905  smalla( 4, 2, ki ) = zero
906  IF( k.LT.i-1 )
907  $ smalla( 5, 2, ki ) = zero
908 *
909 * Set a subdiagonal to zero now if it's possible
910 *
911 * H11 = SMALLA(1,1,KI)
912 * H10 = SMALLA(2,1,KI)
913 * H22 = SMALLA(2,2,KI)
914 * IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)),
915 * $ SMLNUM) ) THEN
916 * SMALLA(2,1,KI) = ZERO
917 * WORK(ISUB+K-2) = ZERO
918 * END IF
919  ELSE IF( m.GT.l ) THEN
920  smalla( 3, 2, ki ) = -smalla( 3, 2, ki )
921  END IF
922  v2 = vcopy( 2 )
923  t2 = t1copy*v2
924  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
925  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
926  work( vecsidx+( k-1 )*3+3 ) = t1copy
927  END IF
928 *
929  IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.
930  $ ( myrow.EQ.icurrow( ki ) ) .AND.
931  $ ( right.EQ.icurcol( ki ) ) ) THEN
932 *
933 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
934 *
935  irow1 = krow( ki )
936  icol1 = localk2( ki )
937  IF( istart.GT.m ) THEN
938  vcopy( 1 ) = smalla( 4, 3, ki )
939  vcopy( 2 ) = smalla( 5, 3, ki )
940  vcopy( 3 ) = smalla( 6, 3, ki )
941  nr = min( 3, i-istart+1 )
942  CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
943  $ t1copy )
944  a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
945  a( ( icol1-2 )*lda+irow1+1 ) = zero
946  IF( istart.LT.i-1 ) THEN
947  a( ( icol1-2 )*lda+irow1+2 ) = zero
948  END IF
949  ELSE
950  IF( m.GT.l ) THEN
951  a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*
952  $ lda+irow1 )
953  END IF
954  END IF
955  END IF
956 *
957  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
958  $ ( mycol.EQ.icurcol( ki ) ) .AND.
959  $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
960  $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
961  $ 1 ) ) ) ) THEN
962 *
963 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
964 *
965  irow1 = krow( ki )
966  icol1 = localk2( ki )
967  DO 70 k = istart, istop
968 *
969 * Create and do these transforms
970 *
971  nr = min( 3, i-k+1 )
972  IF( k.GT.m ) THEN
973  IF( mod( k-1, hbl ).EQ.0 ) THEN
974  vcopy( 1 ) = smalla( 4, 3, ki )
975  vcopy( 2 ) = smalla( 5, 3, ki )
976  vcopy( 3 ) = smalla( 6, 3, ki )
977  ELSE
978  vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )
979  vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )
980  IF( nr.EQ.3 ) THEN
981  vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )
982  END IF
983  END IF
984  ELSE
985  vcopy( 1 ) = v1save
986  vcopy( 2 ) = v2save
987  vcopy( 3 ) = v3save
988  END IF
989  CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
990  $ t1copy )
991  IF( k.GT.m ) THEN
992  IF( mod( k-1, hbl ).GT.0 ) THEN
993  a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
994  a( ( icol1-2 )*lda+irow1+1 ) = zero
995  IF( k.LT.i-1 ) THEN
996  a( ( icol1-2 )*lda+irow1+2 ) = zero
997  END IF
998 *
999 * Set a subdiagonal to zero now if it's possible
1000 *
1001 * IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND.
1002 * $ (MOD(K-1,HBL) .GT. 1) ) THEN
1003 * H11 = A((ICOL1-3)*LDA+IROW1-2)
1004 * H10 = A((ICOL1-3)*LDA+IROW1-1)
1005 * H22 = A((ICOL1-2)*LDA+IROW1-1)
1006 * IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)),
1007 * $ SMLNUM) ) THEN
1008 * A((ICOL1-3)*LDA+IROW1-1) = ZERO
1009 * END IF
1010 * END IF
1011  END IF
1012  ELSE IF( m.GT.l ) THEN
1013  IF( mod( k-1, hbl ).GT.0 ) THEN
1014  a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*
1015  $ lda+irow1 )
1016  END IF
1017  END IF
1018  v2 = vcopy( 2 )
1019  t2 = t1copy*v2
1020  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
1021  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
1022  work( vecsidx+( k-1 )*3+3 ) = t1copy
1023  t1 = t1copy
1024  IF( k.LT.istop ) THEN
1025 *
1026 * Do some work so next step is ready...
1027 *
1028  v3 = vcopy( 3 )
1029  t3 = t1*v3
1030  DO 50 j = icol1, min( k2( ki )+1, i-1 ) +
1031  $ icol1 - k
1032  sum = a( ( j-1 )*lda+irow1 ) +
1033  $ v2*a( ( j-1 )*lda+irow1+1 ) +
1034  $ v3*a( ( j-1 )*lda+irow1+2 )
1035  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*lda+
1036  $ irow1 ) - sum*t1
1037  a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*lda+
1038  $ irow1+1 ) - sum*t2
1039  a( ( j-1 )*lda+irow1+2 ) = a( ( j-1 )*lda+
1040  $ irow1+2 ) - sum*t3
1041  50 CONTINUE
1042  itmp1 = localk2( ki )
1043  DO 60 j = irow1 + 1, irow1 + 3
1044  sum = a( ( icol1-1 )*lda+j ) +
1045  $ v2*a( icol1*lda+j ) +
1046  $ v3*a( ( icol1+1 )*lda+j )
1047  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+
1048  $ j ) - sum*t1
1049  a( icol1*lda+j ) = a( icol1*lda+j ) - sum*t2
1050  a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+
1051  $ j ) - sum*t3
1052  60 CONTINUE
1053  END IF
1054  irow1 = irow1 + 1
1055  icol1 = icol1 + 1
1056  70 CONTINUE
1057  END IF
1058 *
1059  IF( modkm1.EQ.hbl-2 ) THEN
1060  IF( ( down.EQ.icurrow( ki ) ) .AND.
1061  $ ( right.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )
1062  $ THEN
1063  CALL dgerv2d( contxt, 3, 1,
1064  $ work( vecsidx+( istart-1 )*3+1 ), 3,
1065  $ down, right )
1066  END IF
1067  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1068  $ ( mycol.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )
1069  $ THEN
1070  CALL dgesd2d( contxt, 3, 1,
1071  $ work( vecsidx+( istart-1 )*3+1 ), 3,
1072  $ up, left )
1073  END IF
1074  IF( ( down.EQ.icurrow( ki ) ) .AND.
1075  $ ( npcol.GT.1 ) .AND. ( istart.LE.istop ) ) THEN
1076  jj = mod( icurcol( ki )+npcol-1, npcol )
1077  IF( mycol.NE.jj ) THEN
1078  CALL dgebr2d( contxt, 'ROW', ' ',
1079  $ 3*( istop-istart+1 ), 1,
1080  $ work( vecsidx+( istart-1 )*3+1 ),
1081  $ 3*( istop-istart+1 ), myrow, jj )
1082  ELSE
1083  CALL dgebs2d( contxt, 'ROW', ' ',
1084  $ 3*( istop-istart+1 ), 1,
1085  $ work( vecsidx+( istart-1 )*3+1 ),
1086  $ 3*( istop-istart+1 ) )
1087  END IF
1088  END IF
1089  END IF
1090 *
1091 * Broadcast Householder information from the block
1092 *
1093  IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.
1094  $ ( istart.LE.istop ) ) THEN
1095  IF( mycol.NE.icurcol( ki ) ) THEN
1096  CALL dgebr2d( contxt, 'ROW', ' ',
1097  $ 3*( istop-istart+1 ), 1,
1098  $ work( vecsidx+( istart-1 )*3+1 ),
1099  $ 3*( istop-istart+1 ), myrow,
1100  $ icurcol( ki ) )
1101  ELSE
1102  CALL dgebs2d( contxt, 'ROW', ' ',
1103  $ 3*( istop-istart+1 ), 1,
1104  $ work( vecsidx+( istart-1 )*3+1 ),
1105  $ 3*( istop-istart+1 ) )
1106  END IF
1107  END IF
1108  80 CONTINUE
1109 *
1110 * Now do column transforms and finish work
1111 *
1112  DO 90 ki = 1, ibulge
1113 *
1114  istart = max( k1( ki ), m )
1115  istop = min( k2( ki ), i-1 )
1116 *
1117  IF( mod( istart-1, hbl ).EQ.hbl-2 ) THEN
1118  IF( ( right.EQ.icurcol( ki ) ) .AND.
1119  $ ( nprow.GT.1 ) .AND. ( istart.LE.istop ) ) THEN
1120  jj = mod( icurrow( ki )+nprow-1, nprow )
1121  IF( myrow.NE.jj ) THEN
1122  CALL dgebr2d( contxt, 'COL', ' ',
1123  $ 3*( istop-istart+1 ), 1,
1124  $ work( vecsidx+( istart-1 )*3+1 ),
1125  $ 3*( istop-istart+1 ), jj, mycol )
1126  ELSE
1127  CALL dgebs2d( contxt, 'COL', ' ',
1128  $ 3*( istop-istart+1 ), 1,
1129  $ work( vecsidx+( istart-1 )*3+1 ),
1130  $ 3*( istop-istart+1 ) )
1131  END IF
1132  END IF
1133  END IF
1134 *
1135  IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.
1136  $ ( istart.LE.istop ) ) THEN
1137  IF( myrow.NE.icurrow( ki ) ) THEN
1138  CALL dgebr2d( contxt, 'COL', ' ',
1139  $ 3*( istop-istart+1 ), 1,
1140  $ work( vecsidx+( istart-1 )*3+1 ),
1141  $ 3*( istop-istart+1 ), icurrow( ki ),
1142  $ mycol )
1143  ELSE
1144  CALL dgebs2d( contxt, 'COL', ' ',
1145  $ 3*( istop-istart+1 ), 1,
1146  $ work( vecsidx+( istart-1 )*3+1 ),
1147  $ 3*( istop-istart+1 ) )
1148  END IF
1149  END IF
1150  90 CONTINUE
1151 *
1152 * Now do make up work to have things in block fashion
1153 *
1154  DO 150 ki = 1, ibulge
1155  istart = max( k1( ki ), m )
1156  istop = min( k2( ki ), i-1 )
1157 *
1158  modkm1 = mod( istart-1, hbl )
1159  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1160  $ ( mycol.EQ.icurcol( ki ) ) .AND.
1161  $ ( modkm1.EQ.hbl-2 ) .AND. ( istart.LT.i-1 ) ) THEN
1162  k = istart
1163 *
1164 * Catch up on column & border work
1165 *
1166  nr = min( 3, i-k+1 )
1167  v2 = work( vecsidx+( k-1 )*3+1 )
1168  v3 = work( vecsidx+( k-1 )*3+2 )
1169  t1 = work( vecsidx+( k-1 )*3+3 )
1170  IF( nr.EQ.3 ) THEN
1171 *
1172 * Do some work so next step is ready...
1173 *
1174 * V3 = VCOPY( 3 )
1175  t2 = t1*v2
1176  t3 = t1*v3
1177  itmp1 = min( 6, i2+2-k )
1178  itmp2 = max( i1-k+2, 1 )
1179  DO 100 j = 2, itmp1
1180  sum = smalla( 2, j, ki ) +
1181  $ v2*smalla( 3, j, ki ) +
1182  $ v3*smalla( 4, j, ki )
1183  smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum*t1
1184  smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t2
1185  smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t3
1186  100 CONTINUE
1187  DO 110 j = itmp2, 5
1188  sum = smalla( j, 2, ki ) +
1189  $ v2*smalla( j, 3, ki ) +
1190  $ v3*smalla( j, 4, ki )
1191  smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum*t1
1192  smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t2
1193  smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t3
1194  110 CONTINUE
1195  END IF
1196  END IF
1197 *
1198  IF( ( mod( istart-1, hbl ).EQ.hbl-1 ) .AND.
1199  $ ( istart.LE.istop ) .AND.
1200  $ ( myrow.EQ.icurrow( ki ) ) .AND.
1201  $ ( mycol.EQ.icurcol( ki ) ) ) THEN
1202  k = istop
1203 *
1204 * Catch up on column & border work
1205 *
1206  nr = min( 3, i-k+1 )
1207  v2 = work( vecsidx+( k-1 )*3+1 )
1208  v3 = work( vecsidx+( k-1 )*3+2 )
1209  t1 = work( vecsidx+( k-1 )*3+3 )
1210  IF( nr.EQ.3 ) THEN
1211 *
1212 * Do some work so next step is ready...
1213 *
1214 * V3 = VCOPY( 3 )
1215  t2 = t1*v2
1216  t3 = t1*v3
1217  itmp1 = min( 6, i2-k+3 )
1218  itmp2 = max( i1-k+3, 1 )
1219  DO 120 j = 3, itmp1
1220  sum = smalla( 3, j, ki ) +
1221  $ v2*smalla( 4, j, ki ) +
1222  $ v3*smalla( 5, j, ki )
1223  smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t1
1224  smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t2
1225  smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*t3
1226  120 CONTINUE
1227  DO 130 j = itmp2, 6
1228  sum = smalla( j, 3, ki ) +
1229  $ v2*smalla( j, 4, ki ) +
1230  $ v3*smalla( j, 5, ki )
1231  smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t1
1232  smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t2
1233  smalla( j, 5, ki ) = smalla( j, 5, ki ) - sum*t3
1234  130 CONTINUE
1235  END IF
1236  END IF
1237 *
1238  modkm1 = mod( istart-1, hbl )
1239  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1240  $ ( mycol.EQ.icurcol( ki ) ) .AND.
1241  $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
1242  $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
1243  $ 1 ) ) ) ) THEN
1244 *
1245 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
1246 *
1247  irow1 = krow( ki )
1248  icol1 = localk2( ki )
1249  DO 140 k = istart, istop
1250 *
1251 * Catch up on column & border work
1252 *
1253  nr = min( 3, i-k+1 )
1254  v2 = work( vecsidx+( k-1 )*3+1 )
1255  v3 = work( vecsidx+( k-1 )*3+2 )
1256  t1 = work( vecsidx+( k-1 )*3+3 )
1257  IF( k.LT.istop ) THEN
1258 *
1259 * Do some work so next step is ready...
1260 *
1261  t2 = t1*v2
1262  t3 = t1*v3
1263  CALL dlaref( 'Col', a, lda, .false., z, ldz,
1264  $ .false., icol1, icol1, istart,
1265  $ istop, min( istart+1, i )-k+irow1,
1266  $ irow1, liloz, lihiz,
1267  $ work( vecsidx+1 ), v2, v3, t1, t2,
1268  $ t3 )
1269  irow1 = irow1 + 1
1270  icol1 = icol1 + 1
1271  ELSE
1272  IF( ( nr.EQ.3 ) .AND. ( mod( k-1,
1273  $ hbl ).LT.hbl-2 ) ) THEN
1274  t2 = t1*v2
1275  t3 = t1*v3
1276  CALL dlaref( 'Row', a, lda, .false., z, ldz,
1277  $ .false., irow1, irow1, istart,
1278  $ istop, icol1, min( min( k2( ki )
1279  $ +1, i-1 ), i2 )-k+icol1, liloz,
1280  $ lihiz, work( vecsidx+1 ), v2,
1281  $ v3, t1, t2, t3 )
1282  END IF
1283  END IF
1284  140 CONTINUE
1285  END IF
1286 *
1287 * Send SMALLA back again.
1288 *
1289  k = istart
1290  modkm1 = mod( k-1, hbl )
1291  IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
1292  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
1293 *
1294 * Copy 6 elements to global A(K-1:K+4,K-1:K+4)
1295 *
1296  CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,
1297  $ mycol, irow1, icol1, itmp1, itmp2 )
1298  CALL pdlacp3( min( 6, n-k+2 ), k-1, a, desca,
1299  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1300  $ 1 )
1301 *
1302  END IF
1303  IF( modkm1.EQ.hbl-1 ) THEN
1304 *
1305 * Copy 6 elements to global A(K-2:K+3,K-2:K+3)
1306 *
1307  CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
1308  $ mycol, irow1, icol1, itmp1, itmp2 )
1309  CALL pdlacp3( min( 6, n-k+3 ), k-2, a, desca,
1310  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1311  $ 1 )
1312  END IF
1313  END IF
1314 *
1315  150 CONTINUE
1316 *
1317 * Now start major set of block ROW reflections
1318 *
1319  DO 160 ki = 1, ibulge
1320  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1321  $ ( down.NE.icurrow( ki ) ) )GO TO 160
1322  istart = max( k1( ki ), m )
1323  istop = min( k2( ki ), i-1 )
1324 *
1325  IF( ( istop.GT.istart ) .AND.
1326  $ ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.
1327  $ ( icurrow( ki ).EQ.myrow ) ) THEN
1328  irow1 = min( k2( ki )+1, i-1 ) + 1
1329  CALL infog1l( irow1, hbl, npcol, mycol, desca(csrc_),
1330  $ itmp1, itmp2 )
1331  itmp2 = numroc( i2, hbl, mycol, desca(csrc_), npcol )
1332  ii = krow( ki )
1333  CALL dlaref( 'Row', a, lda, wantz, z, ldz, .true., ii,
1334  $ ii, istart, istop, itmp1, itmp2, liloz,
1335  $ lihiz, work( vecsidx+1 ), v2, v3, t1, t2,
1336  $ t3 )
1337  END IF
1338  160 CONTINUE
1339 *
1340  DO 180 ki = 1, ibulge
1341  IF( krow( ki ).GT.kp2row( ki ) )
1342  $ GO TO 180
1343  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1344  $ ( down.NE.icurrow( ki ) ) )GO TO 180
1345  istart = max( k1( ki ), m )
1346  istop = min( k2( ki ), i-1 )
1347  IF( ( istart.EQ.istop ) .OR.
1348  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1349  $ ( icurrow( ki ).NE.myrow ) ) THEN
1350  DO 170 k = istart, istop
1351  v2 = work( vecsidx+( k-1 )*3+1 )
1352  v3 = work( vecsidx+( k-1 )*3+2 )
1353  t1 = work( vecsidx+( k-1 )*3+3 )
1354  nr = min( 3, i-k+1 )
1355  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1356  $ kp2row( ki ) ) ) THEN
1357  IF( ( k.LT.istop ) .AND.
1358  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1359  itmp1 = min( k2( ki )+1, i-1 ) + 1
1360  ELSE
1361  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1362  itmp1 = min( k2( ki )+1, i-1 ) + 1
1363  END IF
1364  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1365  itmp1 = min( k+4, i2 ) + 1
1366  END IF
1367  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1368  itmp1 = min( k+3, i2 ) + 1
1369  END IF
1370  END IF
1371 *
1372 * Find local coor of rows K through K+2
1373 *
1374  irow1 = krow( ki )
1375  irow2 = kp2row( ki )
1376  CALL infog1l( itmp1, hbl, npcol, mycol,
1377  $ desca(csrc_), icol1, icol2 )
1378  icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1379  IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.
1380  $ ( nprow.EQ.1 ) ) THEN
1381  t2 = t1*v2
1382  t3 = t1*v3
1383  CALL dlaref( 'Row', a, lda, wantz, z, ldz,
1384  $ .false., irow1, irow1, istart,
1385  $ istop, icol1, icol2, liloz,
1386  $ lihiz, work( vecsidx+1 ), v2,
1387  $ v3, t1, t2, t3 )
1388  END IF
1389  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1390  $ ( nprow.GT.1 ) ) THEN
1391  IF( irow1.EQ.irow2 ) THEN
1392  CALL dgesd2d( contxt, 1, icol2-icol1+1,
1393  $ a( ( icol1-1 )*lda+irow2 ),
1394  $ lda, up, mycol )
1395  END IF
1396  END IF
1397  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1398  $ ( nprow.GT.1 ) ) THEN
1399  IF( irow1.EQ.irow2 ) THEN
1400  CALL dgesd2d( contxt, 1, icol2-icol1+1,
1401  $ a( ( icol1-1 )*lda+irow1 ),
1402  $ lda, down, mycol )
1403  END IF
1404  END IF
1405  END IF
1406  170 CONTINUE
1407  END IF
1408  180 CONTINUE
1409 *
1410  DO 220 ki = 1, ibulge
1411  IF( krow( ki ).GT.kp2row( ki ) )
1412  $ GO TO 220
1413  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1414  $ ( down.NE.icurrow( ki ) ) )GO TO 220
1415  istart = max( k1( ki ), m )
1416  istop = min( k2( ki ), i-1 )
1417  IF( ( istart.EQ.istop ) .OR.
1418  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1419  $ ( icurrow( ki ).NE.myrow ) ) THEN
1420  DO 210 k = istart, istop
1421  v2 = work( vecsidx+( k-1 )*3+1 )
1422  v3 = work( vecsidx+( k-1 )*3+2 )
1423  t1 = work( vecsidx+( k-1 )*3+3 )
1424  nr = min( 3, i-k+1 )
1425  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1426  $ kp2row( ki ) ) ) THEN
1427  IF( ( k.LT.istop ) .AND.
1428  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1429  itmp1 = min( k2( ki )+1, i-1 ) + 1
1430  ELSE
1431  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1432  itmp1 = min( k2( ki )+1, i-1 ) + 1
1433  END IF
1434  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1435  itmp1 = min( k+4, i2 ) + 1
1436  END IF
1437  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1438  itmp1 = min( k+3, i2 ) + 1
1439  END IF
1440  END IF
1441 *
1442  irow1 = krow( ki ) + k - istart
1443  irow2 = kp2row( ki ) + k - istart
1444  CALL infog1l( itmp1, hbl, npcol, mycol,
1445  $ desca(csrc_),icol1, icol2 )
1446  icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1447  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1448  $ ( nprow.GT.1 ) ) THEN
1449  IF( irow1.NE.irow2 ) THEN
1450  CALL dgerv2d( contxt, 1, icol2-icol1+1,
1451  $ work( irbuf+1 ), 1, down,
1452  $ mycol )
1453  t2 = t1*v2
1454  t3 = t1*v3
1455  DO 190 j = icol1, icol2
1456  sum = a( ( j-1 )*lda+irow1 ) +
1457  $ v2*a( ( j-1 )*lda+irow1+1 ) +
1458  $ v3*work( irbuf+j-icol1+1 )
1459  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1460  $ lda+irow1 ) - sum*t1
1461  a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1462  $ lda+irow1+1 ) - sum*t2
1463  work( irbuf+j-icol1+1 ) = work( irbuf+
1464  $ j-icol1+1 ) - sum*t3
1465  190 CONTINUE
1466  CALL dgesd2d( contxt, 1, icol2-icol1+1,
1467  $ work( irbuf+1 ), 1, down,
1468  $ mycol )
1469  END IF
1470  END IF
1471  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1472  $ ( nprow.GT.1 ) ) THEN
1473  IF( irow1.NE.irow2 ) THEN
1474  CALL dgerv2d( contxt, 1, icol2-icol1+1,
1475  $ work( irbuf+1 ), 1, up,
1476  $ mycol )
1477  t2 = t1*v2
1478  t3 = t1*v3
1479  DO 200 j = icol1, icol2
1480  sum = work( irbuf+j-icol1+1 ) +
1481  $ v2*a( ( j-1 )*lda+irow1 ) +
1482  $ v3*a( ( j-1 )*lda+irow1+1 )
1483  work( irbuf+j-icol1+1 ) = work( irbuf+
1484  $ j-icol1+1 ) - sum*t1
1485  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1486  $ lda+irow1 ) - sum*t2
1487  a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1488  $ lda+irow1+1 ) - sum*t3
1489  200 CONTINUE
1490  CALL dgesd2d( contxt, 1, icol2-icol1+1,
1491  $ work( irbuf+1 ), 1, up,
1492  $ mycol )
1493  END IF
1494  END IF
1495  END IF
1496  210 CONTINUE
1497  END IF
1498  220 CONTINUE
1499 *
1500  DO 240 ki = 1, ibulge
1501  IF( krow( ki ).GT.kp2row( ki ) )
1502  $ GO TO 240
1503  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1504  $ ( down.NE.icurrow( ki ) ) )GO TO 240
1505  istart = max( k1( ki ), m )
1506  istop = min( k2( ki ), i-1 )
1507  IF( ( istart.EQ.istop ) .OR.
1508  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1509  $ ( icurrow( ki ).NE.myrow ) ) THEN
1510  DO 230 k = istart, istop
1511  v2 = work( vecsidx+( k-1 )*3+1 )
1512  v3 = work( vecsidx+( k-1 )*3+2 )
1513  t1 = work( vecsidx+( k-1 )*3+3 )
1514  nr = min( 3, i-k+1 )
1515  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1516  $ kp2row( ki ) ) ) THEN
1517  IF( ( k.LT.istop ) .AND.
1518  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1519  itmp1 = min( k2( ki )+1, i-1 ) + 1
1520  ELSE
1521  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1522  itmp1 = min( k2( ki )+1, i-1 ) + 1
1523  END IF
1524  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1525  itmp1 = min( k+4, i2 ) + 1
1526  END IF
1527  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1528  itmp1 = min( k+3, i2 ) + 1
1529  END IF
1530  END IF
1531 *
1532  irow1 = krow( ki ) + k - istart
1533  irow2 = kp2row( ki ) + k - istart
1534  CALL infog1l( itmp1, hbl, npcol, mycol,
1535  $ desca(csrc_), icol1, icol2 )
1536  icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1537  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1538  $ ( nprow.GT.1 ) ) THEN
1539  IF( irow1.EQ.irow2 ) THEN
1540  CALL dgerv2d( contxt, 1, icol2-icol1+1,
1541  $ a( ( icol1-1 )*lda+irow2 ),
1542  $ lda, up, mycol )
1543  END IF
1544  END IF
1545  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1546  $ ( nprow.GT.1 ) ) THEN
1547  IF( irow1.EQ.irow2 ) THEN
1548  CALL dgerv2d( contxt, 1, icol2-icol1+1,
1549  $ a( ( icol1-1 )*lda+irow1 ),
1550  $ lda, down, mycol )
1551  END IF
1552  END IF
1553  END IF
1554  230 CONTINUE
1555  END IF
1556  240 CONTINUE
1557  250 CONTINUE
1558 *
1559 * Now start major set of block COL reflections
1560 *
1561  DO 260 ki = 1, ibulge
1562  IF( ( mycol.NE.icurcol( ki ) ) .AND.
1563  $ ( right.NE.icurcol( ki ) ) )GO TO 260
1564  istart = max( k1( ki ), m )
1565  istop = min( k2( ki ), i-1 )
1566 *
1567  IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.
1568  $ 1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.
1569  $ ( i-istop+1.GE.3 ) ) THEN
1570  k = istart
1571  IF( ( k.LT.istop ) .AND. ( mod( k-1,
1572  $ hbl ).LT.hbl-2 ) ) THEN
1573  itmp1 = min( istart+1, i ) - 1
1574  ELSE
1575  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1576  itmp1 = min( k+3, i )
1577  END IF
1578  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1579  itmp1 = max( i1, k-1 ) - 1
1580  END IF
1581  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1582  itmp1 = max( i1, k-2 ) - 1
1583  END IF
1584  END IF
1585 *
1586  icol1 = kcol( ki )
1587  CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1588  $ irow1, irow2 )
1589  irow2 = numroc( itmp1, hbl, myrow,desca(rsrc_),nprow )
1590  IF( irow1.LE.irow2 ) THEN
1591  itmp2 = irow2
1592  ELSE
1593  itmp2 = -1
1594  END IF
1595  CALL dlaref( 'Col', a, lda, wantz, z, ldz, .true.,
1596  $ icol1, icol1, istart, istop, irow1,
1597  $ irow2, liloz, lihiz, work( vecsidx+1 ),
1598  $ v2, v3, t1, t2, t3 )
1599  k = istop
1600  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1601 *
1602 * Do from ITMP1+1 to MIN(K+3,I)
1603 *
1604  IF( mod( k-1, hbl ).LT.hbl-3 ) THEN
1605  irow1 = itmp2 + 1
1606  IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )
1607  $ THEN
1608  IF( itmp2.GT.0 ) THEN
1609  irow2 = itmp2 + min( k+3, i ) - itmp1
1610  ELSE
1611  irow2 = irow1 - 1
1612  END IF
1613  ELSE
1614  irow2 = irow1 - 1
1615  END IF
1616  ELSE
1617  CALL infog1l( itmp1+1, hbl, nprow, myrow,
1618  $ desca(rsrc_),irow1, irow2 )
1619  irow2 = numroc( min( k+3, i ), hbl, myrow,
1620  $ desca(rsrc_), nprow )
1621  END IF
1622  v2 = work( vecsidx+( k-1 )*3+1 )
1623  v3 = work( vecsidx+( k-1 )*3+2 )
1624  t1 = work( vecsidx+( k-1 )*3+3 )
1625  t2 = t1*v2
1626  t3 = t1*v3
1627  icol1 = kcol( ki ) + istop - istart
1628  CALL dlaref( 'Col', a, lda, .false., z, ldz,
1629  $ .false., icol1, icol1, istart, istop,
1630  $ irow1, irow2, liloz, lihiz,
1631  $ work( vecsidx+1 ), v2, v3, t1, t2,
1632  $ t3 )
1633  END IF
1634  END IF
1635  260 CONTINUE
1636 *
1637  DO 320 ki = 1, ibulge
1638  IF( kcol( ki ).GT.kp2col( ki ) )
1639  $ GO TO 320
1640  IF( ( mycol.NE.icurcol( ki ) ) .AND.
1641  $ ( right.NE.icurcol( ki ) ) )GO TO 320
1642  istart = max( k1( ki ), m )
1643  istop = min( k2( ki ), i-1 )
1644  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1645 *
1646 * INFO is found in a buffer
1647 *
1648  ispec = 1
1649  ELSE
1650 *
1651 * All INFO is local
1652 *
1653  ispec = 0
1654  END IF
1655 *
1656  DO 310 k = istart, istop
1657 *
1658  v2 = work( vecsidx+( k-1 )*3+1 )
1659  v3 = work( vecsidx+( k-1 )*3+2 )
1660  t1 = work( vecsidx+( k-1 )*3+3 )
1661  nr = min( 3, i-k+1 )
1662  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1663  $ THEN
1664 *
1665  IF( ( k.LT.istop ) .AND.
1666  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1667  itmp1 = min( istart+1, i ) - 1
1668  ELSE
1669  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1670  itmp1 = min( k+3, i )
1671  END IF
1672  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1673  itmp1 = max( i1, k-1 ) - 1
1674  END IF
1675  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1676  itmp1 = max( i1, k-2 ) - 1
1677  END IF
1678  END IF
1679  icol1 = kcol( ki ) + k - istart
1680  icol2 = kp2col( ki ) + k - istart
1681  CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1682  $ irow1, irow2 )
1683  irow2 = numroc( itmp1, hbl, myrow, desca(rsrc_),
1684  $ nprow )
1685  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1686  $ ( npcol.GT.1 ) ) THEN
1687  IF( icol1.EQ.icol2 ) THEN
1688  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1689  $ a( ( icol1-1 )*lda+irow1 ),
1690  $ lda, myrow, left )
1691  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1692  $ a( ( icol1-1 )*lda+irow1 ),
1693  $ lda, myrow, left )
1694  ELSE
1695  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1696  $ work( icbuf+1 ), irow2-irow1+1,
1697  $ myrow, right )
1698  t2 = t1*v2
1699  t3 = t1*v3
1700  DO 270 j = irow1, irow2
1701  sum = a( ( icol1-1 )*lda+j ) +
1702  $ v2*a( icol1*lda+j ) +
1703  $ v3*work( icbuf+j-irow1+1 )
1704  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1705  $ lda+j ) - sum*t1
1706  a( icol1*lda+j ) = a( icol1*lda+j ) -
1707  $ sum*t2
1708  work( icbuf+j-irow1+1 ) = work( icbuf+j-
1709  $ irow1+1 ) - sum*t3
1710  270 CONTINUE
1711  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1712  $ work( icbuf+1 ), irow2-irow1+1,
1713  $ myrow, right )
1714  END IF
1715  END IF
1716  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1717  $ ( npcol.GT.1 ) ) THEN
1718  IF( icol1.EQ.icol2 ) THEN
1719  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1720  $ a( ( icol1-1 )*lda+irow1 ),
1721  $ lda, myrow, right )
1722  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1723  $ a( ( icol1-1 )*lda+irow1 ),
1724  $ lda, myrow, right )
1725  ELSE
1726  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1727  $ work( icbuf+1 ), irow2-irow1+1,
1728  $ myrow, left )
1729  t2 = t1*v2
1730  t3 = t1*v3
1731  DO 280 j = irow1, irow2
1732  sum = work( icbuf+j-irow1+1 ) +
1733  $ v2*a( ( icol1-1 )*lda+j ) +
1734  $ v3*a( icol1*lda+j )
1735  work( icbuf+j-irow1+1 ) = work( icbuf+j-
1736  $ irow1+1 ) - sum*t1
1737  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1738  $ lda+j ) - sum*t2
1739  a( icol1*lda+j ) = a( icol1*lda+j ) -
1740  $ sum*t3
1741  280 CONTINUE
1742  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1743  $ work( icbuf+1 ), irow2-irow1+1,
1744  $ myrow, left )
1745  END IF
1746  END IF
1747 *
1748 * If we want Z and we haven't already done any Z
1749  IF( ( wantz ) .AND. ( mod( k-1,
1750  $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
1751 *
1752 * Accumulate transformations in the matrix Z
1753 *
1754  irow1 = liloz
1755  irow2 = lihiz
1756  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1757  IF( icol1.EQ.icol2 ) THEN
1758  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1759  $ z( ( icol1-1 )*ldz+irow1 ),
1760  $ ldz, myrow, left )
1761  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1762  $ z( ( icol1-1 )*ldz+irow1 ),
1763  $ ldz, myrow, left )
1764  ELSE
1765  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1766  $ work( icbuf+1 ),
1767  $ irow2-irow1+1, myrow,
1768  $ right )
1769  t2 = t1*v2
1770  t3 = t1*v3
1771  icol1 = ( icol1-1 )*ldz
1772  DO 290 j = irow1, irow2
1773  sum = z( icol1+j ) +
1774  $ v2*z( icol1+j+ldz ) +
1775  $ v3*work( icbuf+j-irow1+1 )
1776  z( j+icol1 ) = z( j+icol1 ) - sum*t1
1777  z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1778  $ sum*t2
1779  work( icbuf+j-irow1+1 ) = work( icbuf+
1780  $ j-irow1+1 ) - sum*t3
1781  290 CONTINUE
1782  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1783  $ work( icbuf+1 ),
1784  $ irow2-irow1+1, myrow,
1785  $ right )
1786  END IF
1787  END IF
1788  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1789  IF( icol1.EQ.icol2 ) THEN
1790  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1791  $ z( ( icol1-1 )*ldz+irow1 ),
1792  $ ldz, myrow, right )
1793  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1794  $ z( ( icol1-1 )*ldz+irow1 ),
1795  $ ldz, myrow, right )
1796  ELSE
1797  CALL dgerv2d( contxt, irow2-irow1+1, 1,
1798  $ work( icbuf+1 ),
1799  $ irow2-irow1+1, myrow, left )
1800  t2 = t1*v2
1801  t3 = t1*v3
1802  icol1 = ( icol1-1 )*ldz
1803  DO 300 j = irow1, irow2
1804  sum = work( icbuf+j-irow1+1 ) +
1805  $ v2*z( j+icol1 ) +
1806  $ v3*z( j+icol1+ldz )
1807  work( icbuf+j-irow1+1 ) = work( icbuf+
1808  $ j-irow1+1 ) - sum*t1
1809  z( j+icol1 ) = z( j+icol1 ) - sum*t2
1810  z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1811  $ sum*t3
1812  300 CONTINUE
1813  CALL dgesd2d( contxt, irow2-irow1+1, 1,
1814  $ work( icbuf+1 ),
1815  $ irow2-irow1+1, myrow, left )
1816  END IF
1817  END IF
1818  END IF
1819  IF( icurcol( ki ).EQ.mycol ) THEN
1820  IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) ) THEN
1821  localk2( ki ) = localk2( ki ) + 1
1822  END IF
1823  ELSE
1824  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1825  $ ( icurcol( ki ).EQ.right ) ) THEN
1826  IF( k.GT.m ) THEN
1827  localk2( ki ) = localk2( ki ) + 2
1828  ELSE
1829  localk2( ki ) = localk2( ki ) + 1
1830  END IF
1831  END IF
1832  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1833  $ ( i-k.EQ.2 ) .AND. ( icurcol( ki ).EQ.
1834  $ right ) ) THEN
1835  localk2( ki ) = localk2( ki ) + 2
1836  END IF
1837  END IF
1838  END IF
1839  310 CONTINUE
1840  320 CONTINUE
1841 *
1842 * Column work done
1843 *
1844  330 CONTINUE
1845 *
1846 * Now do NR=2 work
1847 *
1848  DO 410 ki = 1, ibulge
1849  istart = max( k1( ki ), m )
1850  istop = min( k2( ki ), i-1 )
1851  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1852 *
1853 * INFO is found in a buffer
1854 *
1855  ispec = 1
1856  ELSE
1857 *
1858 * All INFO is local
1859 *
1860  ispec = 0
1861  END IF
1862 *
1863  DO 400 k = istart, istop
1864 *
1865  v2 = work( vecsidx+( k-1 )*3+1 )
1866  v3 = work( vecsidx+( k-1 )*3+2 )
1867  t1 = work( vecsidx+( k-1 )*3+3 )
1868  nr = min( 3, i-k+1 )
1869  IF( nr.EQ.2 ) THEN
1870  IF ( icurrow( ki ).EQ.myrow ) THEN
1871  t2 = t1*v2
1872  END IF
1873  IF ( icurcol( ki ).EQ.mycol ) THEN
1874  t2 = t1*v2
1875  END IF
1876 *
1877 * Apply G from the left to transform the rows of the matrix
1878 * in columns K to I2.
1879 *
1880  CALL infog1l( k, hbl, npcol, mycol, desca(csrc_),
1881  $ liloh,lihih )
1882  lihih = numroc( i2, hbl, mycol, desca(csrc_),npcol)
1883  CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),
1884  $ itmp2,itmp1 )
1885  itmp1 = numroc( k+1,hbl, myrow,desca(rsrc_),nprow )
1886  IF( icurrow( ki ).EQ.myrow ) THEN
1887  IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.
1888  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1889  itmp1 = itmp1 - 1
1890  DO 340 j = ( liloh-1 )*lda,
1891  $ ( lihih-1 )*lda, lda
1892  sum = a( itmp1+j ) + v2*a( itmp1+1+j )
1893  a( itmp1+j ) = a( itmp1+j ) - sum*t1
1894  a( itmp1+1+j ) = a( itmp1+1+j ) - sum*t2
1895  340 CONTINUE
1896  ELSE
1897  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1898  CALL dgerv2d( contxt, 1, lihih-liloh+1,
1899  $ work( irbuf+1 ), 1, up,
1900  $ mycol )
1901  DO 350 j = liloh, lihih
1902  sum = work( irbuf+j-liloh+1 ) +
1903  $ v2*a( ( j-1 )*lda+itmp1 )
1904  work( irbuf+j-liloh+1 ) = work( irbuf+
1905  $ j-liloh+1 ) - sum*t1
1906  a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*
1907  $ lda+itmp1 ) - sum*t2
1908  350 CONTINUE
1909  CALL dgesd2d( contxt, 1, lihih-liloh+1,
1910  $ work( irbuf+1 ), 1, up,
1911  $ mycol )
1912  END IF
1913  END IF
1914  ELSE
1915  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1916  $ ( icurrow( ki ).EQ.down ) ) THEN
1917  CALL dgesd2d( contxt, 1, lihih-liloh+1,
1918  $ a( ( liloh-1 )*lda+itmp1 ),
1919  $ lda, down, mycol )
1920  CALL dgerv2d( contxt, 1, lihih-liloh+1,
1921  $ a( ( liloh-1 )*lda+itmp1 ),
1922  $ lda, down, mycol )
1923  END IF
1924  END IF
1925 *
1926 * Apply G from the right to transform the columns of the
1927 * matrix in rows I1 to MIN(K+3,I).
1928 *
1929  CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1930  $ liloh, lihih )
1931  lihih = numroc( i, hbl, myrow, desca(rsrc_),nprow )
1932 *
1933  IF( icurcol( ki ).EQ.mycol ) THEN
1934 * LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
1935  IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
1936  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1937  CALL infog1l( k, hbl, npcol, mycol,
1938  $ desca(csrc_), itmp1,itmp2 )
1939  itmp2 = numroc(k+1,hbl,mycol,desca(csrc_),
1940  $ npcol )
1941  DO 360 j = liloh, lihih
1942  sum = a( ( itmp1-1 )*lda+j ) +
1943  $ v2*a( itmp1*lda+j )
1944  a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*
1945  $ lda+j ) - sum*t1
1946  a( itmp1*lda+j ) = a( itmp1*lda+j ) -
1947  $ sum*t2
1948  360 CONTINUE
1949  ELSE
1950  itmp1 = localk2( ki )
1951  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1952  CALL dgerv2d( contxt, lihih-liloh+1, 1,
1953  $ work( icbuf+1 ),
1954  $ lihih-liloh+1, myrow, left )
1955  DO 370 j = liloh, lihih
1956  sum = work( icbuf+j ) +
1957  $ v2*a( ( itmp1-1 )*lda+j )
1958  work( icbuf+j ) = work( icbuf+j ) -
1959  $ sum*t1
1960  a( ( itmp1-1 )*lda+j )
1961  $ = a( ( itmp1-1 )*lda+j ) - sum*t2
1962  370 CONTINUE
1963  CALL dgesd2d( contxt, lihih-liloh+1, 1,
1964  $ work( icbuf+1 ),
1965  $ lihih-liloh+1, myrow, left )
1966  END IF
1967  END IF
1968  ELSE
1969  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1970  $ ( icurcol( ki ).EQ.right ) ) THEN
1971  itmp1 = kcol( ki )
1972  CALL dgesd2d( contxt, lihih-liloh+1, 1,
1973  $ a( ( itmp1-1 )*lda+liloh ),
1974  $ lda, myrow, right )
1975  CALL infog1l( k, hbl, npcol, mycol,
1976  $ desca(csrc_), itmp1, itmp2 )
1977  itmp2 = numroc( k+1, hbl, mycol,
1978  $ desca(csrc_), npcol )
1979  CALL dgerv2d( contxt, lihih-liloh+1, 1,
1980  $ a( ( itmp1-1 )*lda+liloh ),
1981  $ lda, myrow, right )
1982  END IF
1983  END IF
1984 *
1985  IF( wantz ) THEN
1986 *
1987 * Accumulate transformations in the matrix Z
1988 *
1989  IF( icurcol( ki ).EQ.mycol ) THEN
1990 * LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
1991  IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
1992  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1993  itmp1 = kcol( ki ) + k - istart
1994  itmp1 = ( itmp1-1 )*ldz
1995  DO 380 j = liloz, lihiz
1996  sum = z( j+itmp1 ) +
1997  $ v2*z( j+itmp1+ldz )
1998  z( j+itmp1 ) = z( j+itmp1 ) - sum*t1
1999  z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -
2000  $ sum*t2
2001  380 CONTINUE
2002  localk2( ki ) = localk2( ki ) + 1
2003  ELSE
2004  itmp1 = localk2( ki )
2005 * IF WE ACTUALLY OWN COLUMN K
2006  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2007  CALL dgerv2d( contxt, lihiz-liloz+1, 1,
2008  $ work( icbuf+1 ), ldz,
2009  $ myrow, left )
2010  itmp1 = ( itmp1-1 )*ldz
2011  DO 390 j = liloz, lihiz
2012  sum = work( icbuf+j ) +
2013  $ v2*z( j+itmp1 )
2014  work( icbuf+j ) = work( icbuf+j ) -
2015  $ sum*t1
2016  z( j+itmp1 ) = z( j+itmp1 ) - sum*t2
2017  390 CONTINUE
2018  CALL dgesd2d( contxt, lihiz-liloz+1, 1,
2019  $ work( icbuf+1 ), ldz,
2020  $ myrow, left )
2021  localk2( ki ) = localk2( ki ) + 1
2022  END IF
2023  END IF
2024  ELSE
2025 *
2026 * NO WORK BUT NEED TO UPDATE ANYWAY????
2027 *
2028  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2029  $ ( icurcol( ki ).EQ.right ) ) THEN
2030  itmp1 = kcol( ki )
2031  itmp1 = ( itmp1-1 )*ldz
2032  CALL dgesd2d( contxt, lihiz-liloz+1, 1,
2033  $ z( liloz+itmp1 ), ldz,
2034  $ myrow, right )
2035  CALL dgerv2d( contxt, lihiz-liloz+1, 1,
2036  $ z( liloz+itmp1 ), ldz,
2037  $ myrow, right )
2038  localk2( ki ) = localk2( ki ) + 1
2039  END IF
2040  END IF
2041  END IF
2042  END IF
2043  400 CONTINUE
2044 *
2045 * Adjust local information for this bulge
2046 *
2047  IF( nprow.EQ.1 ) THEN
2048  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2049  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2050  END IF
2051  IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2052  $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2053  $ THEN
2054  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2055  END IF
2056  IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2057  $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2058  $ THEN
2059  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2060  END IF
2061  IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2062  $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.
2063  $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2064  CALL infog1l( k2( ki )+1, hbl, nprow, myrow,
2065  $ desca(rsrc_), krow( ki ), itmp2 )
2066  itmp2 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
2067  END IF
2068  IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2069  $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.
2070  $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2071  CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),
2072  $ itmp2,kp2row( ki ) )
2073  kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow,
2074  $ desca(rsrc_), nprow )
2075  END IF
2076  IF( npcol.EQ.1 ) THEN
2077  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2078  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2079  END IF
2080  IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2081  $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2082  $ THEN
2083  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2084  END IF
2085  IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2086  $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2087  $ THEN
2088  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2089  END IF
2090  IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2091  $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.
2092  $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2093  CALL infog1l( k2( ki )+1, hbl, npcol, mycol,
2094  $ desca(csrc_), kcol( ki ), itmp2 )
2095  itmp2 = numroc( n, hbl, mycol, desca(csrc_), npcol )
2096  END IF
2097  IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2098  $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.
2099  $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2100  CALL infog1l( 1, hbl, npcol, mycol,desca(csrc_),itmp2,
2101  $ kp2col( ki ) )
2102  kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol,
2103  $ desca(csrc_), npcol )
2104  END IF
2105  k1( ki ) = k2( ki ) + 1
2106  istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )
2107  istop = min( istop, k1( ki )+hbl-3-
2108  $ mod( k1( ki )-1, hbl ) )
2109  istop = min( istop, i2-2 )
2110  istop = max( istop, k1( ki ) )
2111 * ISTOP = MIN( ISTOP , I-1 )
2112  k2( ki ) = istop
2113  IF( k1( ki ).EQ.istop ) THEN
2114  IF( ( mod( istop-1, hbl ).EQ.hbl-2 ) .AND.
2115  $ ( i-istop.GT.1 ) ) THEN
2116 *
2117 * Next step switches rows & cols
2118 *
2119  icurrow( ki ) = mod( icurrow( ki )+1, nprow )
2120  icurcol( ki ) = mod( icurcol( ki )+1, npcol )
2121  END IF
2122  END IF
2123  410 CONTINUE
2124  IF( k2( ibulge ).LE.i-1 )
2125  $ GO TO 40
2126  END IF
2127 *
2128  420 CONTINUE
2129 *
2130 * Failure to converge in remaining number of iterations
2131 *
2132  info = i
2133  work( 1 ) = dble( lwkopt )
2134  RETURN
2135 *
2136  430 CONTINUE
2137 *
2138  IF( l.EQ.i ) THEN
2139 *
2140 * H(I,I-1) is negligible: one eigenvalue has converged.
2141 *
2142  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,
2143  $ icol, itmp1, itmp2 )
2144  IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
2145  wr( i ) = a( ( icol-1 )*lda+irow )
2146  ELSE
2147  wr( i ) = zero
2148  END IF
2149  wi( i ) = zero
2150  ELSE IF( l.EQ.i-1 ) THEN
2151 *
2152 * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
2153 *
2154  CALL pdelget( 'All', ' ', h11, a, l, l, desca )
2155  CALL pdelget( 'All', ' ', h21, a, i, l, desca )
2156  CALL pdelget( 'All', ' ', h12, a, l, i, desca )
2157  CALL pdelget( 'All', ' ', h22, a, i, i, desca )
2158  CALL dlanv2( h11, h12, h21, h22, wr( l ), wi( l ), wr( i ),
2159  $ wi( i ), cs, sn )
2160  CALL pdelset( a, l, l, desca, h11 )
2161  CALL pdelset( a, i, l, desca, h21 )
2162  CALL pdelset( a, l, i, desca, h12 )
2163  CALL pdelset( a, i, i, desca, h22 )
2164 *
2165 * Transform H to the standard Schur form
2166 *
2167  IF( wantt ) THEN
2168  IF(i .LT. n) CALL pdrot( n-i, a, l, i+1, desca, desca( m_ ),
2169  $ a, i, i+1, desca, desca( m_ ), cs,
2170  $ sn, work( vecsidx+1 ),
2171  $ lwork-vecsidx, ierr )
2172  ltop = 1
2173  ELSE
2174  ltop = i1
2175  END IF
2176  IF (l .GT. ltop) CALL pdrot( l-ltop, a, ltop, l, desca, 1, a,
2177  $ ltop, i, desca, 1, cs, sn,
2178  $ work( vecsidx+1 ), lwork-vecsidx,
2179  $ ierr )
2180  IF( wantz ) THEN
2181  CALL pdrot( ihiz-iloz+1, z, iloz, l, descz, 1, z, iloz, i,
2182  $ descz, 1, cs, sn, work( vecsidx+1 ),
2183  $ lwork-vecsidx, ierr )
2184  END IF
2185  IF( node .NE. 0 ) THEN
2186  wr( l ) = zero
2187  wr( i ) = zero
2188  wi( l ) = zero
2189  wi( i ) = zero
2190  ENDIF
2191  ELSE
2192 *
2193 * Find the eigenvalues in H(L:I,L:I), L < I-1
2194 *
2195  nh = i - l + 1
2196  IF( nh .LE. lds ) THEN
2197  CALL pdlaqr4( wantt, wantz, n, l, i, a, desca, wr, wi,
2198  $ iloz, ihiz, z, descz, work( s1+1 ), nh,
2199  $ work( s2+1 ), nh, work( s3+1 ), 4*lds*lds,
2200  $ info )
2201  IF( info.NE.0 ) THEN
2202  work( 1 ) = dble( lwkopt )
2203  RETURN
2204  END IF
2205  IF( node.NE.0 ) THEN
2206 *
2207 * Erase the eigenvalues
2208 *
2209  DO 440 k = l, i
2210  wr( k ) = zero
2211  wi( k ) = zero
2212  440 CONTINUE
2213  END IF
2214  END IF
2215  END IF
2216 *
2217 * Decrement number of remaining iterations, and return to start of
2218 * the main loop with new value of I.
2219 *
2220  itn = itn - its
2221  IF( m.EQ.l-10 ) THEN
2222  i = l - 1
2223  ELSE
2224  i = m
2225  END IF
2226 * I = L - 1
2227  GO TO 10
2228 *
2229  450 CONTINUE
2230 *
2231  IF( num.GT.1 ) THEN
2232  CALL dgsum2d( contxt, 'All', ' ', ihi-ilo+1, 1, wr(ilo), n,
2233  $ -1, -1 )
2234  CALL dgsum2d( contxt, 'All', ' ', ihi-ilo+1, 1, wi(ilo), n,
2235  $ -1, -1 )
2236  END IF
2237 *
2238  work( 1 ) = dble( lwkopt )
2239  iwork( 1 ) = totit
2240  iwork( 2 ) = totsw
2241  iwork( 3 ) = totns
2242  RETURN
2243 *
2244 * END OF PDLAQR1
2245 *
2246  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdlabad
subroutine pdlabad(ICTXT, SMALL, LARGE)
Definition: pdlabad.f:2
pdlacp3
subroutine pdlacp3(M, I, A, DESCA, B, LDB, II, JJ, REV)
Definition: pdlacp3.f:2
pdelget
subroutine pdelget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pdelget.f:2
pdlaqr1
recursive subroutine pdlaqr1(WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, INFO)
Definition: pdlaqr1.f:5
pdlaqr4
subroutine pdlaqr4(WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK, LWORK, INFO)
Definition: pdlaqr4.f:4
pdlawil
subroutine pdlawil(II, JJ, M, A, DESCA, H44, H33, H43H34, V)
Definition: pdlawil.f:2
dlaref
subroutine dlaref(TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, LIHIZ, VECS, V2, V3, T1, T2, T3)
Definition: dlaref.f:4
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdrot
subroutine pdrot(N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, CS, SN, WORK, LWORK, INFO)
Definition: pdrot.f:3
dlasorte
subroutine dlasorte(S, LDS, J, OUT, INFO)
Definition: dlasorte.f:2
pdlasmsub
subroutine pdlasmsub(A, DESCA, I, L, K, SMLNUM, BUF, LWORK)
Definition: pdlasmsub.f:2
pdlaqr2
subroutine pdlaqr2(WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT, V, LDV, WR, WI, WORK, LWORK)
Definition: pdlaqr2.f:4
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pdlamch
double precision function pdlamch(ICTXT, CMACH)
Definition: pdblastst.f:6769
pdelset
subroutine pdelset(A, IA, JA, DESCA, ALPHA)
Definition: pdelset.f:2
min
#define min(A, B)
Definition: pcgemr.c:181