ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslaqr5.f
Go to the documentation of this file.
1  SUBROUTINE pslaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
2  $ SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK,
3  $ LWORK, IWORK, LIWORK )
4 *
5 * Contribution from the Department of Computing Science and HPC2N,
6 * Umea University, Sweden
7 *
8 * -- ScaLAPACK auxiliary routine (version 2.0.2) --
9 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10 * May 1 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,
16  $ LWORK, LIWORK
17  LOGICAL WANTT, WANTZ
18 * ..
19 * .. Array Arguments ..
20  INTEGER DESCH( * ), DESCZ( * ), IWORK( * )
21  REAL H( * ), SI( * ), SR( * ), Z( * ), WORK( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * This auxiliary subroutine called by PSLAQR0 performs a
28 * single small-bulge multi-shift QR sweep by chasing separated
29 * groups of bulges along the main block diagonal of H.
30 *
31 * WANTT (global input) logical scalar
32 * WANTT = .TRUE. if the quasi-triangular Schur factor
33 * is being computed. WANTT is set to .FALSE. otherwise.
34 *
35 * WANTZ (global input) logical scalar
36 * WANTZ = .TRUE. if the orthogonal Schur factor is being
37 * computed. WANTZ is set to .FALSE. otherwise.
38 *
39 * KACC22 (global input) integer with value 0, 1, or 2.
40 * Specifies the computation mode of far-from-diagonal
41 * orthogonal updates.
42 * = 1: PSLAQR5 accumulates reflections and uses matrix-matrix
43 * multiply to update the far-from-diagonal matrix entries.
44 * = 2: PSLAQR5 accumulates reflections, uses matrix-matrix
45 * multiply to update the far-from-diagonal matrix entries,
46 * and takes advantage of 2-by-2 block structure during
47 * matrix multiplies.
48 *
49 * N (global input) integer scalar
50 * N is the order of the Hessenberg matrix H upon which this
51 * subroutine operates.
52 *
53 * KTOP (global input) integer scalar
54 * KBOT (global input) integer scalar
55 * These are the first and last rows and columns of an
56 * isolated diagonal block upon which the QR sweep is to be
57 * applied. It is assumed without a check that
58 * either KTOP = 1 or H(KTOP,KTOP-1) = 0
59 * and
60 * either KBOT = N or H(KBOT+1,KBOT) = 0.
61 *
62 * NSHFTS (global input) integer scalar
63 * NSHFTS gives the number of simultaneous shifts. NSHFTS
64 * must be positive and even.
65 *
66 * SR (global input) REAL array of size (NSHFTS)
67 * SI (global input) REAL array of size (NSHFTS)
68 * SR contains the real parts and SI contains the imaginary
69 * parts of the NSHFTS shifts of origin that define the
70 * multi-shift QR sweep.
71 *
72 * H (local input/output) REAL array of size
73 * (DESCH(LLD_),*)
74 * On input H contains a Hessenberg matrix. On output a
75 * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
76 * to the isolated diagonal block in rows and columns KTOP
77 * through KBOT.
78 *
79 * DESCH (global and local input) INTEGER array of dimension DLEN_.
80 * The array descriptor for the distributed matrix H.
81 *
82 * ILOZ (global input) INTEGER
83 * IHIZ (global input) INTEGER
84 * Specify the rows of Z to which transformations must be
85 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
86 *
87 * Z (local input/output) REAL array of size
88 * (DESCZ(LLD_),*)
89 * If WANTZ = .TRUE., then the QR Sweep orthogonal
90 * similarity transformation is accumulated into
91 * Z(ILOZ:IHIZ,ILO:IHI) from the right.
92 * If WANTZ = .FALSE., then Z is unreferenced.
93 *
94 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
95 * The array descriptor for the distributed matrix Z.
96 *
97 * WORK (local workspace) REAL array, dimension(DWORK)
98 *
99 * LWORK (local input) INTEGER
100 * The length of the workspace array WORK.
101 *
102 * IWORK (local workspace) INTEGER array, dimension (LIWORK)
103 *
104 * LIWORK (local input) INTEGER
105 * The length of the workspace array IWORK.
106 *
107 * ================================================================
108 * Based on contributions by
109 * Robert Granat, Department of Computing Science and HPC2N,
110 * University of Umea, Sweden.
111 *
112 * ============================================================
113 * References:
114 * K. Braman, R. Byers, and R. Mathias,
115 * The Multi-Shift QR Algorithm Part I: Maintaining Well Focused
116 * Shifts, and Level 3 Performance.
117 * SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.
118 *
119 * R. Granat, B. Kagstrom, and D. Kressner,
120 * A Novel Parallel QR Algorithm for Hybrid Distributed Momory HPC
121 * Systems.
122 * SIAM J. Sci. Comput., 32(4):2345--2378, 2010.
123 *
124 * ============================================================
125 * .. Parameters ..
126  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
127  $ LLD_, MB_, M_, NB_, N_, RSRC_
128  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
129  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
130  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
131  REAL ZERO, ONE
132  PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
133  INTEGER NTINY
134  parameter( ntiny = 11 )
135 * ..
136 * .. Local Scalars ..
137  REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
138  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
139  $ ulp, tau, elem, stamp, ddum, orth
140  INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
141  $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
142  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
143  $ ns, nu, lldh, lldz, lldu, lldv, lldw, lldwh,
144  $ info, ictxt, nprow, npcol, nb, iroffh, itop,
145  $ nwin, myrow, mycol, lns, numwin, lkacc22,
146  $ lchain, win, idonejob, ipnext, anmwin, lenrbuf,
147  $ lencbuf, ichoff, lrsrc, lcsrc, lktop, lkbot,
148  $ ii, jj, swin, ewin, lnwin, dim, llktop, llkbot,
149  $ ipv, ipu, iph, ipw, ku, kwh, kwv, nve, lks,
150  $ idum, nho, dir, winid, indx, iloc, jloc, rsrc1,
151  $ csrc1, rsrc2, csrc2, rsrc3, csrc3, rsrc4, ipuu,
152  $ csrc4, lrows, lcols, indxs, ks, jloc1, iloc1,
153  $ lktop1, lktop2, wchunk, numchunk, oddeven,
154  $ chunknum, dim1, dim4, ipw3, hrows, zrows,
155  $ hcols, ipw1, ipw2, rsrc, east, jloc4, iloc4,
156  $ west, csrc, south, norht, indxe, north,
157  $ ihh, ipiw, lkbot1, nprocs, liroffh,
158  $ winfin, rws3, cls3, indx2, hrows2,
159  $ zrows2, hcols2, mnrbuf,
160  $ mxrbuf, mncbuf, mxcbuf, lwkopt
161  LOGICAL BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,
162  $ ODDNPCOL, LQUERY, BCDONE
163  CHARACTER JBCMPZ*2, JOB
164 * ..
165 * .. External Functions ..
166  LOGICAL LSAME
167  INTEGER PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC
168  REAL SLAMCH, SLANGE
169  EXTERNAL slamch, pilaenvx, iceil, indxg2p, indxg2l,
170  $ numroc, lsame, slange
171 * ..
172 * .. Intrinsic Functions ..
173  INTRINSIC abs, float, max, min, mod
174 * ..
175 * .. Local Arrays ..
176  REAL VT( 3 )
177 * ..
178 * .. External Subroutines ..
179  EXTERNAL sgemm, slabad, slamov, slaqr1, slarfg, slaset,
180  $ strmm, slaqr6
181 * ..
182 * .. Executable Statements ..
183 *
184  info = 0
185  ictxt = desch( ctxt_ )
186  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187  nprocs = nprow*npcol
188  lldh = desch( lld_ )
189  lldz = descz( lld_ )
190  nb = desch( mb_ )
191  iroffh = mod( ktop - 1, nb )
192  lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
193 *
194 * If there are no shifts, then there is nothing to do.
195 *
196  IF( .NOT. lquery .AND. nshfts.LT.2 )
197  $ RETURN
198 *
199 * If the active block is empty or 1-by-1, then there
200 * is nothing to do.
201 *
202  IF( .NOT. lquery .AND. ktop.GE.kbot )
203  $ RETURN
204 *
205 * Shuffle shifts into pairs of real shifts and pairs of
206 * complex conjugate shifts assuming complex conjugate
207 * shifts are already adjacent to one another.
208 *
209  IF( .NOT. lquery ) THEN
210  DO 10 i = 1, nshfts - 2, 2
211  IF( si( i ).NE.-si( i+1 ) ) THEN
212 *
213  swap = sr( i )
214  sr( i ) = sr( i+1 )
215  sr( i+1 ) = sr( i+2 )
216  sr( i+2 ) = swap
217 *
218  swap = si( i )
219  si( i ) = si( i+1 )
220  si( i+1 ) = si( i+2 )
221  si( i+2 ) = swap
222  END IF
223  10 CONTINUE
224  END IF
225 *
226 * NSHFTS is supposed to be even, but if is odd,
227 * then simply reduce it by one. The shuffle above
228 * ensures that the dropped shift is real and that
229 * the remaining shifts are paired.
230 *
231  ns = nshfts - mod( nshfts, 2 )
232 *
233 * Extract the size of the computational window.
234 *
235  nwin = pilaenvx( ictxt, 19, 'PSLAQR5', jbcmpz, n, nb, nb, nb )
236  nwin = min( nwin, kbot-ktop+1 )
237 *
238 * Adjust number of simultaneous shifts if it exceeds the limit
239 * set by the number of diagonal blocks in the active submatrix
240 * H(KTOP:KBOT,KTOP:KBOT).
241 *
242  ns = max( 2, min( ns, iceil( kbot-ktop+1, nb )*nwin/3 ) )
243  ns = ns - mod( ns, 2 )
244 
245 *
246 * Decide the number of simultaneous computational windows
247 * from the number of shifts - each window should contain up to
248 * (NWIN / 3) shifts. Also compute the number of shifts per
249 * window and make sure that number is even.
250 *
251  lns = min( max( 2, nwin / 3 ), max( 2, ns / min(nprow,npcol) ) )
252  lns = lns - mod( lns, 2 )
253  numwin = max( 1, min( iceil( ns, lns ),
254  $ iceil( kbot-ktop+1, nb ) - 1 ) )
255  IF( nprow.NE.npcol ) THEN
256  numwin = min( numwin, min(nprow,npcol) )
257  lns = min( lns, max( 2, ns / min(nprow,npcol) ) )
258  lns = lns - mod( lns, 2 )
259  END IF
260 *
261 * Machine constants for deflation.
262 *
263  safmin = slamch( 'SAFE MINIMUM' )
264  safmax = one / safmin
265  CALL slabad( safmin, safmax )
266  ulp = slamch( 'PRECISION' )
267  smlnum = safmin*( float( n ) / ulp )
268 *
269 * Use accumulated reflections to update far-from-diagonal
270 * entries on a local level?
271 *
272  IF( lns.LT.14 ) THEN
273  lkacc22 = 1
274  ELSE
275  lkacc22 = 2
276  END IF
277 *
278 * If so, exploit the 2-by-2 block structure?
279 * ( Usually it is not efficient to exploit the 2-by-2 structure
280 * because the block size is too small. )
281 *
282  blk22 = ( lns.GT.2 ) .AND. ( kacc22.EQ.2 )
283 *
284 * Clear trash.
285 *
286  IF( .NOT. lquery .AND. ktop+2.LE.kbot )
287  $ CALL pselset( h, ktop+2, ktop, desch, zero )
288 *
289 * NBMPS = number of 2-shift bulges in each chain
290 *
291  nbmps = lns / 2
292 *
293 * KDU = width of slab
294 *
295  kdu = 6*nbmps - 3
296 *
297 * LCHAIN = length of each chain
298 *
299  lchain = 3 * nbmps + 1
300 *
301 * Check if workspace query.
302 *
303  IF( lquery ) THEN
304  hrows = numroc( n, nb, myrow, desch(rsrc_), nprow )
305  hcols = numroc( n, nb, mycol, desch(csrc_), npcol )
306  lwkopt = (5+2*numwin)*nb**2 + 2*hrows*nb + hcols*nb +
307  $ max( hrows*nb, hcols*nb )
308  work(1) = float(lwkopt)
309  iwork(1) = 5*numwin
310  RETURN
311  END IF
312 *
313 * Check if KTOP and KBOT are valid.
314 *
315  IF( ktop.LT.1 .OR. kbot.GT.n ) RETURN
316 *
317 * Create and chase NUMWIN chains of NBMPS bulges.
318 *
319 * Set up window introduction.
320 *
321  anmwin = 0
322  intro = .true.
323  ipiw = 1
324 *
325 * Main loop:
326 * While-loop over the computational windows which is
327 * terminated when all windows have been introduced,
328 * chased down to the bottom of the considered submatrix
329 * and chased off.
330 *
331  20 CONTINUE
332 *
333 * Set up next window as long as we have less than the prescribed
334 * number of windows. Each window is described an integer quadruple:
335 * 1. Local value of KTOP (below denoted by LKTOP)
336 * 2. Local value of KBOT (below denoted by LKBOT)
337 * 3-4. Processor indices (LRSRC,LCSRC) associated with the window.
338 * (5. Mark that decides if a window is fully processed or not)
339 *
340 * Notice - the next window is only introduced if the first block
341 * in the active submatrix does not contain any other windows.
342 *
343  IF( anmwin.GT.0 ) THEN
344  lktop = iwork( 1+(anmwin-1)*5 )
345  ELSE
346  lktop = ktop
347  END IF
348  IF( intro .AND. (anmwin.EQ.0 .OR. lktop.GT.iceil(ktop,nb)*nb) )
349  $ THEN
350  anmwin = anmwin + 1
351 *
352 * Structure of IWORK:
353 * IWORK( 1+(WIN-1)*5 ): start position
354 * IWORK( 2+(WIN-1)*5 ): stop position
355 * IWORK( 3+(WIN-1)*5 ): processor row id
356 * IWORK( 4+(WIN-1)*5 ): processor col id
357 * IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)
358 *
359  iwork( 1+(anmwin-1)*5 ) = ktop
360  iwork( 2+(anmwin-1)*5 ) = ktop +
361  $ min( nwin,nb-iroffh,kbot-ktop+1 ) - 1
362  iwork( 3+(anmwin-1)*5 ) = indxg2p( iwork(1+(anmwin-1)*5), nb,
363  $ myrow, desch(rsrc_), nprow )
364  iwork( 4+(anmwin-1)*5 ) = indxg2p( iwork(2+(anmwin-1)*5), nb,
365  $ mycol, desch(csrc_), npcol )
366  iwork( 5+(anmwin-1)*5 ) = 0
367  ipiw = 6+(anmwin-1)*5
368  IF( anmwin.EQ.numwin ) intro = .false.
369  END IF
370 *
371 * Do-loop over the number of windows.
372 *
373  ipnext = 1
374  donejob = .false.
375  idonejob = 0
376  lenrbuf = 0
377  lencbuf = 0
378  ichoff = 0
379  DO 40 win = 1, anmwin
380 *
381 * Extract window information to simplify the rest.
382 *
383  lrsrc = iwork( 3+(win-1)*5 )
384  lcsrc = iwork( 4+(win-1)*5 )
385  lktop = iwork( 1+(win-1)*5 )
386  lkbot = iwork( 2+(win-1)*5 )
387  lnwin = lkbot - lktop + 1
388 *
389 * Check if anything to do for current window, i.e., if the local
390 * chain of bulges has reached the next block border etc.
391 *
392  IF( iwork(5+(win-1)*5).LT.2 .AND. lnwin.GT.1 .AND.
393  $ (lnwin.GT.lchain .OR. lkbot.EQ.kbot ) ) THEN
394  liroffh = mod(lktop-1,nb)
395  swin = lktop-liroffh
396  ewin = min(kbot,lktop-liroffh+nb-1)
397  dim = ewin-swin+1
398  IF( dim.LE.ntiny .AND. .NOT.lkbot.EQ.kbot ) THEN
399  iwork( 5+(win-1)*5 ) = 2
400  GO TO 45
401  END IF
402  idonejob = 1
403  IF( iwork(5+(win-1)*5).EQ.0 ) THEN
404  iwork(5+(win-1)*5) = 1
405  END IF
406 *
407 * Let the process that owns the corresponding window do the
408 * local bulge chase.
409 *
410  IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
411 *
412 * Set the kind of job to do in SLAQR6:
413 * 1. JOB = 'I': Introduce and chase bulges in window WIN
414 * 2. JOB = 'C': Chase bulges from top to bottom of window WIN
415 * 3. JOB = 'O': Chase bulges off window WIN
416 * 4. JOB = 'A': All of 1-3 above is done - this will for
417 * example happen for very small active
418 * submatrices (like 2-by-2)
419 *
420  llkbot = llktop + lnwin - 1
421  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
422  job = 'All steps'
423  ichoff = 1
424  ELSEIF( lktop.EQ.ktop ) THEN
425  job = 'Introduce and chase'
426  ELSEIF( lkbot.EQ.kbot ) THEN
427  job = 'Off-chase bulges'
428  ichoff = 1
429  ELSE
430  job = 'Chase bulges'
431  END IF
432 *
433 * Copy submatrix of H corresponding to window WIN into
434 * workspace and set out additional workspace for storing
435 * orthogonal transformations. This submatrix must be at
436 * least (NTINY+1)-by-(NTINY+1) to fit into SLAQR6 - if not,
437 * abort and go for cross border bulge chasing with this
438 * particular window.
439 *
440  ii = indxg2l( swin, nb, myrow, desch(rsrc_), nprow )
441  jj = indxg2l( swin, nb, mycol, desch(csrc_), npcol )
442  llktop = 1 + liroffh
443  llkbot = llktop + lnwin - 1
444 *
445  ipu = ipnext
446  iph = ipu + lnwin**2
447  ipuu = iph + max(ntiny+1,dim)**2
448  ipv = ipuu + max(ntiny+1,dim)**2
449  ipnext = iph
450 *
451  IF( lsame( job, 'A' ) .OR. lsame( job, 'O' ) .AND.
452  $ dim.LT.ntiny+1 ) THEN
453  CALL slaset( 'All', ntiny+1, ntiny+1, zero, one,
454  $ work(iph), ntiny+1 )
455  END IF
456  CALL slamov( 'Upper', dim, dim, h(ii+(jj-1)*lldh), lldh,
457  $ work(iph), max(ntiny+1,dim) )
458  CALL scopy( dim-1, h(ii+(jj-1)*lldh+1), lldh+1,
459  $ work(iph+1), max(ntiny+1,dim)+1 )
460  IF( lsame( job, 'C' ) .OR. lsame( job, 'O') ) THEN
461  CALL scopy( dim-2, h(ii+(jj-1)*lldh+2), lldh+1,
462  $ work(iph+2), max(ntiny+1,dim)+1 )
463  CALL scopy( dim-3, h(ii+(jj-1)*lldh+3), lldh+1,
464  $ work(iph+3), max(ntiny+1,dim)+1 )
465  CALL slaset( 'Lower', dim-4, dim-4, zero,
466  $ zero, work(iph+4), max(ntiny+1,dim) )
467  ELSE
468  CALL slaset( 'Lower', dim-2, dim-2, zero,
469  $ zero, work(iph+2), max(ntiny+1,dim) )
470  END IF
471 *
472  ku = max(ntiny+1,dim) - kdu + 1
473  kwh = kdu + 1
474  nho = ( max(ntiny+1,dim)-kdu+1-4 ) - ( kdu+1 ) + 1
475  kwv = kdu + 4
476  nve = max(ntiny+1,dim) - kdu - kwv + 1
477  CALL slaset( 'All', max(ntiny+1,dim),
478  $ max(ntiny+1,dim), zero, one, work(ipuu),
479  $ max(ntiny+1,dim) )
480 *
481 * Small-bulge multi-shift QR sweep.
482 *
483  lks = max( 1, ns - win*lns + 1 )
484  CALL slaqr6( job, wantt, .true., lkacc22,
485  $ max(ntiny+1,dim), llktop, llkbot, lns, sr( lks ),
486  $ si( lks ), work(iph), max(ntiny+1,dim), llktop,
487  $ llkbot, work(ipuu), max(ntiny+1,dim), work(ipu),
488  $ 3, work( iph+ku-1 ),
489  $ max(ntiny+1,dim), nve, work( iph+kwv-1 ),
490  $ max(ntiny+1,dim), nho, work( iph-1+ku+(kwh-1)*
491  $ max(ntiny+1,dim) ), max(ntiny+1,dim) )
492 *
493 * Copy submatrix of H back.
494 *
495  CALL slamov( 'Upper', dim, dim, work(iph),
496  $ max(ntiny+1,dim), h(ii+(jj-1)*lldh), lldh )
497  CALL scopy( dim-1, work(iph+1), max(ntiny+1,dim)+1,
498  $ h(ii+(jj-1)*lldh+1), lldh+1 )
499  IF( lsame( job, 'I' ) .OR. lsame( job, 'C' ) ) THEN
500  CALL scopy( dim-2, work(iph+2), dim+1,
501  $ h(ii+(jj-1)*lldh+2), lldh+1 )
502  CALL scopy( dim-3, work(iph+3), dim+1,
503  $ h(ii+(jj-1)*lldh+3), lldh+1 )
504  ELSE
505  CALL slaset( 'Lower', dim-2, dim-2, zero,
506  $ zero, h(ii+(jj-1)*lldh+2), lldh )
507  END IF
508 *
509 * Copy actual submatrix of U to the correct place
510 * of the buffer.
511 *
512  CALL slamov( 'All', lnwin, lnwin,
513  $ work(ipuu+(max(ntiny+1,dim)*liroffh)+liroffh),
514  $ max(ntiny+1,dim), work(ipu), lnwin )
515  END IF
516 *
517 * In case the local submatrix was smaller than
518 * (NTINY+1)-by-(NTINY+1) we go here and proceed.
519 *
520  45 CONTINUE
521  ELSE
522  iwork( 5+(win-1)*5 ) = 2
523  END IF
524 *
525 * Increment counter for buffers of orthogonal transformations.
526 *
527  IF( myrow.EQ.lrsrc .OR. mycol.EQ.lcsrc ) THEN
528  IF( idonejob.EQ.1 .AND. iwork(5+(win-1)*5).LT.2 ) THEN
529  IF( myrow.EQ.lrsrc ) lenrbuf = lenrbuf + lnwin*lnwin
530  IF( mycol.EQ.lcsrc ) lencbuf = lencbuf + lnwin*lnwin
531  END IF
532  END IF
533  40 CONTINUE
534 *
535 * Did some work in the above do-loop?
536 *
537  CALL igsum2d( ictxt, 'All', '1-Tree', 1, 1, idonejob, 1, -1, -1 )
538  donejob = idonejob.GT.0
539 *
540 * Chased off bulges from first window?
541 *
542  IF( nprocs.GT.1 )
543  $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1, -1,
544  $ -1, -1, -1, -1 )
545 *
546 * If work was done in the do-loop over local windows, perform
547 * updates, otherwise go for cross border bulge chasing and updates.
548 *
549  IF( donejob ) THEN
550 *
551 * Broadcast orthogonal transformations.
552 *
553  49 CONTINUE
554  IF( lenrbuf.GT.0 .OR. lencbuf.GT.0 ) THEN
555  DO 50 dir = 1, 2
556  bcdone = .false.
557  DO 60 win = 1, anmwin
558  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.
559  $ bcdone ) GO TO 62
560  lrsrc = iwork( 3+(win-1)*5 )
561  lcsrc = iwork( 4+(win-1)*5 )
562  IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
563  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
564  $ npcol.GT.1 ) THEN
565  CALL sgebs2d( ictxt, 'Row', '1-Tree', lenrbuf,
566  $ 1, work, lenrbuf )
567  ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
568  $ nprow.GT.1 ) THEN
569  CALL sgebs2d( ictxt, 'Col', '1-Tree', lencbuf,
570  $ 1, work, lencbuf )
571  END IF
572  IF( lenrbuf.GT.0 )
573  $ CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,
574  $ work(1+lenrbuf), lencbuf )
575  bcdone = .true.
576  ELSEIF( myrow.EQ.lrsrc .AND. dir.EQ.1 ) THEN
577  IF( lenrbuf.GT.0 .AND. npcol.GT.1 ) THEN
578  CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
579  $ 1, work, lenrbuf, lrsrc, lcsrc )
580  bcdone = .true.
581  END IF
582  ELSEIF( mycol.EQ.lcsrc .AND. dir.EQ.2 ) THEN
583  IF( lencbuf.GT.0 .AND. nprow.GT.1 ) THEN
584  CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
585  $ 1, work(1+lenrbuf), lencbuf, lrsrc, lcsrc )
586  bcdone = .true.
587  END IF
588  END IF
589  62 CONTINUE
590  60 CONTINUE
591  50 CONTINUE
592  END IF
593 *
594 * Compute updates - make sure to skip windows that was skipped
595 * regarding local bulge chasing.
596 *
597  DO 65 dir = 1, 2
598  winid = 0
599  IF( dir.EQ.1 ) THEN
600  ipnext = 1
601  ELSE
602  ipnext = 1 + lenrbuf
603  END IF
604  DO 70 win = 1, anmwin
605  IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 75
606  lrsrc = iwork( 3+(win-1)*5 )
607  lcsrc = iwork( 4+(win-1)*5 )
608  lktop = iwork( 1+(win-1)*5 )
609  lkbot = iwork( 2+(win-1)*5 )
610  lnwin = lkbot - lktop + 1
611  IF( (myrow.EQ.lrsrc.AND.lenrbuf.GT.0.AND.dir.EQ.1) .OR.
612  $ (mycol.EQ.lcsrc.AND.lencbuf.GT.0.AND.dir.EQ.2 ) )
613  $ THEN
614 *
615 * Set up workspaces.
616 *
617  ipu = ipnext
618  ipnext = ipu + lnwin*lnwin
619  ipw = 1 + lenrbuf + lencbuf
620  liroffh = mod(lktop-1,nb)
621  winid = winid + 1
622 *
623 * Recompute JOB to see if block structure of U could
624 * possibly be exploited or not.
625 *
626  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
627  job = 'All steps'
628  ELSEIF( lktop.EQ.ktop ) THEN
629  job = 'Introduce and chase'
630  ELSEIF( lkbot.EQ.kbot ) THEN
631  job = 'Off-chase bulges'
632  ELSE
633  job = 'Chase bulges'
634  END IF
635  END IF
636 *
637 * Use U to update far-from-diagonal entries in H.
638 * If required, use U to update Z as well.
639 *
640  IF( .NOT. blk22 .OR. .NOT. lsame(job,'C')
641  $ .OR. lns.LE.2 ) THEN
642 *
643  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
644  $ mycol.EQ.lcsrc ) THEN
645  IF( wantt ) THEN
646  DO 80 indx = 1, lktop-liroffh-1, nb
647  CALL infog2l( indx, lktop, desch, nprow,
648  $ npcol, myrow, mycol, iloc, jloc, rsrc1,
649  $ csrc1 )
650  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
651  lrows = min( nb, lktop-indx )
652  CALL sgemm('No transpose', 'No transpose',
653  $ lrows, lnwin, lnwin, one,
654  $ h((jloc-1)*lldh+iloc), lldh,
655  $ work( ipu ), lnwin, zero,
656  $ work(ipw),
657  $ lrows )
658  CALL slamov( 'All', lrows, lnwin,
659  $ work(ipw), lrows,
660  $ h((jloc-1)*lldh+iloc), lldh )
661  END IF
662  80 CONTINUE
663  END IF
664  IF( wantz ) THEN
665  DO 90 indx = 1, n, nb
666  CALL infog2l( indx, lktop, descz, nprow,
667  $ npcol, myrow, mycol, iloc, jloc, rsrc1,
668  $ csrc1 )
669  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
670  lrows = min(nb,n-indx+1)
671  CALL sgemm( 'No transpose',
672  $ 'No transpose', lrows, lnwin, lnwin,
673  $ one, z((jloc-1)*lldz+iloc), lldz,
674  $ work( ipu ), lnwin, zero,
675  $ work(ipw), lrows )
676  CALL slamov( 'All', lrows, lnwin,
677  $ work(ipw), lrows,
678  $ z((jloc-1)*lldz+iloc), lldz )
679  END IF
680  90 CONTINUE
681  END IF
682  END IF
683 *
684 * Update the rows of H affected by the bulge-chase.
685 *
686  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
687  $ myrow.EQ.lrsrc ) THEN
688  IF( wantt ) THEN
689  IF( iceil(lkbot,nb).EQ.iceil(kbot,nb) ) THEN
690  lcols = min(iceil(kbot,nb)*nb,n) - kbot
691  ELSE
692  lcols = 0
693  END IF
694  IF( lcols.GT.0 ) THEN
695  indx = kbot + 1
696  CALL infog2l( lktop, indx, desch, nprow,
697  $ npcol, myrow, mycol, iloc, jloc,
698  $ rsrc1, csrc1 )
699  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
700  CALL sgemm( 'Transpose', 'No Transpose',
701  $ lnwin, lcols, lnwin, one, work(ipu),
702  $ lnwin, h((jloc-1)*lldh+iloc), lldh,
703  $ zero, work(ipw), lnwin )
704  CALL slamov( 'All', lnwin, lcols,
705  $ work(ipw), lnwin,
706  $ h((jloc-1)*lldh+iloc), lldh )
707  END IF
708  END IF
709  93 CONTINUE
710  indxs = iceil(lkbot,nb)*nb + 1
711  DO 95 indx = indxs, n, nb
712  CALL infog2l( lktop, indx,
713  $ desch, nprow, npcol, myrow, mycol,
714  $ iloc, jloc, rsrc1, csrc1 )
715  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
716  lcols = min( nb, n-indx+1 )
717  CALL sgemm( 'Transpose', 'No Transpose',
718  $ lnwin, lcols, lnwin, one, work(ipu),
719  $ lnwin, h((jloc-1)*lldh+iloc), lldh,
720  $ zero, work(ipw),
721  $ lnwin )
722  CALL slamov( 'All', lnwin, lcols,
723  $ work(ipw), lnwin,
724  $ h((jloc-1)*lldh+iloc), lldh )
725  END IF
726  95 CONTINUE
727  END IF
728  END IF
729  ELSE
730  ks = lnwin-lns/2*3
731 *
732 * The LNWIN-by-LNWIN matrix U containing the accumulated
733 * orthogonal transformations has the following structure:
734 *
735 * [ U11 U12 ]
736 * U = [ ],
737 * [ U21 U22 ]
738 *
739 * where U21 is KS-by-KS upper triangular and U12 is
740 * (LNWIN-KS)-by-(LNWIN-KS) lower triangular.
741 * Here, KS = LNS.
742 *
743 * Update the columns of H and Z affected by the bulge
744 * chasing.
745 *
746 * Compute H2*U21 + H1*U11 in workspace.
747 *
748  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
749  $ mycol.EQ.lcsrc ) THEN
750  IF( wantt ) THEN
751  DO 100 indx = 1, lktop-liroffh-1, nb
752  CALL infog2l( indx, lktop, desch, nprow,
753  $ npcol, myrow, mycol, iloc, jloc, rsrc1,
754  $ csrc1 )
755  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
756  jloc1 = indxg2l( lktop+lnwin-ks, nb,
757  $ mycol, desch( csrc_ ), npcol )
758  lrows = min( nb, lktop-indx )
759  CALL slamov( 'All', lrows, ks,
760  $ h((jloc1-1)*lldh+iloc ), lldh,
761  $ work(ipw), lrows )
762  CALL strmm( 'Right', 'Upper',
763  $ 'No transpose','Non-unit', lrows,
764  $ ks, one, work( ipu+lnwin-ks ), lnwin,
765  $ work(ipw), lrows )
766  CALL sgemm('No transpose', 'No transpose',
767  $ lrows, ks, lnwin-ks, one,
768  $ h((jloc-1)*lldh+iloc), lldh,
769  $ work( ipu ), lnwin, one, work(ipw),
770  $ lrows )
771 *
772 * Compute H1*U12 + H2*U22 in workspace.
773 *
774  CALL slamov( 'All', lrows, lnwin-ks,
775  $ h((jloc-1)*lldh+iloc), lldh,
776  $ work( ipw+ks*lrows ), lrows )
777  CALL strmm( 'Right', 'Lower',
778  $ 'No transpose', 'Non-Unit',
779  $ lrows, lnwin-ks, one,
780  $ work( ipu+lnwin*ks ), lnwin,
781  $ work( ipw+ks*lrows ), lrows )
782  CALL sgemm('No transpose', 'No transpose',
783  $ lrows, lnwin-ks, ks, one,
784  $ h((jloc1-1)*lldh+iloc), lldh,
785  $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
786  $ one, work( ipw+ks*lrows ), lrows )
787 *
788 * Copy workspace to H.
789 *
790  CALL slamov( 'All', lrows, lnwin,
791  $ work(ipw), lrows,
792  $ h((jloc-1)*lldh+iloc), lldh )
793  END IF
794  100 CONTINUE
795  END IF
796 *
797  IF( wantz ) THEN
798 *
799 * Compute Z2*U21 + Z1*U11 in workspace.
800 *
801  DO 110 indx = 1, n, nb
802  CALL infog2l( indx, lktop, descz, nprow,
803  $ npcol, myrow, mycol, iloc, jloc, rsrc1,
804  $ csrc1 )
805  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
806  jloc1 = indxg2l( lktop+lnwin-ks, nb,
807  $ mycol, descz( csrc_ ), npcol )
808  lrows = min(nb,n-indx+1)
809  CALL slamov( 'All', lrows, ks,
810  $ z((jloc1-1)*lldz+iloc ), lldz,
811  $ work(ipw), lrows )
812  CALL strmm( 'Right', 'Upper',
813  $ 'No transpose', 'Non-unit',
814  $ lrows, ks, one, work( ipu+lnwin-ks ),
815  $ lnwin, work(ipw), lrows )
816  CALL sgemm( 'No transpose',
817  $ 'No transpose', lrows, ks, lnwin-ks,
818  $ one, z((jloc-1)*lldz+iloc), lldz,
819  $ work( ipu ), lnwin, one, work(ipw),
820  $ lrows )
821 *
822 * Compute Z1*U12 + Z2*U22 in workspace.
823 *
824  CALL slamov( 'All', lrows, lnwin-ks,
825  $ z((jloc-1)*lldz+iloc), lldz,
826  $ work( ipw+ks*lrows ), lrows)
827  CALL strmm( 'Right', 'Lower',
828  $ 'No transpose', 'Non-unit',
829  $ lrows, lnwin-ks, one,
830  $ work( ipu+lnwin*ks ), lnwin,
831  $ work( ipw+ks*lrows ), lrows )
832  CALL sgemm( 'No transpose',
833  $ 'No transpose', lrows, lnwin-ks, ks,
834  $ one, z((jloc1-1)*lldz+iloc), lldz,
835  $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
836  $ one, work( ipw+ks*lrows ),
837  $ lrows )
838 *
839 * Copy workspace to Z.
840 *
841  CALL slamov( 'All', lrows, lnwin,
842  $ work(ipw), lrows,
843  $ z((jloc-1)*lldz+iloc), lldz )
844  END IF
845  110 CONTINUE
846  END IF
847  END IF
848 *
849  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
850  $ myrow.EQ.lrsrc ) THEN
851  IF( wantt ) THEN
852  indxs = iceil(lkbot,nb)*nb + 1
853  DO 120 indx = indxs, n, nb
854  CALL infog2l( lktop, indx,
855  $ desch, nprow, npcol, myrow, mycol, iloc,
856  $ jloc, rsrc1, csrc1 )
857  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
858 *
859 * Compute U21**T*H2 + U11**T*H1 in workspace.
860 *
861  iloc1 = indxg2l( lktop+lnwin-ks, nb,
862  $ myrow, desch( rsrc_ ), nprow )
863  lcols = min( nb, n-indx+1 )
864  CALL slamov( 'All', ks, lcols,
865  $ h((jloc-1)*lldh+iloc1), lldh,
866  $ work(ipw), lnwin )
867  CALL strmm( 'Left', 'Upper', 'Transpose',
868  $ 'Non-unit', ks, lcols, one,
869  $ work( ipu+lnwin-ks ), lnwin,
870  $ work(ipw), lnwin )
871  CALL sgemm( 'Transpose', 'No transpose',
872  $ ks, lcols, lnwin-ks, one, work(ipu),
873  $ lnwin, h((jloc-1)*lldh+iloc), lldh,
874  $ one, work(ipw), lnwin )
875 *
876 * Compute U12**T*H1 + U22**T*H2 in workspace.
877 *
878  CALL slamov( 'All', lnwin-ks, lcols,
879  $ h((jloc-1)*lldh+iloc), lldh,
880  $ work( ipw+ks ), lnwin )
881  CALL strmm( 'Left', 'Lower', 'Transpose',
882  $ 'Non-unit', lnwin-ks, lcols, one,
883  $ work( ipu+lnwin*ks ), lnwin,
884  $ work( ipw+ks ), lnwin )
885  CALL sgemm( 'Transpose', 'No Transpose',
886  $ lnwin-ks, lcols, ks, one,
887  $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
888  $ h((jloc-1)*lldh+iloc1), lldh,
889  $ one, work( ipw+ks ), lnwin )
890 *
891 * Copy workspace to H.
892 *
893  CALL slamov( 'All', lnwin, lcols,
894  $ work(ipw), lnwin,
895  $ h((jloc-1)*lldh+iloc), lldh )
896  END IF
897  120 CONTINUE
898  END IF
899  END IF
900  END IF
901 *
902 * Update position information about current window.
903 *
904  IF( dir.EQ.2 ) THEN
905  IF( lkbot.EQ.kbot ) THEN
906  lktop = kbot+1
907  lkbot = kbot+1
908  iwork( 1+(win-1)*5 ) = lktop
909  iwork( 2+(win-1)*5 ) = lkbot
910  iwork( 5+(win-1)*5 ) = 2
911  ELSE
912  lktop = min( lktop + lnwin - lchain,
913  $ iceil( lktop, nb )*nb - lchain + 1,
914  $ kbot )
915  iwork( 1+(win-1)*5 ) = lktop
916  lkbot = min( lkbot + lnwin - lchain,
917  $ iceil( lkbot, nb )*nb, kbot )
918  iwork( 2+(win-1)*5 ) = lkbot
919  lnwin = lkbot-lktop+1
920  IF( lnwin.EQ.lchain ) iwork(5+(win-1)*5) = 2
921  END IF
922  END IF
923  75 CONTINUE
924  70 CONTINUE
925  65 CONTINUE
926 *
927 * If bulges were chasen off from first window, the window is
928 * removed.
929 *
930  IF( ichoff.GT.0 ) THEN
931  DO 128 win = 2, anmwin
932  iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
933  iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
934  iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
935  iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
936  iwork( 5+(win-2)*5 ) = iwork( 5+(win-1)*5 )
937  128 CONTINUE
938  anmwin = anmwin - 1
939  ipiw = 6+(anmwin-1)*5
940  END IF
941 *
942 * If we have no more windows, return.
943 *
944  IF( anmwin.LT.1 ) RETURN
945 *
946  ELSE
947 *
948 * Set up windows such that as many bulges as possible can be
949 * moved over the border to the next block. Make sure that the
950 * cross border window is at least (NTINY+1)-by-(NTINY+1), unless
951 * we are chasing off the bulges from the last window. This is
952 * accomplished by setting the bottom index LKBOT such that the
953 * local window has the correct size.
954 *
955 * If LKBOT then becomes larger than KBOT, the endpoint of the whole
956 * global submatrix, or LKTOP from a window located already residing
957 * at the other side of the border, this is taken care of by some
958 * dirty tricks.
959 *
960  DO 130 win = 1, anmwin
961  lktop1 = iwork( 1+(win-1)*5 )
962  lkbot = iwork( 2+(win-1)*5 )
963  lnwin = max( 6, min( lkbot - lktop1 + 1, lchain ) )
964  lkbot1 = max( min( kbot, iceil(lktop1,nb)*nb+lchain),
965  $ min( kbot, min( lktop1+2*lnwin-1,
966  $ (iceil(lktop1,nb)+1)*nb ) ) )
967  iwork( 2+(win-1)*5 ) = lkbot1
968  130 CONTINUE
969  ichoff = 0
970 *
971 * Keep a record over what windows that were moved over the borders
972 * such that we can delay some windows due to lack of space on the
973 * other side of the border; we do not want to leave any of the
974 * bulges behind...
975 *
976 * IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed
977 * IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to
978 * know for updates)
979 * IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed
980 *
981 * So, start by marking all windows as not processed.
982 *
983  DO 135 win = 1, anmwin
984  iwork( 5+(win-1)*5 ) = 0
985  135 CONTINUE
986 *
987 * Do the cross border bulge-chase as follows: Start from the
988 * first window (the one that is closest to be chased off the
989 * diagonal of H) and take the odd windows first followed by the
990 * even ones. To not get into hang-problems on processor meshes
991 * with at least one odd dimension, the windows will in such a case
992 * be processed in chunks of {the minimum odd process dimension}-1
993 * windows to avoid overlapping processor scopes in forming the
994 * cross border computational windows and the cross border update
995 * regions.
996 *
997  wchunk = max( 1, min( anmwin, nprow-1, npcol-1 ) )
998  numchunk = iceil( anmwin, wchunk )
999 *
1000 * Based on the computed chunk of windows, start working with
1001 * crossborder bulge-chasing. Repeat this as long as there is
1002 * still work left to do (137 is a kind of do-while statement).
1003 *
1004  137 CONTINUE
1005 *
1006 * Zero out LENRBUF and LENCBUF each time we restart this loop.
1007 *
1008  lenrbuf = 0
1009  lencbuf = 0
1010 *
1011  DO 140 oddeven = 1, min( 2, anmwin )
1012  DO 150 chunknum = 1, numchunk
1013  ipnext = 1
1014  DO 160 win = oddeven+(chunknum-1)*wchunk,
1015  $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1016 *
1017 * Get position and size of the WIN:th active window and
1018 * make sure that we skip the cross border bulge for this
1019 * window if the window is not shared between several data
1020 * layout blocks (and processors).
1021 *
1022 * Also, delay windows that do not have sufficient size of
1023 * the other side of the border. Moreover, make sure to skip
1024 * windows that was already processed in the last round of
1025 * the do-while loop (137).
1026 *
1027  IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 165
1028  lktop = iwork( 1+(win-1)*5 )
1029  lkbot = iwork( 2+(win-1)*5 )
1030  IF( win.GT.1 ) THEN
1031  lktop2 = iwork( 1+(win-2)*5 )
1032  ELSE
1033  lktop2 = kbot+1
1034  END IF
1035  IF( iceil(lktop,nb).EQ.iceil(lkbot,nb) .OR.
1036  $ lkbot.GE.lktop2 ) GO TO 165
1037  lnwin = lkbot - lktop + 1
1038  IF( lnwin.LE.ntiny .AND. lkbot.NE.kbot .AND.
1039  $ .NOT. mod(lkbot,nb).EQ.0 ) GO TO 165
1040 *
1041 * If window is going to be processed, mark it as processed.
1042 *
1043  iwork( 5+(win-1)*5 ) = 1
1044 *
1045 * Extract processors for current cross border window,
1046 * as below:
1047 *
1048 * 1 | 2
1049 * --+--
1050 * 3 | 4
1051 *
1052  rsrc1 = iwork( 3+(win-1)*5 )
1053  csrc1 = iwork( 4+(win-1)*5 )
1054  rsrc2 = rsrc1
1055  csrc2 = mod( csrc1+1, npcol )
1056  rsrc3 = mod( rsrc1+1, nprow )
1057  csrc3 = csrc1
1058  rsrc4 = mod( rsrc1+1, nprow )
1059  csrc4 = mod( csrc1+1, npcol )
1060 *
1061 * Form group of four processors for cross border window.
1062 *
1063  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1064  $ ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.
1065  $ ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.
1066  $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN
1067 *
1068 * Compute the upper and lower parts of the active
1069 * window.
1070 *
1071  dim1 = nb - mod(lktop-1,nb)
1072  dim4 = lnwin - dim1
1073 *
1074 * Temporarily compute a new value of the size of the
1075 * computational window that is larger than or equal to
1076 * NTINY+1; call the *real* value DIM.
1077 *
1078  dim = lnwin
1079  lnwin = max(ntiny+1,lnwin)
1080 *
1081 * Divide workspace.
1082 *
1083  ipu = ipnext
1084  iph = ipu + dim**2
1085  ipuu = iph + lnwin**2
1086  ipv = ipuu + lnwin**2
1087  ipnext = iph
1088  IF( dim.LT.lnwin ) THEN
1089  CALL slaset( 'All', lnwin, lnwin, zero,
1090  $ one, work( iph ), lnwin )
1091  ELSE
1092  CALL slaset( 'All', dim, dim, zero,
1093  $ zero, work( iph ), lnwin )
1094  END IF
1095 *
1096 * Form the active window.
1097 *
1098  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1099  iloc = indxg2l( lktop, nb, myrow,
1100  $ desch( rsrc_ ), nprow )
1101  jloc = indxg2l( lktop, nb, mycol,
1102  $ desch( csrc_ ), npcol )
1103  CALL slamov( 'All', dim1, dim1,
1104  $ h((jloc-1)*lldh+iloc), lldh, work(iph),
1105  $ lnwin )
1106  IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN
1107 * Proc#1 <==> Proc#4
1108  CALL sgesd2d( ictxt, dim1, dim1,
1109  $ work(iph), lnwin, rsrc4, csrc4 )
1110  CALL sgerv2d( ictxt, dim4, dim4,
1111  $ work(iph+dim1*lnwin+dim1),
1112  $ lnwin, rsrc4, csrc4 )
1113  END IF
1114  END IF
1115  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1116  iloc = indxg2l( lktop+dim1, nb, myrow,
1117  $ desch( rsrc_ ), nprow )
1118  jloc = indxg2l( lktop+dim1, nb, mycol,
1119  $ desch( csrc_ ), npcol )
1120  CALL slamov( 'All', dim4, dim4,
1121  $ h((jloc-1)*lldh+iloc), lldh,
1122  $ work(iph+dim1*lnwin+dim1),
1123  $ lnwin )
1124  IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN
1125 * Proc#4 <==> Proc#1
1126  CALL sgesd2d( ictxt, dim4, dim4,
1127  $ work(iph+dim1*lnwin+dim1),
1128  $ lnwin, rsrc1, csrc1 )
1129  CALL sgerv2d( ictxt, dim1, dim1,
1130  $ work(iph), lnwin, rsrc1, csrc1 )
1131  END IF
1132  END IF
1133  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1134  iloc = indxg2l( lktop, nb, myrow,
1135  $ desch( rsrc_ ), nprow )
1136  jloc = indxg2l( lktop+dim1, nb, mycol,
1137  $ desch( csrc_ ), npcol )
1138  CALL slamov( 'All', dim1, dim4,
1139  $ h((jloc-1)*lldh+iloc), lldh,
1140  $ work(iph+dim1*lnwin), lnwin )
1141  IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN
1142 * Proc#2 ==> Proc#1
1143  CALL sgesd2d( ictxt, dim1, dim4,
1144  $ work(iph+dim1*lnwin),
1145  $ lnwin, rsrc1, csrc1 )
1146  END IF
1147  END IF
1148  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1149  IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN
1150 * Proc#2 ==> Proc#4
1151  CALL sgesd2d( ictxt, dim1, dim4,
1152  $ work(iph+dim1*lnwin),
1153  $ lnwin, rsrc4, csrc4 )
1154  END IF
1155  END IF
1156  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1157  iloc = indxg2l( lktop+dim1, nb, myrow,
1158  $ desch( rsrc_ ), nprow )
1159  jloc = indxg2l( lktop+dim1-1, nb, mycol,
1160  $ desch( csrc_ ), npcol )
1161  CALL slamov( 'All', 1, 1,
1162  $ h((jloc-1)*lldh+iloc), lldh,
1163  $ work(iph+(dim1-1)*lnwin+dim1),
1164  $ lnwin )
1165  IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN
1166 * Proc#3 ==> Proc#1
1167  CALL sgesd2d( ictxt, 1, 1,
1168  $ work(iph+(dim1-1)*lnwin+dim1),
1169  $ lnwin, rsrc1, csrc1 )
1170  END IF
1171  END IF
1172  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1173  IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN
1174 * Proc#3 ==> Proc#4
1175  CALL sgesd2d( ictxt, 1, 1,
1176  $ work(iph+(dim1-1)*lnwin+dim1),
1177  $ lnwin, rsrc4, csrc4 )
1178  END IF
1179  END IF
1180  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1181  IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN
1182 * Proc#1 <== Proc#2
1183  CALL sgerv2d( ictxt, dim1, dim4,
1184  $ work(iph+dim1*lnwin),
1185  $ lnwin, rsrc2, csrc2 )
1186  END IF
1187  IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN
1188 * Proc#1 <== Proc#3
1189  CALL sgerv2d( ictxt, 1, 1,
1190  $ work(iph+(dim1-1)*lnwin+dim1),
1191  $ lnwin, rsrc3, csrc3 )
1192  END IF
1193  END IF
1194  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1195  IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN
1196 * Proc#4 <== Proc#2
1197  CALL sgerv2d( ictxt, dim1, dim4,
1198  $ work(iph+dim1*lnwin),
1199  $ lnwin, rsrc2, csrc2 )
1200  END IF
1201  IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN
1202 * Proc#4 <== Proc#3
1203  CALL sgerv2d( ictxt, 1, 1,
1204  $ work(iph+(dim1-1)*lnwin+dim1),
1205  $ lnwin, rsrc3, csrc3 )
1206  END IF
1207  END IF
1208 *
1209 * Prepare for call to SLAQR6 - it could happen that no
1210 * bulges where introduced in the pre-cross border step
1211 * since the chain was too long to fit in the top-left
1212 * part of the cross border window. In such a case, the
1213 * bulges are introduced here instead. It could also
1214 * happen that the bottom-right part is too small to hold
1215 * the whole chain -- in such a case, the bulges are
1216 * chasen off immediately, as well.
1217 *
1218  IF( (myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1) .OR.
1219  $ (myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4) ) THEN
1220  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot .AND.
1221  $ (dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1222  job = 'All steps'
1223  ichoff = 1
1224  ELSEIF( lktop.EQ.ktop .AND.
1225  $ ( dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1226  job = 'Introduce and chase'
1227  ELSEIF( lkbot.EQ.kbot ) THEN
1228  job = 'Off-chase bulges'
1229  ichoff = 1
1230  ELSE
1231  job = 'Chase bulges'
1232  END IF
1233  ku = lnwin - kdu + 1
1234  kwh = kdu + 1
1235  nho = ( lnwin-kdu+1-4 ) - ( kdu+1 ) + 1
1236  kwv = kdu + 4
1237  nve = lnwin - kdu - kwv + 1
1238  CALL slaset( 'All', lnwin, lnwin,
1239  $ zero, one, work(ipuu), lnwin )
1240 *
1241 * Small-bulge multi-shift QR sweep.
1242 *
1243  lks = max(1, ns - win*lns + 1)
1244  CALL slaqr6( job, wantt, .true., lkacc22, lnwin,
1245  $ 1, dim, lns, sr( lks ), si( lks ),
1246  $ work(iph), lnwin, 1, dim,
1247  $ work(ipuu), lnwin, work(ipu), 3,
1248  $ work( iph+ku-1 ), lnwin, nve,
1249  $ work( iph+kwv-1 ), lnwin, nho,
1250  $ work( iph-1+ku+(kwh-1)*lnwin ), lnwin )
1251 *
1252 * Copy local submatrices of H back to global matrix.
1253 *
1254  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1255  iloc = indxg2l( lktop, nb, myrow,
1256  $ desch( rsrc_ ), nprow )
1257  jloc = indxg2l( lktop, nb, mycol,
1258  $ desch( csrc_ ), npcol )
1259  CALL slamov( 'All', dim1, dim1, work(iph),
1260  $ lnwin, h((jloc-1)*lldh+iloc),
1261  $ lldh )
1262  END IF
1263  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1264  iloc = indxg2l( lktop+dim1, nb, myrow,
1265  $ desch( rsrc_ ), nprow )
1266  jloc = indxg2l( lktop+dim1, nb, mycol,
1267  $ desch( csrc_ ), npcol )
1268  CALL slamov( 'All', dim4, dim4,
1269  $ work(iph+dim1*lnwin+dim1),
1270  $ lnwin, h((jloc-1)*lldh+iloc), lldh )
1271  END IF
1272 *
1273 * Copy actual submatrix of U to the correct place of
1274 * the buffer.
1275 *
1276  CALL slamov( 'All', dim, dim,
1277  $ work(ipuu), lnwin, work(ipu), dim )
1278  END IF
1279 *
1280 * Return data to process 2 and 3.
1281 *
1282  rws3 = min(3,dim4)
1283  cls3 = min(3,dim1)
1284  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1285  IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN
1286 * Proc#1 ==> Proc#3
1287  CALL sgesd2d( ictxt, rws3, cls3,
1288  $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1289  $ lnwin, rsrc3, csrc3 )
1290  END IF
1291  END IF
1292  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1293  IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN
1294 * Proc#4 ==> Proc#2
1295  CALL sgesd2d( ictxt, dim1, dim4,
1296  $ work( iph+dim1*lnwin),
1297  $ lnwin, rsrc2, csrc2 )
1298  END IF
1299  END IF
1300  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1301  iloc = indxg2l( lktop, nb, myrow,
1302  $ desch( rsrc_ ), nprow )
1303  jloc = indxg2l( lktop+dim1, nb, mycol,
1304  $ desch( csrc_ ), npcol )
1305  IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN
1306 * Proc#2 <== Proc#4
1307  CALL sgerv2d( ictxt, dim1, dim4,
1308  $ work(iph+dim1*lnwin),
1309  $ lnwin, rsrc4, csrc4 )
1310  END IF
1311  CALL slamov( 'All', dim1, dim4,
1312  $ work( iph+dim1*lnwin ), lnwin,
1313  $ h((jloc-1)*lldh+iloc), lldh )
1314  END IF
1315  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1316  iloc = indxg2l( lktop+dim1, nb, myrow,
1317  $ desch( rsrc_ ), nprow )
1318  jloc = indxg2l( lktop+dim1-cls3, nb, mycol,
1319  $ desch( csrc_ ), npcol )
1320  IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN
1321 * Proc#3 <== Proc#1
1322  CALL sgerv2d( ictxt, rws3, cls3,
1323  $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1324  $ lnwin, rsrc1, csrc1 )
1325  END IF
1326  CALL slamov( 'Upper', rws3, cls3,
1327  $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1328  $ lnwin, h((jloc-1)*lldh+iloc),
1329  $ lldh )
1330  IF( rws3.GT.1 .AND. cls3.GT.1 ) THEN
1331  elem = work( iph+(dim1-cls3)*lnwin+dim1+1 )
1332  IF( elem.NE.zero ) THEN
1333  CALL slamov( 'Lower', rws3-1, cls3-1,
1334  $ work( iph+(dim1-cls3)*lnwin+dim1+1 ),
1335  $ lnwin, h((jloc-1)*lldh+iloc+1), lldh )
1336  END IF
1337  END IF
1338  END IF
1339 *
1340 * Restore correct value of LNWIN.
1341 *
1342  lnwin = dim
1343 *
1344  END IF
1345 *
1346 * Increment counter for buffers of orthogonal
1347 * transformations.
1348 *
1349  IF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 .OR.
1350  $ myrow.EQ.rsrc4 .OR. mycol.EQ.csrc4 ) THEN
1351  IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )
1352  $ lenrbuf = lenrbuf + lnwin*lnwin
1353  IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
1354  $ lencbuf = lencbuf + lnwin*lnwin
1355  END IF
1356 *
1357 * If no cross border bulge chasing was performed for the
1358 * current WIN:th window, the processor jump to this point
1359 * and consider the next one.
1360 *
1361  165 CONTINUE
1362 *
1363  160 CONTINUE
1364 *
1365 * Broadcast orthogonal transformations -- this will only happen
1366 * if the buffer associated with the orthogonal transformations
1367 * is not empty (controlled by LENRBUF, for row-wise
1368 * broadcasts, and LENCBUF, for column-wise broadcasts).
1369 *
1370  DO 170 dir = 1, 2
1371  bcdone = .false.
1372  DO 180 win = oddeven+(chunknum-1)*wchunk,
1373  $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1374  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.
1375  $ bcdone ) GO TO 185
1376  rsrc1 = iwork( 3+(win-1)*5 )
1377  csrc1 = iwork( 4+(win-1)*5 )
1378  rsrc4 = mod( rsrc1+1, nprow )
1379  csrc4 = mod( csrc1+1, npcol )
1380  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1381  $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN
1382  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
1383  $ npcol.GT.1 .AND. nprocs.GT.2 ) THEN
1384  IF( myrow.EQ.rsrc1 .OR. ( myrow.EQ.rsrc4
1385  $ .AND. rsrc4.NE.rsrc1 ) ) THEN
1386  CALL sgebs2d( ictxt, 'Row', '1-Tree',
1387  $ lenrbuf, 1, work, lenrbuf )
1388  ELSE
1389  CALL sgebr2d( ictxt, 'Row', '1-Tree',
1390  $ lenrbuf, 1, work, lenrbuf, rsrc1,
1391  $ csrc1 )
1392  END IF
1393  ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
1394  $ nprow.GT.1 .AND. nprocs.GT.2 ) THEN
1395  IF( mycol.EQ.csrc1 .OR. ( mycol.EQ.csrc4
1396  $ .AND. csrc4.NE.csrc1 ) ) THEN
1397  CALL sgebs2d( ictxt, 'Col', '1-Tree',
1398  $ lencbuf, 1, work, lencbuf )
1399  ELSE
1400  CALL sgebr2d( ictxt, 'Col', '1-Tree',
1401  $ lencbuf, 1, work(1+lenrbuf), lencbuf,
1402  $ rsrc1, csrc1 )
1403  END IF
1404  END IF
1405  IF( lenrbuf.GT.0 .AND. ( mycol.EQ.csrc1 .OR.
1406  $ ( mycol.EQ.csrc4 .AND. csrc4.NE.csrc1 ) ) )
1407  $ CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,
1408  $ work(1+lenrbuf), lencbuf )
1409  bcdone = .true.
1410  ELSEIF( myrow.EQ.rsrc1 .AND. dir.EQ.1 ) THEN
1411  IF( lenrbuf.GT.0 .AND. npcol.GT.1 )
1412  $ CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
1413  $ 1, work, lenrbuf, rsrc1, csrc1 )
1414  bcdone = .true.
1415  ELSEIF( mycol.EQ.csrc1 .AND. dir.EQ.2 ) THEN
1416  IF( lencbuf.GT.0 .AND. nprow.GT.1 )
1417  $ CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
1418  $ 1, work(1+lenrbuf), lencbuf, rsrc1, csrc1 )
1419  bcdone = .true.
1420  ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 ) THEN
1421  IF( lenrbuf.GT.0 .AND. npcol.GT.1 )
1422  $ CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
1423  $ 1, work, lenrbuf, rsrc4, csrc4 )
1424  bcdone = .true.
1425  ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 ) THEN
1426  IF( lencbuf.GT.0 .AND. nprow.GT.1 )
1427  $ CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
1428  $ 1, work(1+lenrbuf), lencbuf, rsrc4, csrc4 )
1429  bcdone = .true.
1430  END IF
1431  185 CONTINUE
1432  180 CONTINUE
1433  170 CONTINUE
1434 *
1435 * Prepare for computing cross border updates by exchanging
1436 * data in cross border update regions in H and Z.
1437 *
1438  DO 190 dir = 1, 2
1439  winid = 0
1440  ipw3 = 1
1441  DO 200 win = oddeven+(chunknum-1)*wchunk,
1442  $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1443  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 205
1444 *
1445 * Make sure this part of the code is only executed when
1446 * there has been some work performed on the WIN:th
1447 * window.
1448 *
1449  lktop = iwork( 1+(win-1)*5 )
1450  lkbot = iwork( 2+(win-1)*5 )
1451 *
1452 * Extract processor indices associated with
1453 * the current window.
1454 *
1455  rsrc1 = iwork( 3+(win-1)*5 )
1456  csrc1 = iwork( 4+(win-1)*5 )
1457  rsrc4 = mod( rsrc1+1, nprow )
1458  csrc4 = mod( csrc1+1, npcol )
1459 *
1460 * Compute local number of rows and columns
1461 * of H and Z to exchange.
1462 *
1463  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
1464  $ .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
1465  $ dir.EQ.1)) THEN
1466  winid = winid + 1
1467  lnwin = lkbot - lktop + 1
1468  ipu = ipnext
1469  dim1 = nb - mod(lktop-1,nb)
1470  dim4 = lnwin - dim1
1471  ipnext = ipu + lnwin*lnwin
1472  IF( dir.EQ.2 ) THEN
1473  IF( wantz ) THEN
1474  zrows = numroc( n, nb, myrow, descz( rsrc_ ),
1475  $ nprow )
1476  ELSE
1477  zrows = 0
1478  END IF
1479  IF( wantt ) THEN
1480  hrows = numroc( lktop-1, nb, myrow,
1481  $ desch( rsrc_ ), nprow )
1482  ELSE
1483  hrows = 0
1484  END IF
1485  ELSE
1486  zrows = 0
1487  hrows = 0
1488  END IF
1489  IF( dir.EQ.1 ) THEN
1490  IF( wantt ) THEN
1491  hcols = numroc( n - (lktop+dim1-1), nb,
1492  $ mycol, csrc4, npcol )
1493  IF( mycol.EQ.csrc4 ) hcols = hcols - dim4
1494  ELSE
1495  hcols = 0
1496  END IF
1497  ELSE
1498  hcols = 0
1499  END IF
1500  ipw = max( 1 + lenrbuf + lencbuf, ipw3 )
1501  ipw1 = ipw + hrows * lnwin
1502  IF( wantz ) THEN
1503  ipw2 = ipw1 + lnwin * hcols
1504  ipw3 = ipw2 + zrows * lnwin
1505  ELSE
1506  ipw3 = ipw1 + lnwin * hcols
1507  END IF
1508  END IF
1509 *
1510 * Let each process row and column involved in the updates
1511 * exchange data in H and Z with their neighbours.
1512 *
1513  IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1514  IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN
1515  DO 210 indx = 1, nprow
1516  IF( mycol.EQ.csrc1 ) THEN
1517  CALL infog2l( 1+(indx-1)*nb, lktop, desch,
1518  $ nprow, npcol, myrow, mycol, iloc,
1519  $ jloc1, rsrc, csrc1 )
1520  IF( myrow.EQ.rsrc ) THEN
1521  CALL slamov( 'All', hrows, dim1,
1522  $ h((jloc1-1)*lldh+iloc), lldh,
1523  $ work(ipw), hrows )
1524  IF( npcol.GT.1 ) THEN
1525  east = mod( mycol + 1, npcol )
1526  CALL sgesd2d( ictxt, hrows, dim1,
1527  $ work(ipw), hrows, rsrc, east )
1528  CALL sgerv2d( ictxt, hrows, dim4,
1529  $ work(ipw+hrows*dim1), hrows,
1530  $ rsrc, east )
1531  END IF
1532  END IF
1533  END IF
1534  IF( mycol.EQ.csrc4 ) THEN
1535  CALL infog2l( 1+(indx-1)*nb, lktop+dim1,
1536  $ desch, nprow, npcol, myrow, mycol,
1537  $ iloc, jloc4, rsrc, csrc4 )
1538  IF( myrow.EQ.rsrc ) THEN
1539  CALL slamov( 'All', hrows, dim4,
1540  $ h((jloc4-1)*lldh+iloc), lldh,
1541  $ work(ipw+hrows*dim1), hrows )
1542  IF( npcol.GT.1 ) THEN
1543  west = mod( mycol - 1 + npcol,
1544  $ npcol )
1545  CALL sgesd2d( ictxt, hrows, dim4,
1546  $ work(ipw+hrows*dim1), hrows,
1547  $ rsrc, west )
1548  CALL sgerv2d( ictxt, hrows, dim1,
1549  $ work(ipw), hrows, rsrc, west )
1550  END IF
1551  END IF
1552  END IF
1553  210 CONTINUE
1554  END IF
1555  END IF
1556 *
1557  IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN
1558  IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 ) THEN
1559  DO 220 indx = 1, npcol
1560  IF( myrow.EQ.rsrc1 ) THEN
1561  IF( indx.EQ.1 ) THEN
1562  IF( lkbot.LT.n ) THEN
1563  CALL infog2l( lktop, lkbot+1, desch,
1564  $ nprow, npcol, myrow, mycol,
1565  $ iloc1, jloc, rsrc1, csrc )
1566  ELSE
1567  csrc = -1
1568  END IF
1569  ELSEIF( mod(lkbot,nb).NE.0 ) THEN
1570  CALL infog2l( lktop,
1571  $ (iceil(lkbot,nb)+(indx-2))*nb+1,
1572  $ desch, nprow, npcol, myrow, mycol,
1573  $ iloc1, jloc, rsrc1, csrc )
1574  ELSE
1575  CALL infog2l( lktop,
1576  $ (iceil(lkbot,nb)+(indx-1))*nb+1,
1577  $ desch, nprow, npcol, myrow, mycol,
1578  $ iloc1, jloc, rsrc1, csrc )
1579  END IF
1580  IF( mycol.EQ.csrc ) THEN
1581  CALL slamov( 'All', dim1, hcols,
1582  $ h((jloc-1)*lldh+iloc1), lldh,
1583  $ work(ipw1), lnwin )
1584  IF( nprow.GT.1 ) THEN
1585  south = mod( myrow + 1, nprow )
1586  CALL sgesd2d( ictxt, dim1, hcols,
1587  $ work(ipw1), lnwin, south,
1588  $ csrc )
1589  CALL sgerv2d( ictxt, dim4, hcols,
1590  $ work(ipw1+dim1), lnwin, south,
1591  $ csrc )
1592  END IF
1593  END IF
1594  END IF
1595  IF( myrow.EQ.rsrc4 ) THEN
1596  IF( indx.EQ.1 ) THEN
1597  IF( lkbot.LT.n ) THEN
1598  CALL infog2l( lktop+dim1, lkbot+1,
1599  $ desch, nprow, npcol, myrow,
1600  $ mycol, iloc4, jloc, rsrc4,
1601  $ csrc )
1602  ELSE
1603  csrc = -1
1604  END IF
1605  ELSEIF( mod(lkbot,nb).NE.0 ) THEN
1606  CALL infog2l( lktop+dim1,
1607  $ (iceil(lkbot,nb)+(indx-2))*nb+1,
1608  $ desch, nprow, npcol, myrow, mycol,
1609  $ iloc4, jloc, rsrc4, csrc )
1610  ELSE
1611  CALL infog2l( lktop+dim1,
1612  $ (iceil(lkbot,nb)+(indx-1))*nb+1,
1613  $ desch, nprow, npcol, myrow, mycol,
1614  $ iloc4, jloc, rsrc4, csrc )
1615  END IF
1616  IF( mycol.EQ.csrc ) THEN
1617  CALL slamov( 'All', dim4, hcols,
1618  $ h((jloc-1)*lldh+iloc4), lldh,
1619  $ work(ipw1+dim1), lnwin )
1620  IF( nprow.GT.1 ) THEN
1621  north = mod( myrow - 1 + nprow,
1622  $ nprow )
1623  CALL sgesd2d( ictxt, dim4, hcols,
1624  $ work(ipw1+dim1), lnwin, north,
1625  $ csrc )
1626  CALL sgerv2d( ictxt, dim1, hcols,
1627  $ work(ipw1), lnwin, north,
1628  $ csrc )
1629  END IF
1630  END IF
1631  END IF
1632  220 CONTINUE
1633  END IF
1634  END IF
1635 *
1636  IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0) THEN
1637  IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN
1638  DO 230 indx = 1, nprow
1639  IF( mycol.EQ.csrc1 ) THEN
1640  CALL infog2l( 1+(indx-1)*nb, lktop,
1641  $ descz, nprow, npcol, myrow, mycol,
1642  $ iloc, jloc1, rsrc, csrc1 )
1643  IF( myrow.EQ.rsrc ) THEN
1644  CALL slamov( 'All', zrows, dim1,
1645  $ z((jloc1-1)*lldz+iloc), lldz,
1646  $ work(ipw2), zrows )
1647  IF( npcol.GT.1 ) THEN
1648  east = mod( mycol + 1, npcol )
1649  CALL sgesd2d( ictxt, zrows, dim1,
1650  $ work(ipw2), zrows, rsrc,
1651  $ east )
1652  CALL sgerv2d( ictxt, zrows, dim4,
1653  $ work(ipw2+zrows*dim1),
1654  $ zrows, rsrc, east )
1655  END IF
1656  END IF
1657  END IF
1658  IF( mycol.EQ.csrc4 ) THEN
1659  CALL infog2l( 1+(indx-1)*nb,
1660  $ lktop+dim1, descz, nprow, npcol,
1661  $ myrow, mycol, iloc, jloc4, rsrc,
1662  $ csrc4 )
1663  IF( myrow.EQ.rsrc ) THEN
1664  CALL slamov( 'All', zrows, dim4,
1665  $ z((jloc4-1)*lldz+iloc), lldz,
1666  $ work(ipw2+zrows*dim1), zrows )
1667  IF( npcol.GT.1 ) THEN
1668  west = mod( mycol - 1 + npcol,
1669  $ npcol )
1670  CALL sgesd2d( ictxt, zrows, dim4,
1671  $ work(ipw2+zrows*dim1),
1672  $ zrows, rsrc, west )
1673  CALL sgerv2d( ictxt, zrows, dim1,
1674  $ work(ipw2), zrows, rsrc,
1675  $ west )
1676  END IF
1677  END IF
1678  END IF
1679  230 CONTINUE
1680  END IF
1681  END IF
1682 *
1683 * If no exchanges was performed for the current window,
1684 * all processors jump to this point and try the next
1685 * one.
1686 *
1687  205 CONTINUE
1688 *
1689  200 CONTINUE
1690 *
1691 * Compute crossborder bulge-chase updates.
1692 *
1693  winid = 0
1694  IF( dir.EQ.1 ) THEN
1695  ipnext = 1
1696  ELSE
1697  ipnext = 1 + lenrbuf
1698  END IF
1699  ipw3 = 1
1700  DO 240 win = oddeven+(chunknum-1)*wchunk,
1701  $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1702  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 245
1703 *
1704 * Only perform this part of the code if there was really
1705 * some work performed on the WIN:th window.
1706 *
1707  lktop = iwork( 1+(win-1)*5 )
1708  lkbot = iwork( 2+(win-1)*5 )
1709  lnwin = lkbot - lktop + 1
1710 *
1711 * Extract the processor indices associated with
1712 * the current window.
1713 *
1714  rsrc1 = iwork( 3+(win-1)*5 )
1715  csrc1 = iwork( 4+(win-1)*5 )
1716  rsrc4 = mod( rsrc1+1, nprow )
1717  csrc4 = mod( csrc1+1, npcol )
1718 *
1719  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
1720  $ .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
1721  $ dir.EQ.1)) THEN
1722 *
1723 * Set up workspaces.
1724 *
1725  winid = winid + 1
1726  lktop = iwork( 1+(win-1)*5 )
1727  lkbot = iwork( 2+(win-1)*5 )
1728  lnwin = lkbot - lktop + 1
1729  dim1 = nb - mod(lktop-1,nb)
1730  dim4 = lnwin - dim1
1731  ipu = ipnext + (winid-1)*lnwin*lnwin
1732  IF( dir.EQ.2 ) THEN
1733  IF( wantz ) THEN
1734  zrows = numroc( n, nb, myrow, descz( rsrc_ ),
1735  $ nprow )
1736  ELSE
1737  zrows = 0
1738  END IF
1739  IF( wantt ) THEN
1740  hrows = numroc( lktop-1, nb, myrow,
1741  $ desch( rsrc_ ), nprow )
1742  ELSE
1743  hrows = 0
1744  END IF
1745  ELSE
1746  zrows = 0
1747  hrows = 0
1748  END IF
1749  IF( dir.EQ.1 ) THEN
1750  IF( wantt ) THEN
1751  hcols = numroc( n - (lktop+dim1-1), nb,
1752  $ mycol, csrc4, npcol )
1753  IF( mycol.EQ.csrc4 ) hcols = hcols - dim4
1754  ELSE
1755  hcols = 0
1756  END IF
1757  ELSE
1758  hcols = 0
1759  END IF
1760 *
1761 * IPW = local copy of overlapping column block of H
1762 * IPW1 = local copy of overlapping row block of H
1763 * IPW2 = local copy of overlapping column block of Z
1764 * IPW3 = workspace for right hand side of matrix
1765 * multiplication
1766 *
1767  ipw = max( 1 + lenrbuf + lencbuf, ipw3 )
1768  ipw1 = ipw + hrows * lnwin
1769  IF( wantz ) THEN
1770  ipw2 = ipw1 + lnwin * hcols
1771  ipw3 = ipw2 + zrows * lnwin
1772  ELSE
1773  ipw3 = ipw1 + lnwin * hcols
1774  END IF
1775 *
1776 * Recompute job to see if special structure of U
1777 * could possibly be exploited.
1778 *
1779  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
1780  job = 'All steps'
1781  ELSEIF( lktop.EQ.ktop .AND.
1782  $ ( dim1.LT.lchain+1 .OR. dim1.LE.ntiny ) )
1783  $ THEN
1784  job = 'Introduce and chase'
1785  ELSEIF( lkbot.EQ.kbot ) THEN
1786  job = 'Off-chase bulges'
1787  ELSE
1788  job = 'Chase bulges'
1789  END IF
1790  END IF
1791 *
1792 * Test if to exploit sparsity structure of
1793 * orthogonal matrix U.
1794 *
1795  ks = dim1+dim4-lns/2*3
1796  IF( .NOT. blk22 .OR. dim1.NE.ks .OR.
1797  $ dim4.NE.ks .OR. lsame(job,'I') .OR.
1798  $ lsame(job,'O') .OR. lns.LE.2 ) THEN
1799 *
1800 * Update the columns of H and Z.
1801 *
1802  IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1803  DO 250 indx = 1, min(lktop-1,1+(nprow-1)*nb), nb
1804  IF( mycol.EQ.csrc1 ) THEN
1805  CALL infog2l( indx, lktop, desch, nprow,
1806  $ npcol, myrow, mycol, iloc, jloc,
1807  $ rsrc, csrc1 )
1808  IF( myrow.EQ.rsrc ) THEN
1809  CALL sgemm( 'No transpose',
1810  $ 'No transpose', hrows, dim1,
1811  $ lnwin, one, work( ipw ), hrows,
1812  $ work( ipu ), lnwin, zero,
1813  $ work(ipw3), hrows )
1814  CALL slamov( 'All', hrows, dim1,
1815  $ work(ipw3), hrows,
1816  $ h((jloc-1)*lldh+iloc), lldh )
1817  END IF
1818  END IF
1819  IF( mycol.EQ.csrc4 ) THEN
1820  CALL infog2l( indx, lktop+dim1, desch,
1821  $ nprow, npcol, myrow, mycol, iloc,
1822  $ jloc, rsrc, csrc4 )
1823  IF( myrow.EQ.rsrc ) THEN
1824  CALL sgemm( 'No transpose',
1825  $ 'No transpose', hrows, dim4,
1826  $ lnwin, one, work( ipw ), hrows,
1827  $ work( ipu+lnwin*dim1 ), lnwin,
1828  $ zero, work(ipw3), hrows )
1829  CALL slamov( 'All', hrows, dim4,
1830  $ work(ipw3), hrows,
1831  $ h((jloc-1)*lldh+iloc), lldh )
1832  END IF
1833  END IF
1834  250 CONTINUE
1835  END IF
1836 *
1837  IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN
1838  DO 260 indx = 1, min(n,1+(nprow-1)*nb), nb
1839  IF( mycol.EQ.csrc1 ) THEN
1840  CALL infog2l( indx, lktop, descz, nprow,
1841  $ npcol, myrow, mycol, iloc, jloc,
1842  $ rsrc, csrc1 )
1843  IF( myrow.EQ.rsrc ) THEN
1844  CALL sgemm( 'No transpose',
1845  $ 'No transpose', zrows, dim1,
1846  $ lnwin, one, work( ipw2 ),
1847  $ zrows, work( ipu ), lnwin,
1848  $ zero, work(ipw3), zrows )
1849  CALL slamov( 'All', zrows, dim1,
1850  $ work(ipw3), zrows,
1851  $ z((jloc-1)*lldz+iloc), lldz )
1852  END IF
1853  END IF
1854  IF( mycol.EQ.csrc4 ) THEN
1855  CALL infog2l( indx, lktop+dim1, descz,
1856  $ nprow, npcol, myrow, mycol, iloc,
1857  $ jloc, rsrc, csrc4 )
1858  IF( myrow.EQ.rsrc ) THEN
1859  CALL sgemm( 'No transpose',
1860  $ 'No transpose', zrows, dim4,
1861  $ lnwin, one, work( ipw2 ),
1862  $ zrows,
1863  $ work( ipu+lnwin*dim1 ), lnwin,
1864  $ zero, work(ipw3), zrows )
1865  CALL slamov( 'All', zrows, dim4,
1866  $ work(ipw3), zrows,
1867  $ z((jloc-1)*lldz+iloc), lldz )
1868  END IF
1869  END IF
1870  260 CONTINUE
1871  END IF
1872 *
1873 * Update the rows of H.
1874 *
1875  IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN
1876  IF( lkbot.LT.n ) THEN
1877  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4 .AND.
1878  $ mod(lkbot,nb).NE.0 ) THEN
1879  indx = lkbot + 1
1880  CALL infog2l( lktop, indx, desch, nprow,
1881  $ npcol, myrow, mycol, iloc, jloc,
1882  $ rsrc1, csrc4 )
1883  CALL sgemm( 'Transpose', 'No Transpose',
1884  $ dim1, hcols, lnwin, one, work(ipu),
1885  $ lnwin, work( ipw1 ), lnwin, zero,
1886  $ work(ipw3), dim1 )
1887  CALL slamov( 'All', dim1, hcols,
1888  $ work(ipw3), dim1,
1889  $ h((jloc-1)*lldh+iloc), lldh )
1890  END IF
1891  IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4 .AND.
1892  $ mod(lkbot,nb).NE.0 ) THEN
1893  indx = lkbot + 1
1894  CALL infog2l( lktop+dim1, indx, desch,
1895  $ nprow, npcol, myrow, mycol, iloc,
1896  $ jloc, rsrc4, csrc4 )
1897  CALL sgemm( 'Transpose', 'No Transpose',
1898  $ dim4, hcols, lnwin, one,
1899  $ work( ipu+dim1*lnwin ), lnwin,
1900  $ work( ipw1), lnwin, zero,
1901  $ work(ipw3), dim4 )
1902  CALL slamov( 'All', dim4, hcols,
1903  $ work(ipw3), dim4,
1904  $ h((jloc-1)*lldh+iloc), lldh )
1905  END IF
1906  indxs = iceil(lkbot,nb)*nb + 1
1907  IF( mod(lkbot,nb).NE.0 ) THEN
1908  indxe = min(n,indxs+(npcol-2)*nb)
1909  ELSE
1910  indxe = min(n,indxs+(npcol-1)*nb)
1911  END IF
1912  DO 270 indx = indxs, indxe, nb
1913  IF( myrow.EQ.rsrc1 ) THEN
1914  CALL infog2l( lktop, indx, desch,
1915  $ nprow, npcol, myrow, mycol, iloc,
1916  $ jloc, rsrc1, csrc )
1917  IF( mycol.EQ.csrc ) THEN
1918  CALL sgemm( 'Transpose',
1919  $ 'No Transpose', dim1, hcols,
1920  $ lnwin, one, work( ipu ), lnwin,
1921  $ work( ipw1 ), lnwin, zero,
1922  $ work(ipw3), dim1 )
1923  CALL slamov( 'All', dim1, hcols,
1924  $ work(ipw3), dim1,
1925  $ h((jloc-1)*lldh+iloc), lldh )
1926  END IF
1927  END IF
1928  IF( myrow.EQ.rsrc4 ) THEN
1929  CALL infog2l( lktop+dim1, indx, desch,
1930  $ nprow, npcol, myrow, mycol, iloc,
1931  $ jloc, rsrc4, csrc )
1932  IF( mycol.EQ.csrc ) THEN
1933  CALL sgemm( 'Transpose',
1934  $ 'No Transpose', dim4, hcols,
1935  $ lnwin, one,
1936  $ work( ipu+lnwin*dim1 ), lnwin,
1937  $ work( ipw1 ), lnwin,
1938  $ zero, work(ipw3), dim4 )
1939  CALL slamov( 'All', dim4, hcols,
1940  $ work(ipw3), dim4,
1941  $ h((jloc-1)*lldh+iloc), lldh )
1942  END IF
1943  END IF
1944  270 CONTINUE
1945  END IF
1946  END IF
1947  ELSE
1948 *
1949 * Update the columns of H and Z.
1950 *
1951 * Compute H2*U21 + H1*U11 on the left side of the border.
1952 *
1953  IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1954  indxe = min(lktop-1,1+(nprow-1)*nb)
1955  DO 280 indx = 1, indxe, nb
1956  IF( mycol.EQ.csrc1 ) THEN
1957  CALL infog2l( indx, lktop, desch, nprow,
1958  $ npcol, myrow, mycol, iloc, jloc,
1959  $ rsrc, csrc1 )
1960  IF( myrow.EQ.rsrc ) THEN
1961  CALL slamov( 'All', hrows, ks,
1962  $ work( ipw+hrows*dim4), hrows,
1963  $ work(ipw3), hrows )
1964  CALL strmm( 'Right', 'Upper',
1965  $ 'No transpose',
1966  $ 'Non-unit', hrows, ks, one,
1967  $ work( ipu+dim4 ), lnwin,
1968  $ work(ipw3), hrows )
1969  CALL sgemm( 'No transpose',
1970  $ 'No transpose', hrows, ks, dim4,
1971  $ one, work( ipw ), hrows,
1972  $ work( ipu ), lnwin, one,
1973  $ work(ipw3), hrows )
1974  CALL slamov( 'All', hrows, ks,
1975  $ work(ipw3), hrows,
1976  $ h((jloc-1)*lldh+iloc), lldh )
1977  END IF
1978  END IF
1979 *
1980 * Compute H1*U12 + H2*U22 on the right side of
1981 * the border.
1982 *
1983  IF( mycol.EQ.csrc4 ) THEN
1984  CALL infog2l( indx, lktop+dim1, desch,
1985  $ nprow, npcol, myrow, mycol, iloc,
1986  $ jloc, rsrc, csrc4 )
1987  IF( myrow.EQ.rsrc ) THEN
1988  CALL slamov( 'All', hrows, dim4,
1989  $ work(ipw), hrows, work( ipw3 ),
1990  $ hrows )
1991  CALL strmm( 'Right', 'Lower',
1992  $ 'No transpose',
1993  $ 'Non-unit', hrows, dim4, one,
1994  $ work( ipu+lnwin*ks ), lnwin,
1995  $ work( ipw3 ), hrows )
1996  CALL sgemm( 'No transpose',
1997  $ 'No transpose', hrows, dim4, ks,
1998  $ one, work( ipw+hrows*dim4),
1999  $ hrows,
2000  $ work( ipu+lnwin*ks+dim4 ), lnwin,
2001  $ one, work( ipw3 ), hrows )
2002  CALL slamov( 'All', hrows, dim4,
2003  $ work(ipw3), hrows,
2004  $ h((jloc-1)*lldh+iloc), lldh )
2005  END IF
2006  END IF
2007  280 CONTINUE
2008  END IF
2009 *
2010  IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN
2011 *
2012 * Compute Z2*U21 + Z1*U11 on the left side
2013 * of border.
2014 *
2015  indxe = min(n,1+(nprow-1)*nb)
2016  DO 290 indx = 1, indxe, nb
2017  IF( mycol.EQ.csrc1 ) THEN
2018  CALL infog2l( indx, i, descz, nprow,
2019  $ npcol, myrow, mycol, iloc, jloc,
2020  $ rsrc, csrc1 )
2021  IF( myrow.EQ.rsrc ) THEN
2022  CALL slamov( 'All', zrows, ks,
2023  $ work( ipw2+zrows*dim4),
2024  $ zrows, work(ipw3), zrows )
2025  CALL strmm( 'Right', 'Upper',
2026  $ 'No transpose',
2027  $ 'Non-unit', zrows, ks, one,
2028  $ work( ipu+dim4 ), lnwin,
2029  $ work(ipw3), zrows )
2030  CALL sgemm( 'No transpose',
2031  $ 'No transpose', zrows, ks,
2032  $ dim4, one, work( ipw2 ),
2033  $ zrows, work( ipu ), lnwin,
2034  $ one, work(ipw3), zrows )
2035  CALL slamov( 'All', zrows, ks,
2036  $ work(ipw3), zrows,
2037  $ z((jloc-1)*lldz+iloc), lldz )
2038  END IF
2039  END IF
2040 *
2041 * Compute Z1*U12 + Z2*U22 on the right side
2042 * of border.
2043 *
2044  IF( mycol.EQ.csrc4 ) THEN
2045  CALL infog2l( indx, i+dim1, descz,
2046  $ nprow, npcol, myrow, mycol, iloc,
2047  $ jloc, rsrc, csrc4 )
2048  IF( myrow.EQ.rsrc ) THEN
2049  CALL slamov( 'All', zrows, dim4,
2050  $ work(ipw2), zrows,
2051  $ work( ipw3 ), zrows )
2052  CALL strmm( 'Right', 'Lower',
2053  $ 'No transpose',
2054  $ 'Non-unit', zrows, dim4,
2055  $ one, work( ipu+lnwin*ks ),
2056  $ lnwin, work( ipw3 ), zrows )
2057  CALL sgemm( 'No transpose',
2058  $ 'No transpose', zrows, dim4,
2059  $ ks, one,
2060  $ work( ipw2+zrows*(dim4)),
2061  $ zrows,
2062  $ work( ipu+lnwin*ks+dim4 ),
2063  $ lnwin, one, work( ipw3 ),
2064  $ zrows )
2065  CALL slamov( 'All', zrows, dim4,
2066  $ work(ipw3), zrows,
2067  $ z((jloc-1)*lldz+iloc), lldz )
2068  END IF
2069  END IF
2070  290 CONTINUE
2071  END IF
2072 *
2073  IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0) THEN
2074  IF ( lkbot.LT.n ) THEN
2075 *
2076 * Compute U21**T*H2 + U11**T*H1 on the upper
2077 * side of the border.
2078 *
2079  IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4.AND.
2080  $ mod(lkbot,nb).NE.0 ) THEN
2081  indx = lkbot + 1
2082  CALL infog2l( lktop, indx, desch, nprow,
2083  $ npcol, myrow, mycol, iloc, jloc,
2084  $ rsrc1, csrc4 )
2085  CALL slamov( 'All', ks, hcols,
2086  $ work( ipw1+dim4 ), lnwin,
2087  $ work(ipw3), ks )
2088  CALL strmm( 'Left', 'Upper', 'Transpose',
2089  $ 'Non-unit', ks, hcols, one,
2090  $ work( ipu+dim4 ), lnwin,
2091  $ work(ipw3), ks )
2092  CALL sgemm( 'Transpose', 'No transpose',
2093  $ ks, hcols, dim4, one, work(ipu),
2094  $ lnwin, work(ipw1), lnwin,
2095  $ one, work(ipw3), ks )
2096  CALL slamov( 'All', ks, hcols,
2097  $ work(ipw3), ks,
2098  $ h((jloc-1)*lldh+iloc), lldh )
2099  END IF
2100 *
2101 * Compute U12**T*H1 + U22**T*H2 one the lower
2102 * side of the border.
2103 *
2104  IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4.AND.
2105  $ mod(lkbot,nb).NE.0 ) THEN
2106  indx = lkbot + 1
2107  CALL infog2l( lktop+dim1, indx, desch,
2108  $ nprow, npcol, myrow, mycol, iloc,
2109  $ jloc, rsrc4, csrc4 )
2110  CALL slamov( 'All', dim4, hcols,
2111  $ work( ipw1 ), lnwin,
2112  $ work( ipw3 ), dim4 )
2113  CALL strmm( 'Left', 'Lower', 'Transpose',
2114  $ 'Non-unit', dim4, hcols, one,
2115  $ work( ipu+lnwin*ks ), lnwin,
2116  $ work( ipw3 ), dim4 )
2117  CALL sgemm( 'Transpose', 'No Transpose',
2118  $ dim4, hcols, ks, one,
2119  $ work( ipu+lnwin*ks+dim4 ), lnwin,
2120  $ work( ipw1+dim1 ), lnwin,
2121  $ one, work( ipw3), dim4 )
2122  CALL slamov( 'All', dim4, hcols,
2123  $ work(ipw3), dim4,
2124  $ h((jloc-1)*lldh+iloc), lldh )
2125  END IF
2126 *
2127 * Compute U21**T*H2 + U11**T*H1 on upper side
2128 * on border.
2129 *
2130  indxs = iceil(lkbot,nb)*nb+1
2131  IF( mod(lkbot,nb).NE.0 ) THEN
2132  indxe = min(n,indxs+(npcol-2)*nb)
2133  ELSE
2134  indxe = min(n,indxs+(npcol-1)*nb)
2135  END IF
2136  DO 300 indx = indxs, indxe, nb
2137  IF( myrow.EQ.rsrc1 ) THEN
2138  CALL infog2l( lktop, indx, desch,
2139  $ nprow, npcol, myrow, mycol, iloc,
2140  $ jloc, rsrc1, csrc )
2141  IF( mycol.EQ.csrc ) THEN
2142  CALL slamov( 'All', ks, hcols,
2143  $ work( ipw1+dim4 ), lnwin,
2144  $ work(ipw3), ks )
2145  CALL strmm( 'Left', 'Upper',
2146  $ 'Transpose', 'Non-unit',
2147  $ ks, hcols, one,
2148  $ work( ipu+dim4 ), lnwin,
2149  $ work(ipw3), ks )
2150  CALL sgemm( 'Transpose',
2151  $ 'No transpose', ks, hcols,
2152  $ dim4, one, work(ipu), lnwin,
2153  $ work(ipw1), lnwin, one,
2154  $ work(ipw3), ks )
2155  CALL slamov( 'All', ks, hcols,
2156  $ work(ipw3), ks,
2157  $ h((jloc-1)*lldh+iloc), lldh )
2158  END IF
2159  END IF
2160 *
2161 * Compute U12**T*H1 + U22**T*H2 on lower
2162 * side of border.
2163 *
2164  IF( myrow.EQ.rsrc4 ) THEN
2165  CALL infog2l( lktop+dim1, indx, desch,
2166  $ nprow, npcol, myrow, mycol, iloc,
2167  $ jloc, rsrc4, csrc )
2168  IF( mycol.EQ.csrc ) THEN
2169  CALL slamov( 'All', dim4, hcols,
2170  $ work( ipw1 ), lnwin,
2171  $ work( ipw3 ), dim4 )
2172  CALL strmm( 'Left', 'Lower',
2173  $ 'Transpose','Non-unit',
2174  $ dim4, hcols, one,
2175  $ work( ipu+lnwin*ks ), lnwin,
2176  $ work( ipw3 ), dim4 )
2177  CALL sgemm( 'Transpose',
2178  $ 'No Transpose', dim4, hcols,
2179  $ ks, one,
2180  $ work( ipu+lnwin*ks+dim4 ),
2181  $ lnwin, work( ipw1+dim1 ),
2182  $ lnwin, one, work( ipw3),
2183  $ dim4 )
2184  CALL slamov( 'All', dim4, hcols,
2185  $ work(ipw3), dim4,
2186  $ h((jloc-1)*lldh+iloc), lldh )
2187  END IF
2188  END IF
2189  300 CONTINUE
2190  END IF
2191  END IF
2192  END IF
2193 *
2194 * Update window information - mark processed windows are
2195 * completed.
2196 *
2197  IF( dir.EQ.2 ) THEN
2198  IF( lkbot.EQ.kbot ) THEN
2199  lktop = kbot+1
2200  lkbot = kbot+1
2201  iwork( 1+(win-1)*5 ) = lktop
2202  iwork( 2+(win-1)*5 ) = lkbot
2203  ELSE
2204  lktop = min( lktop + lnwin - lchain,
2205  $ min( kbot, iceil( lkbot, nb )*nb ) -
2206  $ lchain + 1 )
2207  iwork( 1+(win-1)*5 ) = lktop
2208  lkbot = min( max( lkbot + lnwin - lchain,
2209  $ lktop + nwin - 1), min( kbot,
2210  $ iceil( lkbot, nb )*nb ) )
2211  iwork( 2+(win-1)*5 ) = lkbot
2212  END IF
2213  IF( iwork( 5+(win-1)*5 ).EQ.1 )
2214  $ iwork( 5+(win-1)*5 ) = 2
2215  iwork( 3+(win-1)*5 ) = rsrc4
2216  iwork( 4+(win-1)*5 ) = csrc4
2217  END IF
2218 *
2219 * If nothing was done for the WIN:th window, all
2220 * processors come here and consider the next one
2221 * instead.
2222 *
2223  245 CONTINUE
2224  240 CONTINUE
2225  190 CONTINUE
2226  150 CONTINUE
2227  140 CONTINUE
2228 *
2229 * Chased off bulges from first window?
2230 *
2231  IF( nprocs.GT.1 )
2232  $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1,
2233  $ -1, -1, -1, -1, -1 )
2234 *
2235 * If the bulge was chasen off from first window it is removed.
2236 *
2237  IF( ichoff.GT.0 ) THEN
2238  DO 198 win = 2, anmwin
2239  iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
2240  iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
2241  iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
2242  iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
2243  198 CONTINUE
2244  anmwin = anmwin - 1
2245  ipiw = 6+(anmwin-1)*5
2246  END IF
2247 *
2248 * If we have no more windows, return.
2249 *
2250  IF( anmwin.LT.1 ) RETURN
2251 *
2252 * Check for any more windows to bring over the border.
2253 *
2254  winfin = 0
2255  DO 199 win = 1, anmwin
2256  winfin = winfin+iwork( 5+(win-1)*5 )
2257  199 CONTINUE
2258  IF( winfin.LT.2*anmwin ) GO TO 137
2259 *
2260 * Zero out process mark for each window - this is legal now when
2261 * the process starts over with local bulge-chasing etc.
2262 *
2263  DO 201 win = 1, anmwin
2264  iwork( 5+(win-1)*5 ) = 0
2265  201 CONTINUE
2266 *
2267  END IF
2268 *
2269 * Go back to local bulge-chase and see if there is more work to do.
2270 *
2271  GO TO 20
2272 *
2273 * End of PSLAQR5
2274 *
2275  END
max
#define max(A, B)
Definition: pcgemr.c:180
pslaqr5
subroutine pslaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, LIWORK)
Definition: pslaqr5.f:4
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pselset
subroutine pselset(A, IA, JA, DESCA, ALPHA)
Definition: pselset.f:2
slaqr6
subroutine slaqr6(JOB, WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
Definition: slaqr6.f:4
min
#define min(A, B)
Definition: pcgemr.c:181