dc/d89/pslaqr5_8f_source.html

      SUBROUTINE pslaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,

     $                    SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK,

     $                    LWORK, IWORK, LIWORK )

*

*     Contribution from the Department of Computing Science and HPC2N,

*     Umea University, Sweden

*

*  -- ScaLAPACK auxiliary routine (version 2.0.2) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     May 1 2012

*

      IMPLICIT NONE

*

*     .. Scalar Arguments ..

      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,

     $                   LWORK, LIWORK

      LOGICAL            WANTT, WANTZ

*     ..

*     .. Array Arguments ..

      INTEGER            DESCH( * ), DESCZ( * ), IWORK( * )

      REAL               H( * ), SI( * ), SR( * ), Z( * ), WORK( * )

*     ..

*

*  Purpose

*  =======

*

*  This auxiliary subroutine called by PSLAQR0 performs a

*  single small-bulge multi-shift QR sweep by chasing separated

*  groups of bulges along the main block diagonal of H.

*

*   WANTT  (global input) logical scalar

*          WANTT = .TRUE. if the quasi-triangular Schur factor

*          is being computed.  WANTT is set to .FALSE. otherwise.

*

*   WANTZ  (global input) logical scalar

*          WANTZ = .TRUE. if the orthogonal Schur factor is being

*          computed.  WANTZ is set to .FALSE. otherwise.

*

*   KACC22 (global input) integer with value 0, 1, or 2.

*          Specifies the computation mode of far-from-diagonal

*          orthogonal updates.

*     = 1: PSLAQR5 accumulates reflections and uses matrix-matrix

*          multiply to update the far-from-diagonal matrix entries.

*     = 2: PSLAQR5 accumulates reflections, uses matrix-matrix

*          multiply to update the far-from-diagonal matrix entries,

*          and takes advantage of 2-by-2 block structure during

*          matrix multiplies.

*

*   N      (global input) integer scalar

*          N is the order of the Hessenberg matrix H upon which this

*          subroutine operates.

*

*   KTOP   (global input) integer scalar

*   KBOT   (global input) integer scalar

*          These are the first and last rows and columns of an

*          isolated diagonal block upon which the QR sweep is to be

*          applied. It is assumed without a check that

*                    either KTOP = 1  or   H(KTOP,KTOP-1) = 0

*          and

*                    either KBOT = N  or   H(KBOT+1,KBOT) = 0.

*

*   NSHFTS (global input) integer scalar

*          NSHFTS gives the number of simultaneous shifts.  NSHFTS

*          must be positive and even.

*

*   SR     (global input) REAL             array of size (NSHFTS)

*   SI     (global input) REAL             array of size (NSHFTS)

*          SR contains the real parts and SI contains the imaginary

*          parts of the NSHFTS shifts of origin that define the

*          multi-shift QR sweep.

*

*   H      (local input/output) REAL             array of size

*          (DESCH(LLD_),*)

*          On input H contains a Hessenberg matrix.  On output a

*          multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied

*          to the isolated diagonal block in rows and columns KTOP

*          through KBOT.

*

*   DESCH  (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix H.

*

*   ILOZ   (global input) INTEGER

*   IHIZ   (global input) INTEGER

*          Specify the rows of Z to which transformations must be

*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N

*

*   Z      (local input/output) REAL             array of size

*          (DESCZ(LLD_),*)

*          If WANTZ = .TRUE., then the QR Sweep orthogonal

*          similarity transformation is accumulated into

*          Z(ILOZ:IHIZ,ILO:IHI) from the right.

*          If WANTZ = .FALSE., then Z is unreferenced.

*

*   DESCZ  (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix Z.

*

*   WORK   (local workspace) REAL             array, dimension(LWORK)

*

*   LWORK  (local input) INTEGER

*          The length of the workspace array WORK.

*

*   IWORK  (local workspace) INTEGER array, dimension (LIWORK)

*

*   LIWORK (local input) INTEGER

*          The length of the workspace array IWORK.

*

*     ================================================================

*     Based on contributions by

*        Robert Granat, Department of Computing Science and HPC2N,

*        University of Umea, Sweden.

*

*     ============================================================

*     References:

*       K. Braman, R. Byers, and R. Mathias,

*       The Multi-Shift QR Algorithm Part I: Maintaining Well Focused

*       Shifts, and Level 3 Performance.

*       SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.

*

*       R. Granat, B. Kagstrom, and D. Kressner,

*       A Novel Parallel QR Algorithm for Hybrid Distributed Memory HPC

*       Systems.

*       SIAM J. Sci. Comput., 32(4):2345--2378, 2010.

*

*     ============================================================

*     .. Parameters ..

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   LLD_, MB_, M_, NB_, N_, RSRC_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      REAL               ZERO, ONE

      PARAMETER          ( ZERO = 0.0e0, one = 1.0e0 )

      INTEGER            NTINY

      parameter( ntiny = 11 )

*     ..

*     .. Local Scalars ..

      REAL               ALPHA, BETA, H11, H12, H21, H22, REFSUM,

     $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,

     $                   ulp, tau, elem, stamp, ddum, orth

      INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,

     $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,

     $                   m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,

     $                   ns, nu, lldh, lldz, lldu, lldv, lldw, lldwh,

     $                   info, ictxt, nprow, npcol, nb, iroffh, itop,

     $                   nwin, myrow, mycol, lns, numwin, lkacc22,

     $                   lchain, win, idonejob, ipnext, anmwin, lenrbuf,

     $                   lencbuf, ichoff, lrsrc, lcsrc, lktop, lkbot,

     $                   ii, jj, swin, ewin, lnwin, dim, llktop, llkbot,

     $                   ipv, ipu, iph, ipw, ku, kwh, kwv, nve, lks,

     $                   idum, nho, dir, winid, indx, iloc, jloc, rsrc1,

     $                   csrc1, rsrc2, csrc2, rsrc3, csrc3, rsrc4, ipuu,

     $                   csrc4, lrows, lcols, indxs, ks, jloc1, iloc1,

     $                   lktop1, lktop2, wchunk, numchunk, oddeven,

     $                   chunknum, dim1, dim4, ipw3, hrows, zrows,

     $                   hcols, ipw1, ipw2, rsrc, east, jloc4, iloc4,

     $                   west, csrc, south, norht, indxe, north,

     $                   ihh, ipiw, lkbot1, nprocs, liroffh,

     $                   winfin, rws3, cls3, indx2, hrows2,

     $                   zrows2, hcols2, mnrbuf,

     $                   mxrbuf, mncbuf, mxcbuf, lwkopt

      LOGICAL            BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,

     $                   ODDNPCOL, LQUERY, BCDONE

      CHARACTER          JBCMPZ*2, JOB

*     ..

*     .. External Functions ..

      LOGICAL            LSAME

      INTEGER            PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC

      REAL               SLAMCH, SLANGE

      EXTERNAL           slamch, pilaenvx, iceil, indxg2p, indxg2l,

     $                   numroc, lsame, slange

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, float, max, min, mod

*     ..

*     .. Local Arrays ..

      REAL               VT( 3 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           sgemm, slabad, slamov, slaqr1, slarfg, slaset,

     $                   strmm, slaqr6

*     ..

*     .. Executable Statements ..

*

      info = 0

      ictxt = desch( ctxt_ )

      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

      nprocs = nprow*npcol

      lldh = desch( lld_ )

      lldz = descz( lld_ )

      nb = desch( mb_ )

      iroffh = mod( ktop - 1, nb )

      lquery = lwork.EQ.-1 .OR. liwork.EQ.-1

*

*     If there are no shifts, then there is nothing to do.

*

      IF( .NOT. lquery .AND. nshfts.LT.2 )

     $   RETURN

*

*     If the active block is empty or 1-by-1, then there

*     is nothing to do.

*

      IF( .NOT. lquery .AND. ktop.GE.kbot )

     $   RETURN

*

*     Shuffle shifts into pairs of real shifts and pairs of

*     complex conjugate shifts assuming complex conjugate

*     shifts are already adjacent to one another.

*

      IF( .NOT. lquery ) THEN

         DO 10 i = 1, nshfts - 2, 2

            IF( si( i ).NE.-si( i+1 ) ) THEN

*

               swap = sr( i )

               sr( i ) = sr( i+1 )

               sr( i+1 ) = sr( i+2 )

               sr( i+2 ) = swap

*

               swap = si( i )

               si( i ) = si( i+1 )

               si( i+1 ) = si( i+2 )

               si( i+2 ) = swap

            END IF

   10    CONTINUE

      END IF

*

*     NSHFTS is supposed to be even, but if it is odd,

*     then simply reduce it by one.  The shuffle above

*     ensures that the dropped shift is real and that

*     the remaining shifts are paired.

*

      ns = nshfts - mod( nshfts, 2 )

*

*     Extract the size of the computational window.

*

      nwin = pilaenvx( ictxt, 19, 'PSLAQR5', jbcmpz, n, nb, nb, nb )

      nwin = min( nwin, kbot-ktop+1 )

*

*     Adjust number of simultaneous shifts if it exceeds the limit

*     set by the number of diagonal blocks in the active submatrix

*     H(KTOP:KBOT,KTOP:KBOT).

*

      ns = max( 2, min( ns, iceil( kbot-ktop+1, nb )*nwin/3 ) )

      ns = ns - mod( ns, 2 )


*

*     Decide the number of simultaneous computational windows

*     from the number of shifts - each window should contain up to

*     (NWIN / 3) shifts. Also compute the number of shifts per

*     window and make sure that number is even.

*

      lns = min( max( 2, nwin / 3 ), max( 2, ns / min(nprow,npcol) ) )

      lns = lns - mod( lns, 2 )

      numwin = max( 1, min( iceil( ns, lns ),

     $     iceil( kbot-ktop+1, nb ) - 1 ) )

      IF( nprow.NE.npcol ) THEN

         numwin = min( numwin, min(nprow,npcol) )

         lns = min( lns, max( 2, ns / min(nprow,npcol) ) )

         lns = lns - mod( lns, 2 )

      END IF

*

*     Machine constants for deflation.

*

      safmin = slamch( 'SAFE MINIMUM' )

      safmax = one / safmin

      CALL slabad( safmin, safmax )

      ulp = slamch( 'PRECISION' )

      smlnum = safmin*( float( n ) / ulp )

*

*     Use accumulated reflections to update far-from-diagonal

*     entries on a local level?

*

      IF( lns.LT.14 ) THEN

         lkacc22 = 1

      ELSE

         lkacc22 = 2

      END IF

*

*     If so, exploit the 2-by-2 block structure?

*     ( Usually it is not efficient to exploit the 2-by-2 structure

*       because the block size is too small. )

*

      blk22 = ( lns.GT.2 ) .AND. ( kacc22.EQ.2 )

*

*     Clear trash.

*

      IF( .NOT. lquery .AND. ktop+2.LE.kbot )

     $   CALL pselset( h, ktop+2, ktop, desch, zero )

*

*     NBMPS = number of 2-shift bulges in each chain

*

      nbmps = lns / 2

*

*     KDU = width of slab

*

      kdu = 6*nbmps - 3

*

*     LCHAIN = length of each chain

*

      lchain = 3 * nbmps + 1

*

*     Check if workspace query.

*

      IF( lquery ) THEN

         hrows = numroc( n, nb, myrow, desch(rsrc_), nprow )

         hcols = numroc( n, nb, mycol, desch(csrc_), npcol )

         lwkopt = (5+2*numwin)*nb**2 + 2*hrows*nb + hcols*nb +

     $        max( hrows*nb, hcols*nb )

         work(1)  = float(lwkopt)

         iwork(1) = 5*numwin

         RETURN

      END IF

*

*     Check if KTOP and KBOT are valid.

*

      IF( ktop.LT.1 .OR. kbot.GT.n ) RETURN

*

*     Create and chase NUMWIN chains of NBMPS bulges.

*

*     Set up window introduction.

*

      anmwin = 0

      intro = .true.

      ipiw = 1

*

*     Main loop:

*     While-loop over the computational windows which is

*     terminated when all windows have been introduced,

*     chased down to the bottom of the considered submatrix

*     and chased off.

*

 20   CONTINUE

*

*     Set up next window as long as we have less than the prescribed

*     number of windows. Each window is described by an integer

*     quadruple:

*     1. Local value of KTOP (below denoted by LKTOP)

*     2. Local value of KBOT (below denoted by LKBOT)

*     3-4. Processor indices (LRSRC,LCSRC) associated with the window.

*     (5. Mark that decides if a window is fully processed or not)

*

*     Notice - the next window is only introduced if the first block

*     in the active submatrix does not contain any other windows.

*

      IF( anmwin.GT.0 ) THEN

         lktop = iwork( 1+(anmwin-1)*5 )

      ELSE

         lktop = ktop

      END IF

      IF( intro .AND. (anmwin.EQ.0 .OR. lktop.GT.iceil(ktop,nb)*nb) )

     $     THEN

         anmwin = anmwin + 1

*

*        Structure of IWORK:

*        IWORK( 1+(WIN-1)*5 ): start position

*        IWORK( 2+(WIN-1)*5 ): stop position

*        IWORK( 3+(WIN-1)*5 ): processor row id

*        IWORK( 4+(WIN-1)*5 ): processor col id

*        IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)

*

         iwork( 1+(anmwin-1)*5 ) = ktop

         iwork( 2+(anmwin-1)*5 ) = ktop +

     $                             min( nwin,nb-iroffh,kbot-ktop+1 ) - 1

         iwork( 3+(anmwin-1)*5 ) = indxg2p( iwork(1+(anmwin-1)*5), nb,

     $                             myrow, desch(rsrc_), nprow )

         iwork( 4+(anmwin-1)*5 ) = indxg2p( iwork(2+(anmwin-1)*5), nb,

     $                             mycol, desch(csrc_), npcol )

         iwork( 5+(anmwin-1)*5 ) = 0

         ipiw = 6+(anmwin-1)*5

         IF( anmwin.EQ.numwin ) intro = .false.

      END IF

*

*     Do-loop over the number of windows.

*

      ipnext = 1

      donejob = .false.

      idonejob = 0

      lenrbuf = 0

      lencbuf = 0

      ichoff = 0

      DO 40 win = 1, anmwin

*

*        Extract window information to simplify the rest.

*

         lrsrc = iwork( 3+(win-1)*5 )

         lcsrc = iwork( 4+(win-1)*5 )

         lktop = iwork( 1+(win-1)*5 )

         lkbot = iwork( 2+(win-1)*5 )

         lnwin = lkbot - lktop + 1

*

*        Check if anything to do for current window, i.e., if the local

*        chain of bulges has reached the next block border etc.

*

         IF( iwork(5+(win-1)*5).LT.2 .AND. lnwin.GT.1 .AND.

     $        (lnwin.GT.lchain .OR. lkbot.EQ.kbot ) ) THEN

            liroffh = mod(lktop-1,nb)

            swin = lktop-liroffh

            ewin = min(kbot,lktop-liroffh+nb-1)

            dim = ewin-swin+1

            IF( dim.LE.ntiny .AND. .NOT.lkbot.EQ.kbot ) THEN

               iwork( 5+(win-1)*5 ) = 2

               GO TO 45

            END IF

            idonejob = 1

            IF( iwork(5+(win-1)*5).EQ.0 ) THEN

               iwork(5+(win-1)*5) = 1

            END IF

*

*           Let the process that owns the corresponding window do the

*           local bulge chase.

*

            IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN

*

*              Set the kind of job to do in SLAQR6:

*              1. JOB = 'I': Introduce and chase bulges in window WIN

*              2. JOB = 'C': Chase bulges from top to bottom of window WIN

*              3. JOB = 'O': Chase bulges off window WIN

*              4. JOB = 'A': All of 1-3 above is done - this will for

*                            example happen for very small active

*                            submatrices (like 2-by-2)

*

               llkbot = llktop + lnwin - 1

               IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                  job = 'All steps'

                  ichoff = 1

               ELSEIF( lktop.EQ.ktop ) THEN

                  job = 'Introduce and chase'

               ELSEIF( lkbot.EQ.kbot ) THEN

                  job = 'Off-chase bulges'

                  ichoff = 1

               ELSE

                  job = 'Chase bulges'

               END IF

*

*              Copy submatrix of H corresponding to window WIN into

*              workspace and set out additional workspace for storing

*              orthogonal transformations. This submatrix must be at

*              least (NTINY+1)-by-(NTINY+1) to fit into SLAQR6 - if not,

*              abort and go for cross border bulge chasing with this

*              particular window.

*

               ii = indxg2l( swin, nb, myrow, desch(rsrc_), nprow )

               jj = indxg2l( swin, nb, mycol, desch(csrc_), npcol )

               llktop = 1 + liroffh

               llkbot = llktop + lnwin - 1

*

               ipu = ipnext

               iph = ipu + lnwin**2

               ipuu = iph + max(ntiny+1,dim)**2

               ipv = ipuu + max(ntiny+1,dim)**2

               ipnext = iph

*

               IF( lsame( job, 'A' ) .OR. lsame( job, 'O' ) .AND.

     $              dim.LT.ntiny+1 ) THEN

                  CALL slaset( 'All', ntiny+1, ntiny+1, zero, one,

     $                 work(iph), ntiny+1 )

               END IF

               CALL slamov( 'Upper', dim, dim, h(ii+(jj-1)*lldh), lldh,

     $              work(iph), max(ntiny+1,dim) )

               CALL scopy(  dim-1, h(ii+(jj-1)*lldh+1), lldh+1,

     $              work(iph+1), max(ntiny+1,dim)+1 )

               IF( lsame( job, 'C' ) .OR. lsame( job, 'O') ) THEN

                  CALL scopy(  dim-2, h(ii+(jj-1)*lldh+2), lldh+1,

     $                 work(iph+2), max(ntiny+1,dim)+1 )

                  CALL scopy(  dim-3, h(ii+(jj-1)*lldh+3), lldh+1,

     $                 work(iph+3), max(ntiny+1,dim)+1 )

                  CALL slaset( 'Lower', dim-4, dim-4, zero,

     $                 zero, work(iph+4), max(ntiny+1,dim) )

               ELSE

                  CALL slaset( 'Lower', dim-2, dim-2, zero,

     $                 zero, work(iph+2), max(ntiny+1,dim) )

               END IF

*

               ku = max(ntiny+1,dim) - kdu + 1

               kwh = kdu + 1

               nho = ( max(ntiny+1,dim)-kdu+1-4 ) - ( kdu+1 ) + 1

               kwv = kdu + 4

               nve = max(ntiny+1,dim) - kdu - kwv + 1

               CALL slaset( 'All', max(ntiny+1,dim),

     $              max(ntiny+1,dim), zero, one, work(ipuu),

     $              max(ntiny+1,dim) )

*

*              Small-bulge multi-shift QR sweep.

*

               lks = max( 1, ns - win*lns + 1 )

               CALL slaqr6( job, wantt, .true., lkacc22,

     $              max(ntiny+1,dim), llktop, llkbot, lns, sr( lks ),

     $              si( lks ), work(iph), max(ntiny+1,dim), llktop,

     $              llkbot, work(ipuu), max(ntiny+1,dim), work(ipu),

     $              3, work( iph+ku-1 ),

     $              max(ntiny+1,dim), nve, work( iph+kwv-1 ),

     $              max(ntiny+1,dim), nho, work( iph-1+ku+(kwh-1)*

     $              max(ntiny+1,dim) ), max(ntiny+1,dim) )

*

*              Copy submatrix of H back.

*

               CALL slamov( 'Upper', dim, dim, work(iph),

     $              max(ntiny+1,dim), h(ii+(jj-1)*lldh), lldh )

               CALL scopy( dim-1, work(iph+1), max(ntiny+1,dim)+1,

     $              h(ii+(jj-1)*lldh+1), lldh+1 )

               IF( lsame( job, 'I' ) .OR. lsame( job, 'C' ) ) THEN

                  CALL scopy( dim-2, work(iph+2), dim+1,

     $                 h(ii+(jj-1)*lldh+2), lldh+1 )

                  CALL scopy( dim-3, work(iph+3), dim+1,

     $                 h(ii+(jj-1)*lldh+3), lldh+1 )

               ELSE

                  CALL slaset( 'Lower', dim-2, dim-2, zero,

     $                 zero, h(ii+(jj-1)*lldh+2), lldh )

               END IF

*

*              Copy actual submatrix of U to the correct place

*              of the buffer.

*

               CALL slamov( 'All', lnwin, lnwin,

     $              work(ipuu+(max(ntiny+1,dim)*liroffh)+liroffh),

     $              max(ntiny+1,dim), work(ipu), lnwin )

            END IF

*

*           In case the local submatrix was smaller than

*           (NTINY+1)-by-(NTINY+1) we go here and proceed.

*

 45         CONTINUE

         ELSE

            iwork( 5+(win-1)*5 ) = 2

         END IF

*

*        Increment counter for buffers of orthogonal transformations.

*

         IF( myrow.EQ.lrsrc .OR. mycol.EQ.lcsrc ) THEN

            IF( idonejob.EQ.1 .AND. iwork(5+(win-1)*5).LT.2 ) THEN

               IF( myrow.EQ.lrsrc ) lenrbuf = lenrbuf + lnwin*lnwin

               IF( mycol.EQ.lcsrc ) lencbuf = lencbuf + lnwin*lnwin

            END IF

         END IF

 40   CONTINUE

*

*     Did some work in the above do-loop?

*

      CALL igsum2d( ictxt, 'All', '1-Tree', 1, 1, idonejob, 1, -1, -1 )

      donejob = idonejob.GT.0

*

*     Chased off bulges from first window?

*

      IF( nprocs.GT.1 )

     $   CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1, -1,

     $        -1, -1, -1, -1 )

*

*     If work was done in the do-loop over local windows, perform

*     updates, otherwise go for cross border bulge chasing and updates.

*

      IF( donejob ) THEN

*

*        Broadcast orthogonal transformations.

*

 49      CONTINUE

         IF( lenrbuf.GT.0 .OR. lencbuf.GT.0 ) THEN

            DO 50 dir = 1, 2

               bcdone = .false.

               DO 60 win = 1, anmwin

                  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.

     $                 bcdone ) GO TO 62

                  lrsrc = iwork( 3+(win-1)*5 )

                  lcsrc = iwork( 4+(win-1)*5 )

                  IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN

                     IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                    npcol.GT.1 ) THEN

                        CALL sgebs2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf )

                     ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                    nprow.GT.1 ) THEN

                        CALL sgebs2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work, lencbuf )

                     END IF

                     IF( lenrbuf.GT.0 )

     $                  CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,

     $                       work(1+lenrbuf), lencbuf )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.lrsrc .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 ) THEN

                        CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, lrsrc, lcsrc )

                        bcdone = .true.

                     END IF

                  ELSEIF( mycol.EQ.lcsrc .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 ) THEN

                        CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, lrsrc, lcsrc )

                        bcdone = .true.

                     END IF

                  END IF

 62               CONTINUE

 60            CONTINUE

 50         CONTINUE

         END IF

*

*        Compute updates - make sure to skip windows that were skipped

*        regarding local bulge chasing.

*

         DO 65 dir = 1, 2

            winid = 0

            IF( dir.EQ.1 ) THEN

               ipnext = 1

            ELSE

               ipnext = 1 + lenrbuf

            END IF

            DO 70 win = 1, anmwin

               IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 75

               lrsrc = iwork( 3+(win-1)*5 )

               lcsrc = iwork( 4+(win-1)*5 )

               lktop = iwork( 1+(win-1)*5 )

               lkbot = iwork( 2+(win-1)*5 )

               lnwin = lkbot - lktop + 1

               IF( (myrow.EQ.lrsrc.AND.lenrbuf.GT.0.AND.dir.EQ.1) .OR.

     $              (mycol.EQ.lcsrc.AND.lencbuf.GT.0.AND.dir.EQ.2 ) )

     $              THEN

*

*                 Set up workspaces.

*

                  ipu = ipnext

                  ipnext = ipu + lnwin*lnwin

                  ipw = 1 + lenrbuf + lencbuf

                  liroffh = mod(lktop-1,nb)

                  winid = winid + 1

*

*                 Recompute JOB to see if block structure of U could

*                 possibly be exploited or not.

*

                  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                     job = 'All steps'

                  ELSEIF( lktop.EQ.ktop ) THEN

                     job = 'Introduce and chase'

                  ELSEIF( lkbot.EQ.kbot ) THEN

                     job = 'Off-chase bulges'

                  ELSE

                     job = 'Chase bulges'

                  END IF

               END IF

*

*              Use U to update far-from-diagonal entries in H.

*              If required, use U to update Z as well.

*

               IF( .NOT. blk22 .OR. .NOT. lsame(job,'C')

     $              .OR. lns.LE.2 ) THEN

*

                  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                 mycol.EQ.lcsrc ) THEN

                     IF( wantt ) THEN

                        DO 80 indx = 1, lktop-liroffh-1, nb

                           CALL infog2l( indx, lktop, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lrows = min( nb, lktop-indx )

                              CALL sgemm('No transpose', 'No transpose',

     $                             lrows, lnwin, lnwin, one,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipu ), lnwin, zero,

     $                             work(ipw),

     $                             lrows )

                              CALL slamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 80                     CONTINUE

                     END IF

                     IF( wantz ) THEN

                        DO 90 indx = 1, n, nb

                           CALL infog2l( indx, lktop, descz, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lrows = min(nb,n-indx+1)

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, lnwin, lnwin,

     $                             one, z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipu ), lnwin, zero,

     $                             work(ipw), lrows )

                              CALL slamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             z((jloc-1)*lldz+iloc), lldz )

                           END IF

 90                     CONTINUE

                     END IF

                  END IF

*

*                 Update the rows of H affected by the bulge-chase.

*

                  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                 myrow.EQ.lrsrc ) THEN

                     IF( wantt ) THEN

                        IF( iceil(lkbot,nb).EQ.iceil(kbot,nb) ) THEN

                           lcols = min(iceil(kbot,nb)*nb,n) - kbot

                        ELSE

                           lcols = 0

                        END IF

                        IF( lcols.GT.0 ) THEN

                           indx = kbot + 1

                           CALL infog2l( lktop, indx, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc,

     $                          rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             lnwin, lcols, lnwin, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             zero, work(ipw), lnwin )

                              CALL slamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                        END IF

 93                     CONTINUE

                        indxs = iceil(lkbot,nb)*nb + 1

                        DO 95 indx = indxs, n, nb

                           CALL infog2l( lktop, indx,

     $                          desch, nprow, npcol, myrow, mycol,

     $                          iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lcols = min( nb, n-indx+1 )

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             lnwin, lcols, lnwin, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             zero, work(ipw),

     $                             lnwin )

                              CALL slamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 95                     CONTINUE

                     END IF

                  END IF

               ELSE

                  ks = lnwin-lns/2*3

*

*                 The LNWIN-by-LNWIN matrix U containing the accumulated

*                 orthogonal transformations has the following structure:

*

*                     [ U11  U12 ]

*                 U = [          ],

*                     [ U21  U22 ]

*

*                 where U21 is KS-by-KS upper triangular and U12 is

*                 (LNWIN-KS)-by-(LNWIN-KS) lower triangular.

*                 Here, KS = LNWIN - 3*(LNS/2).

*

*                 Update the columns of H and Z affected by the bulge

*                 chasing.

*

*                 Compute H2*U21 + H1*U11 in workspace.

*

                  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                 mycol.EQ.lcsrc ) THEN

                     IF( wantt ) THEN

                        DO 100 indx = 1, lktop-liroffh-1, nb

                           CALL infog2l( indx, lktop, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              jloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             mycol, desch( csrc_ ), npcol )

                              lrows = min( nb, lktop-indx )

                              CALL slamov( 'All', lrows, ks,

     $                             h((jloc1-1)*lldh+iloc ), lldh,

     $                             work(ipw), lrows )

                              CALL strmm( 'Right', 'Upper',

     $                             'No transpose','Non-unit', lrows,

     $                             ks, one, work( ipu+lnwin-ks ), lnwin,

     $                             work(ipw), lrows )

                              CALL sgemm('No transpose', 'No transpose',

     $                             lrows, ks, lnwin-ks, one,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipu ), lnwin, one, work(ipw),

     $                             lrows )

*

*                             Compute H1*U12 + H2*U22 in workspace.

*

                              CALL slamov( 'All', lrows, lnwin-ks,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL strmm( 'Right', 'Lower',

     $                             'No transpose', 'Non-Unit',

     $                             lrows, lnwin-ks, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL sgemm('No transpose', 'No transpose',

     $                             lrows, lnwin-ks, ks, one,

     $                             h((jloc1-1)*lldh+iloc), lldh,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             one, work( ipw+ks*lrows ), lrows )

*

*                             Copy workspace to H.

*

                              CALL slamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 100                    CONTINUE

                     END IF

*

                     IF( wantz ) THEN

*

*                       Compute Z2*U21 + Z1*U11 in workspace.

*

                        DO 110 indx = 1, n, nb

                           CALL infog2l( indx, lktop, descz, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              jloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             mycol, descz( csrc_ ), npcol )

                              lrows = min(nb,n-indx+1)

                              CALL slamov( 'All', lrows, ks,

     $                             z((jloc1-1)*lldz+iloc ), lldz,

     $                             work(ipw), lrows )

                              CALL strmm( 'Right', 'Upper',

     $                             'No transpose', 'Non-unit',

     $                             lrows, ks, one, work( ipu+lnwin-ks ),

     $                             lnwin, work(ipw), lrows )

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, ks, lnwin-ks,

     $                             one, z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipu ), lnwin, one, work(ipw),

     $                             lrows )

*

*                             Compute Z1*U12 + Z2*U22 in workspace.

*

                              CALL slamov( 'All', lrows, lnwin-ks,

     $                             z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipw+ks*lrows ), lrows)

                              CALL strmm( 'Right', 'Lower',

     $                             'No transpose', 'Non-unit',

     $                             lrows, lnwin-ks, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, lnwin-ks, ks,

     $                             one, z((jloc1-1)*lldz+iloc), lldz,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             one, work( ipw+ks*lrows ),

     $                             lrows )

*

*                             Copy workspace to Z.

*

                              CALL slamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             z((jloc-1)*lldz+iloc), lldz )

                           END IF

 110                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                 myrow.EQ.lrsrc ) THEN

                     IF( wantt ) THEN

                        indxs = iceil(lkbot,nb)*nb + 1

                        DO 120 indx = indxs, n, nb

                           CALL infog2l( lktop, indx,

     $                          desch, nprow, npcol, myrow, mycol, iloc,

     $                          jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

*

*                             Compute U21**T*H2 + U11**T*H1 in workspace.

*

                              iloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             myrow, desch( rsrc_ ), nprow )

                              lcols = min( nb, n-indx+1 )

                              CALL slamov( 'All', ks, lcols,

     $                             h((jloc-1)*lldh+iloc1), lldh,

     $                             work(ipw), lnwin )

                              CALL strmm( 'Left', 'Upper', 'Transpose',

     $                             'Non-unit', ks, lcols, one,

     $                             work( ipu+lnwin-ks ), lnwin,

     $                             work(ipw), lnwin )

                              CALL sgemm( 'Transpose', 'No transpose',

     $                             ks, lcols, lnwin-ks, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             one, work(ipw), lnwin )

*

*                             Compute U12**T*H1 + U22**T*H2 in workspace.

*

                              CALL slamov( 'All', lnwin-ks, lcols,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipw+ks ), lnwin )

                              CALL strmm( 'Left', 'Lower', 'Transpose',

     $                             'Non-unit', lnwin-ks, lcols, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks ), lnwin )

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             lnwin-ks, lcols, ks, one,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             h((jloc-1)*lldh+iloc1), lldh,

     $                             one, work( ipw+ks ), lnwin )

*

*                             Copy workspace to H.

*

                              CALL slamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 120                    CONTINUE

                     END IF

                  END IF

               END IF

*

*              Update position information about current window.

*

               IF( dir.EQ.2 ) THEN

                  IF( lkbot.EQ.kbot ) THEN

                     lktop = kbot+1

                     lkbot = kbot+1

                     iwork( 1+(win-1)*5 ) = lktop

                     iwork( 2+(win-1)*5 ) = lkbot

                     iwork( 5+(win-1)*5 ) = 2

                  ELSE

                     lktop = min( lktop + lnwin - lchain,

     $                    iceil( lktop, nb )*nb - lchain + 1,

     $                    kbot )

                     iwork( 1+(win-1)*5 ) = lktop

                     lkbot = min( lkbot + lnwin - lchain,

     $                    iceil( lkbot, nb )*nb, kbot )

                     iwork( 2+(win-1)*5 ) = lkbot

                     lnwin = lkbot-lktop+1

                     IF( lnwin.EQ.lchain ) iwork(5+(win-1)*5) = 2

                  END IF

               END IF

 75            CONTINUE

 70         CONTINUE

 65      CONTINUE

*

*        If bulges were chased off from first window, the window is

*        removed.

*

         IF( ichoff.GT.0 ) THEN

            DO 128 win = 2, anmwin

               iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )

               iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )

               iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )

               iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )

               iwork( 5+(win-2)*5 ) = iwork( 5+(win-1)*5 )

 128        CONTINUE

            anmwin = anmwin - 1

            ipiw = 6+(anmwin-1)*5

         END IF

*

*        If we have no more windows, return.

*

         IF( anmwin.LT.1 ) RETURN

*

      ELSE

*

*        Set up windows such that as many bulges as possible can be

*        moved over the border to the next block. Make sure that the

*        cross border window is at least (NTINY+1)-by-(NTINY+1), unless

*        we are chasing off the bulges from the last window. This is

*        accomplished by setting the bottom index LKBOT such that the

*        local window has the correct size.

*

*        If LKBOT then becomes larger than KBOT, the endpoint of the whole

*        global submatrix, or LKTOP from a window already residing

*        at the other side of the border, this is taken care of by some

*        dirty tricks.

*

         DO 130 win = 1, anmwin

            lktop1 = iwork( 1+(win-1)*5 )

            lkbot = iwork( 2+(win-1)*5 )

            lnwin = max( 6, min( lkbot - lktop1 + 1, lchain ) )

            lkbot1 = max( min( kbot, iceil(lktop1,nb)*nb+lchain),

     $           min( kbot, min( lktop1+2*lnwin-1,

     $           (iceil(lktop1,nb)+1)*nb ) ) )

            iwork( 2+(win-1)*5 ) = lkbot1

 130     CONTINUE

         ichoff = 0

*

*        Keep a record of which windows were moved over the borders

*        such that we can delay some windows due to lack of space on the

*        other side of the border; we do not want to leave any of the

*        bulges behind...

*

*        IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed

*        IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to

*                                  know for updates)

*        IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed

*

*        So, start by marking all windows as not processed.

*

         DO 135 win = 1, anmwin

            iwork( 5+(win-1)*5 ) = 0

 135     CONTINUE

*

*        Do the cross border bulge-chase as follows: Start from the

*        first window (the one that is closest to being chased off the

*        diagonal of H) and take the odd windows first followed by the

*        even ones. To not get into hang-problems on processor meshes

*        with at least one odd dimension, the windows will in such a case

*        be processed in chunks of {the minimum odd process dimension}-1

*        windows to avoid overlapping processor scopes in forming the

*        cross border computational windows and the cross border update

*        regions.

*

         wchunk = max( 1, min( anmwin, nprow-1, npcol-1 ) )

         numchunk = iceil( anmwin, wchunk )

*

*        Based on the computed chunk of windows, start working with

*        crossborder bulge-chasing. Repeat this as long as there is

*        still work left to do (137 is a kind of do-while statement).

*

 137     CONTINUE

*

*        Zero out LENRBUF and LENCBUF each time we restart this loop.

*

         lenrbuf = 0

         lencbuf = 0

*

         DO 140 oddeven = 1, min( 2, anmwin )

         DO 150 chunknum = 1, numchunk

            ipnext = 1

            DO 160 win = oddeven+(chunknum-1)*wchunk,

     $           min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

*

*              Get position and size of the WIN:th active window and

*              make sure that we skip the cross border bulge for this

*              window if the window is not shared between several data

*              layout blocks (and processors).

*

*              Also, delay windows that do not have sufficient size on

*              the other side of the border. Moreover, make sure to skip

*              windows that were already processed in the last round of

*              the do-while loop (137).

*

               IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 165

               lktop = iwork( 1+(win-1)*5 )

               lkbot = iwork( 2+(win-1)*5 )

               IF( win.GT.1 ) THEN

                  lktop2 = iwork( 1+(win-2)*5 )

               ELSE

                  lktop2 = kbot+1

               END IF

               IF( iceil(lktop,nb).EQ.iceil(lkbot,nb) .OR.

     $              lkbot.GE.lktop2 ) GO TO 165

               lnwin = lkbot - lktop + 1

               IF( lnwin.LE.ntiny .AND. lkbot.NE.kbot .AND.

     $              .NOT. mod(lkbot,nb).EQ.0  ) GO TO 165

*

*              The window will be processed; mark it as being processed.

*

               iwork( 5+(win-1)*5 ) = 1

*

*              Extract processors for current cross border window,

*              as below:

*

*                        1 | 2

*                        --+--

*                        3 | 4

*

               rsrc1 = iwork( 3+(win-1)*5 )

               csrc1 = iwork( 4+(win-1)*5 )

               rsrc2 = rsrc1

               csrc2 = mod( csrc1+1, npcol )

               rsrc3 = mod( rsrc1+1, nprow )

               csrc3 = csrc1

               rsrc4 = mod( rsrc1+1, nprow )

               csrc4 = mod( csrc1+1, npcol )

*

*              Form group of four processors for cross border window.

*

               IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $              ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.

     $              ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.

     $              ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

*

*                 Compute the upper and lower parts of the active

*                 window.

*

                  dim1 = nb - mod(lktop-1,nb)

                  dim4 = lnwin - dim1

*

*                 Temporarily compute a new value of the size of the

*                 computational window that is larger than or equal to

*                 NTINY+1; call the *real* value DIM.

*

                  dim = lnwin

                  lnwin = max(ntiny+1,lnwin)

*

*                 Divide workspace.

*

                  ipu = ipnext

                  iph = ipu + dim**2

                  ipuu = iph + lnwin**2

                  ipv = ipuu + lnwin**2

                  ipnext = iph

                  IF( dim.LT.lnwin ) THEN

                     CALL slaset( 'All', lnwin, lnwin, zero,

     $                    one, work( iph ), lnwin )

                  ELSE

                     CALL slaset( 'All', dim, dim, zero,

     $                    zero, work( iph ), lnwin )

                  END IF

*

*                 Form the active window.

*

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL slamov( 'All', dim1, dim1,

     $                    h((jloc-1)*lldh+iloc), lldh, work(iph),

     $                    lnwin )

                     IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN

*                       Proc#1 <==> Proc#4

                        CALL sgesd2d( ictxt, dim1, dim1,

     $                       work(iph), lnwin, rsrc4, csrc4 )

                        CALL sgerv2d( ictxt, dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL slamov( 'All', dim4, dim4,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+dim1*lnwin+dim1),

     $                    lnwin )

                     IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN

*                       Proc#4 <==> Proc#1

                        CALL sgesd2d( ictxt, dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, rsrc1, csrc1 )

                        CALL sgerv2d( ictxt, dim1, dim1,

     $                       work(iph), lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL slamov( 'All', dim1, dim4,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+dim1*lnwin), lnwin )

                     IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN

*                       Proc#2 ==> Proc#1

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

*                       Proc#2 ==> Proc#4

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1-1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL slamov( 'All', 1, 1,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+(dim1-1)*lnwin+dim1),

     $                    lnwin )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

*                       Proc#3 ==> Proc#1

                        CALL sgesd2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN

*                       Proc#3 ==> Proc#4

                        CALL sgesd2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN

*                       Proc#1 <== Proc#2

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

*                       Proc#1 <== Proc#3

                        CALL sgerv2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

*                       Proc#4 <== Proc#2

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                     IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN

*                       Proc#4 <== Proc#3

                        CALL sgerv2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

*

*                 Prepare for call to SLAQR6 - it could happen that no

*                 bulges were introduced in the pre-cross border step

*                 since the chain was too long to fit in the top-left

*                 part of the cross border window. In such a case, the

*                 bulges are introduced here instead.  It could also

*                 happen that the bottom-right part is too small to hold

*                 the whole chain -- in such a case, the bulges are

*                 chased off immediately, as well.

*

                  IF( (myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1) .OR.

     $                 (myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4) ) THEN

                     IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot .AND.

     $                    (dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN

                        job = 'All steps'

                        ichoff = 1

                     ELSEIF( lktop.EQ.ktop .AND.

     $                    ( dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN

                        job = 'Introduce and chase'

                     ELSEIF( lkbot.EQ.kbot ) THEN

                        job = 'Off-chase bulges'

                        ichoff = 1

                     ELSE

                        job = 'Chase bulges'

                     END IF

                     ku = lnwin - kdu + 1

                     kwh = kdu + 1

                     nho = ( lnwin-kdu+1-4 ) - ( kdu+1 ) + 1

                     kwv = kdu + 4

                     nve = lnwin - kdu - kwv + 1

                     CALL slaset( 'All', lnwin, lnwin,

     $                    zero, one, work(ipuu), lnwin )

*

*                    Small-bulge multi-shift QR sweep.

*

                     lks = max(1, ns - win*lns + 1)

                     CALL slaqr6( job, wantt, .true., lkacc22, lnwin,

     $                    1, dim, lns, sr( lks ), si( lks ),

     $                    work(iph), lnwin, 1, dim,

     $                    work(ipuu), lnwin, work(ipu), 3,

     $                    work( iph+ku-1 ), lnwin, nve,

     $                    work( iph+kwv-1 ), lnwin, nho,

     $                    work( iph-1+ku+(kwh-1)*lnwin ), lnwin )

*

*                    Copy local submatrices of H back to global matrix.

*

                     IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                        iloc = indxg2l( lktop, nb, myrow,

     $                       desch( rsrc_ ), nprow )

                        jloc = indxg2l( lktop, nb, mycol,

     $                       desch( csrc_ ), npcol )

                        CALL slamov( 'All', dim1, dim1, work(iph),

     $                       lnwin, h((jloc-1)*lldh+iloc),

     $                       lldh )

                     END IF

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        iloc = indxg2l( lktop+dim1, nb, myrow,

     $                       desch( rsrc_ ), nprow )

                        jloc = indxg2l( lktop+dim1, nb, mycol,

     $                       desch( csrc_ ), npcol )

                        CALL slamov( 'All', dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, h((jloc-1)*lldh+iloc), lldh )

                     END IF

*

*                    Copy actual submatrix of U to the correct place of

*                    the buffer.

*

                     CALL slamov( 'All', dim, dim,

     $                    work(ipuu), lnwin, work(ipu), dim )

                  END IF

*

*                 Return data to process 2 and 3.

*

                  rws3 = min(3,dim4)

                  cls3 = min(3,dim1)

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

*                       Proc#1 ==> Proc#3

                        CALL sgesd2d( ictxt, rws3, cls3,

     $                       work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

*                       Proc#4 ==> Proc#2

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work( iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

*                       Proc#2 <== Proc#4

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                     CALL slamov( 'All', dim1, dim4,

     $                    work( iph+dim1*lnwin ), lnwin,

     $                    h((jloc-1)*lldh+iloc), lldh )

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1-cls3, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

*                       Proc#3 <== Proc#1

                        CALL sgerv2d( ictxt, rws3, cls3,

     $                       work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                     CALL slamov( 'Upper', rws3, cls3,

     $                    work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                    lnwin, h((jloc-1)*lldh+iloc),

     $                    lldh )

                     IF( rws3.GT.1 .AND. cls3.GT.1 ) THEN

                        elem = work( iph+(dim1-cls3)*lnwin+dim1+1 )

                        IF( elem.NE.zero ) THEN

                           CALL slamov( 'Lower', rws3-1, cls3-1,

     $                          work( iph+(dim1-cls3)*lnwin+dim1+1 ),

     $                          lnwin, h((jloc-1)*lldh+iloc+1), lldh )

                        END IF

                     END IF

                  END IF

*

*                 Restore correct value of LNWIN.

*

                  lnwin = dim

*

               END IF

*

*              Increment counter for buffers of orthogonal

*              transformations.

*

               IF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 .OR.

     $              myrow.EQ.rsrc4 .OR. mycol.EQ.csrc4 ) THEN

                  IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )

     $               lenrbuf = lenrbuf + lnwin*lnwin

                  IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )

     $               lencbuf = lencbuf + lnwin*lnwin

               END IF

*

*              If no cross border bulge chasing was performed for the

*              current WIN:th window, the processor jumps to this point

*              and considers the next one.

*

 165           CONTINUE

*

 160        CONTINUE

*

*           Broadcast orthogonal transformations -- this will only happen

*           if the buffer associated with the orthogonal transformations

*           is not empty (controlled by LENRBUF, for row-wise

*           broadcasts, and LENCBUF, for column-wise broadcasts).

*

            DO 170 dir = 1, 2

               bcdone = .false.

               DO 180 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.

     $                 bcdone ) GO TO 185

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

                  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $                 ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

                     IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                    npcol.GT.1 .AND. nprocs.GT.2 ) THEN

                        IF( myrow.EQ.rsrc1 .OR. ( myrow.EQ.rsrc4

     $                       .AND. rsrc4.NE.rsrc1 ) ) THEN

                           CALL sgebs2d( ictxt, 'Row', '1-Tree',

     $                          lenrbuf, 1, work, lenrbuf )

                        ELSE

                           CALL sgebr2d( ictxt, 'Row', '1-Tree',

     $                          lenrbuf, 1, work, lenrbuf, rsrc1,

     $                          csrc1 )

                        END IF

                     ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                       nprow.GT.1 .AND. nprocs.GT.2 ) THEN

                        IF( mycol.EQ.csrc1 .OR. ( mycol.EQ.csrc4

     $                       .AND. csrc4.NE.csrc1 ) ) THEN

                           CALL sgebs2d( ictxt, 'Col', '1-Tree',

     $                          lencbuf, 1, work, lencbuf )

                        ELSE

                           CALL sgebr2d( ictxt, 'Col', '1-Tree',

     $                          lencbuf, 1, work(1+lenrbuf), lencbuf,

     $                          rsrc1, csrc1 )

                        END IF

                     END IF

                     IF( lenrbuf.GT.0 .AND. ( mycol.EQ.csrc1 .OR.

     $                    ( mycol.EQ.csrc4 .AND. csrc4.NE.csrc1 ) ) )

     $                  CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,

     $                       work(1+lenrbuf), lencbuf )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.rsrc1 .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 )

     $                  CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, rsrc1, csrc1 )

                     bcdone = .true.

                  ELSEIF( mycol.EQ.csrc1 .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 )

     $                  CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, rsrc1, csrc1 )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 )

     $                  CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, rsrc4, csrc4 )

                     bcdone = .true.

                  ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 )

     $                  CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, rsrc4, csrc4 )

                     bcdone = .true.

                  END IF

 185              CONTINUE

 180           CONTINUE

 170        CONTINUE

*

*           Prepare for computing cross border updates by exchanging

*           data in cross border update regions in H and Z.

*

            DO 190 dir = 1, 2

               winid = 0

               ipw3 = 1

               DO 200 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 205

*

*                 Make sure this part of the code is only executed when

*                 there has been some work performed on the WIN:th

*                 window.

*

                  lktop = iwork( 1+(win-1)*5 )

                  lkbot = iwork( 2+(win-1)*5 )

*

*                 Extract processor indices associated with

*                 the current window.

*

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

*

*                 Compute local number of rows and columns

*                 of H and Z to exchange.

*

                  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1)) THEN

                     winid = winid + 1

                     lnwin = lkbot - lktop + 1

                     ipu = ipnext

                     dim1 = nb - mod(lktop-1,nb)

                     dim4 = lnwin - dim1

                     ipnext = ipu + lnwin*lnwin

                     IF( dir.EQ.2 ) THEN

                        IF( wantz ) THEN

                           zrows = numroc( n, nb, myrow, descz( rsrc_ ),

     $                          nprow )

                        ELSE

                           zrows = 0

                        END IF

                        IF( wantt ) THEN

                           hrows = numroc( lktop-1, nb, myrow,

     $                          desch( rsrc_ ), nprow )

                        ELSE

                           hrows = 0

                        END IF

                     ELSE

                        zrows = 0

                        hrows = 0

                     END IF

                     IF( dir.EQ.1 ) THEN

                        IF( wantt ) THEN

                           hcols = numroc( n - (lktop+dim1-1), nb,

     $                          mycol, csrc4, npcol )

                           IF( mycol.EQ.csrc4 ) hcols = hcols - dim4

                        ELSE

                           hcols = 0

                        END IF

                     ELSE

                        hcols = 0

                     END IF

                     ipw = max( 1 + lenrbuf + lencbuf, ipw3 )

                     ipw1 = ipw + hrows * lnwin

                     IF( wantz ) THEN

                        ipw2 = ipw1 + lnwin * hcols

                        ipw3 = ipw2 + zrows * lnwin

                     ELSE

                        ipw3 = ipw1 + lnwin * hcols

                     END IF

                  END IF

*

*                 Let each process row and column involved in the updates

*                 exchange data in H and Z with their neighbours.

*

                  IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                     IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                        DO 210 indx = 1, nprow

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc1, rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', hrows, dim1,

     $                                h((jloc1-1)*lldh+iloc), lldh,

     $                                work(ipw), hrows )

                                 IF( npcol.GT.1 ) THEN

                                    east = mod( mycol + 1, npcol )

                                    CALL sgesd2d( ictxt, hrows, dim1,

     $                                   work(ipw), hrows, rsrc, east )

                                    CALL sgerv2d( ictxt, hrows, dim4,

     $                                   work(ipw+hrows*dim1), hrows,

     $                                   rsrc, east )

                                 END IF

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop+dim1,

     $                             desch, nprow, npcol, myrow, mycol,

     $                             iloc, jloc4, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', hrows, dim4,

     $                                h((jloc4-1)*lldh+iloc), lldh,

     $                                work(ipw+hrows*dim1), hrows )

                                 IF( npcol.GT.1 ) THEN

                                    west = mod( mycol - 1 + npcol,

     $                                   npcol )

                                    CALL sgesd2d( ictxt, hrows, dim4,

     $                                   work(ipw+hrows*dim1), hrows,

     $                                   rsrc, west )

                                    CALL sgerv2d( ictxt, hrows, dim1,

     $                                   work(ipw), hrows, rsrc, west )

                                 END IF

                              END IF

                           END IF

 210                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN

                     IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 ) THEN

                        DO 220 indx = 1, npcol

                           IF( myrow.EQ.rsrc1 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 IF( lkbot.LT.n ) THEN

                                    CALL infog2l( lktop, lkbot+1, desch,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc1, jloc, rsrc1, csrc )

                                 ELSE

                                    csrc = -1

                                 END IF

                              ELSEIF( mod(lkbot,nb).NE.0 ) THEN

                                 CALL infog2l( lktop,

     $                                (iceil(lkbot,nb)+(indx-2))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc1, jloc, rsrc1, csrc )

                              ELSE

                                 CALL infog2l( lktop,

     $                                (iceil(lkbot,nb)+(indx-1))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc1, jloc, rsrc1, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL slamov( 'All', dim1, hcols,

     $                                h((jloc-1)*lldh+iloc1), lldh,

     $                                work(ipw1), lnwin )

                                 IF( nprow.GT.1 ) THEN

                                    south = mod( myrow + 1, nprow )

                                    CALL sgesd2d( ictxt, dim1, hcols,

     $                                   work(ipw1), lnwin, south,

     $                                   csrc )

                                    CALL sgerv2d( ictxt, dim4, hcols,

     $                                   work(ipw1+dim1), lnwin, south,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

                           IF( myrow.EQ.rsrc4 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 IF( lkbot.LT.n ) THEN

                                    CALL infog2l( lktop+dim1, lkbot+1,

     $                                   desch, nprow, npcol, myrow,

     $                                   mycol, iloc4, jloc, rsrc4,

     $                                   csrc )

                                 ELSE

                                    csrc = -1

                                 END IF

                              ELSEIF( mod(lkbot,nb).NE.0 ) THEN

                                 CALL infog2l( lktop+dim1,

     $                                (iceil(lkbot,nb)+(indx-2))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc4, jloc, rsrc4, csrc )

                              ELSE

                                 CALL infog2l( lktop+dim1,

     $                                (iceil(lkbot,nb)+(indx-1))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc4, jloc, rsrc4, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL slamov( 'All', dim4, hcols,

     $                                h((jloc-1)*lldh+iloc4), lldh,

     $                                work(ipw1+dim1), lnwin )

                                 IF( nprow.GT.1 ) THEN

                                    north = mod( myrow - 1 + nprow,

     $                                   nprow )

                                    CALL sgesd2d( ictxt, dim4, hcols,

     $                                   work(ipw1+dim1), lnwin, north,

     $                                   csrc )

                                    CALL sgerv2d( ictxt, dim1, hcols,

     $                                   work(ipw1), lnwin, north,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

 220                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0) THEN

                     IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                        DO 230 indx = 1, nprow

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop,

     $                             descz, nprow, npcol, myrow, mycol,

     $                             iloc, jloc1, rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', zrows, dim1,

     $                                z((jloc1-1)*lldz+iloc), lldz,

     $                                work(ipw2), zrows )

                                 IF( npcol.GT.1 ) THEN

                                    east = mod( mycol + 1, npcol )

                                    CALL sgesd2d( ictxt, zrows, dim1,

     $                                   work(ipw2), zrows, rsrc,

     $                                   east )

                                    CALL sgerv2d( ictxt, zrows, dim4,

     $                                   work(ipw2+zrows*dim1),

     $                                   zrows, rsrc, east )

                                 END IF

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( 1+(indx-1)*nb,

     $                             lktop+dim1, descz, nprow, npcol,

     $                             myrow, mycol, iloc, jloc4, rsrc,

     $                             csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', zrows, dim4,

     $                                z((jloc4-1)*lldz+iloc), lldz,

     $                                work(ipw2+zrows*dim1), zrows )

                                 IF( npcol.GT.1 ) THEN

                                    west = mod( mycol - 1 + npcol,

     $                                   npcol )

                                    CALL sgesd2d( ictxt, zrows, dim4,

     $                                   work(ipw2+zrows*dim1),

     $                                   zrows, rsrc, west )

                                    CALL sgerv2d( ictxt, zrows, dim1,

     $                                   work(ipw2), zrows, rsrc,

     $                                   west )

                                 END IF

                              END IF

                           END IF

 230                    CONTINUE

                     END IF

                  END IF

*

*                 If no exchanges were performed for the current window,

*                 all processors jump to this point and try the next

*                 one.

*

 205              CONTINUE

*

 200           CONTINUE

*

*              Compute crossborder bulge-chase updates.

*

               winid = 0

               IF( dir.EQ.1 ) THEN

                  ipnext = 1

               ELSE

                  ipnext = 1 + lenrbuf

               END IF

               ipw3 = 1

               DO 240 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 245

*

*                 Only perform this part of the code if there was really

*                 some work performed on the WIN:th window.

*

                  lktop = iwork( 1+(win-1)*5 )

                  lkbot = iwork( 2+(win-1)*5 )

                  lnwin = lkbot - lktop + 1

*

*                 Extract the processor indices associated with

*                 the current window.

*

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

*

                  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1)) THEN

*

*                    Set up workspaces.

*

                     winid = winid + 1

                     lktop = iwork( 1+(win-1)*5 )

                     lkbot = iwork( 2+(win-1)*5 )

                     lnwin = lkbot - lktop + 1

                     dim1 = nb - mod(lktop-1,nb)

                     dim4 = lnwin - dim1

                     ipu = ipnext + (winid-1)*lnwin*lnwin

                     IF( dir.EQ.2 ) THEN

                        IF( wantz ) THEN

                           zrows = numroc( n, nb, myrow, descz( rsrc_ ),

     $                          nprow )

                        ELSE

                           zrows = 0

                        END IF

                        IF( wantt ) THEN

                           hrows = numroc( lktop-1, nb, myrow,

     $                          desch( rsrc_ ), nprow )

                        ELSE

                           hrows = 0

                        END IF

                     ELSE

                        zrows = 0

                        hrows = 0

                     END IF

                     IF( dir.EQ.1 ) THEN

                        IF( wantt ) THEN

                           hcols = numroc( n - (lktop+dim1-1), nb,

     $                          mycol, csrc4, npcol )

                           IF( mycol.EQ.csrc4 ) hcols = hcols - dim4

                        ELSE

                           hcols = 0

                        END IF

                     ELSE

                        hcols = 0

                     END IF

*

*                    IPW  = local copy of overlapping column block of H

*                    IPW1 = local copy of overlapping row block of H

*                    IPW2 = local copy of overlapping column block of Z

*                    IPW3 = workspace for right hand side of matrix

*                           multiplication

*

                     ipw = max( 1 + lenrbuf + lencbuf, ipw3 )

                     ipw1 = ipw + hrows * lnwin

                     IF( wantz ) THEN

                        ipw2 = ipw1 + lnwin * hcols

                        ipw3 = ipw2 + zrows * lnwin

                     ELSE

                        ipw3 = ipw1 + lnwin * hcols

                     END IF

*

*                    Recompute job to see if special structure of U

*                    could possibly be exploited.

*

                     IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                        job = 'All steps'

                     ELSEIF( lktop.EQ.ktop .AND.

     $                    ( dim1.LT.lchain+1 .OR. dim1.LE.ntiny ) )

     $                    THEN

                        job = 'Introduce and chase'

                     ELSEIF( lkbot.EQ.kbot ) THEN

                        job = 'Off-chase bulges'

                     ELSE

                        job = 'Chase bulges'

                     END IF

                  END IF

*

*                 Test whether to exploit the sparsity structure of

*                 the orthogonal matrix U.

*

                  ks = dim1+dim4-lns/2*3

                  IF( .NOT. blk22 .OR. dim1.NE.ks .OR.

     $                 dim4.NE.ks .OR. lsame(job,'I') .OR.

     $                 lsame(job,'O') .OR. lns.LE.2 ) THEN

*

*                    Update the columns of H and Z.

*

                     IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                        DO 250 indx = 1, min(lktop-1,1+(nprow-1)*nb), nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', hrows, dim1,

     $                                lnwin, one, work( ipw ), hrows,

     $                                work( ipu ), lnwin, zero,

     $                                work(ipw3), hrows )

                                 CALL slamov( 'All', hrows, dim1,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', hrows, dim4,

     $                                lnwin, one, work( ipw ), hrows,

     $                                work( ipu+lnwin*dim1 ), lnwin,

     $                                zero, work(ipw3), hrows )

                                 CALL slamov( 'All', hrows, dim4,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

 250                    CONTINUE

                     END IF

*

                     IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN

                        DO 260 indx = 1, min(n,1+(nprow-1)*nb), nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, descz, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', zrows, dim1,

     $                                lnwin, one, work( ipw2 ),

     $                                zrows, work( ipu ), lnwin,

     $                                zero, work(ipw3), zrows )

                                 CALL slamov( 'All', zrows, dim1,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, descz,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', zrows, dim4,

     $                                lnwin, one, work( ipw2 ),

     $                                zrows,

     $                                work( ipu+lnwin*dim1 ), lnwin,

     $                                zero, work(ipw3), zrows )

                                 CALL slamov( 'All', zrows, dim4,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

 260                    CONTINUE

                     END IF

*

*                    Update the rows of H.

*

                     IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN

                        IF( lkbot.LT.n ) THEN

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4 .AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop, indx, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc4 )

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             dim1, hcols, lnwin, one, work(ipu),

     $                             lnwin, work( ipw1 ), lnwin, zero,

     $                             work(ipw3), dim1 )

                              CALL slamov( 'All', dim1, hcols,

     $                             work(ipw3), dim1,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                           IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4 .AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop+dim1, indx, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc4, csrc4 )

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             dim4, hcols, lnwin, one,

     $                             work( ipu+dim1*lnwin ), lnwin,

     $                             work( ipw1), lnwin, zero,

     $                             work(ipw3), dim4 )

                              CALL slamov( 'All', dim4, hcols,

     $                             work(ipw3), dim4,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                           indxs = iceil(lkbot,nb)*nb + 1

                           IF( mod(lkbot,nb).NE.0 ) THEN

                              indxe = min(n,indxs+(npcol-2)*nb)

                           ELSE

                              indxe = min(n,indxs+(npcol-1)*nb)

                           END IF

                           DO 270 indx = indxs, indxe, nb

                              IF( myrow.EQ.rsrc1 ) THEN

                                 CALL infog2l( lktop, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc1, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL sgemm( 'Transpose',

     $                                   'No Transpose', dim1, hcols,

     $                                   lnwin, one, work( ipu ), lnwin,

     $                                   work( ipw1 ), lnwin, zero,

     $                                   work(ipw3), dim1 )

                                    CALL slamov( 'All', dim1, hcols,

     $                                   work(ipw3), dim1,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

                              IF( myrow.EQ.rsrc4 ) THEN

                                 CALL infog2l( lktop+dim1, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc4, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL sgemm( 'Transpose',

     $                                   'No Transpose', dim4, hcols,

     $                                   lnwin, one,

     $                                   work( ipu+lnwin*dim1 ), lnwin,

     $                                   work( ipw1 ), lnwin,

     $                                   zero, work(ipw3), dim4 )

                                    CALL slamov( 'All', dim4, hcols,

     $                                   work(ipw3), dim4,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

 270                       CONTINUE

                        END IF

                     END IF

                  ELSE

*

*                    Update the columns of H and Z.

*

*                    Compute H2*U21 + H1*U11 on the left side of the border.

*

                     IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                        indxe = min(lktop-1,1+(nprow-1)*nb)

                        DO 280 indx = 1, indxe, nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', hrows, ks,

     $                                work( ipw+hrows*dim4), hrows,

     $                                work(ipw3), hrows )

                                 CALL strmm( 'Right', 'Upper',

     $                                'No transpose',

     $                                'Non-unit', hrows, ks, one,

     $                                work( ipu+dim4 ), lnwin,

     $                                work(ipw3), hrows )

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', hrows, ks, dim4,

     $                                one, work( ipw ), hrows,

     $                                work( ipu ), lnwin, one,

     $                                work(ipw3), hrows )

                                 CALL slamov( 'All', hrows, ks,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

*

*                          Compute H1*U12 + H2*U22 on the right side of

*                          the border.

*

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', hrows, dim4,

     $                                work(ipw), hrows, work( ipw3 ),

     $                                hrows )

                                 CALL strmm( 'Right', 'Lower',

     $                                'No transpose',

     $                                'Non-unit', hrows, dim4, one,

     $                                work( ipu+lnwin*ks ), lnwin,

     $                                work( ipw3 ), hrows )

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', hrows, dim4, ks,

     $                                one, work( ipw+hrows*dim4),

     $                                hrows,

     $                                work( ipu+lnwin*ks+dim4 ), lnwin,

     $                                one, work( ipw3 ), hrows )

                                 CALL slamov( 'All', hrows, dim4,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

 280                    CONTINUE

                     END IF

*

                     IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN

*

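*                       Compute Z2*U21 + Z1*U11 on the left side

*                       of the border.

*                       NOTE(review): the INFOG2L calls in this loop

*                       pass I as the global column, whereas the

*                       non-2x2 Z update and the H updates above pass

*                       LKTOP; confirm that I equals LKTOP here.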

*

                        indxe = min(n,1+(nprow-1)*nb)

                        DO 290 indx = 1, indxe, nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, i, descz, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', zrows, ks,

     $                                work( ipw2+zrows*dim4),

     $                                zrows, work(ipw3), zrows )

                                 CALL strmm( 'Right', 'Upper',

     $                                'No transpose',

     $                                'Non-unit', zrows, ks, one,

     $                                work( ipu+dim4 ), lnwin,

     $                                work(ipw3), zrows )

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', zrows, ks,

     $                                dim4, one, work( ipw2 ),

     $                                zrows, work( ipu ), lnwin,

     $                                one, work(ipw3), zrows )

                                 CALL slamov( 'All', zrows, ks,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

*

*                          Compute Z1*U12 + Z2*U22 on the right side

*                          of border.

*

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, i+dim1, descz,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', zrows, dim4,

     $                                work(ipw2), zrows,

     $                                work( ipw3 ), zrows )

                                 CALL strmm( 'Right', 'Lower',

     $                                'No transpose',

     $                                'Non-unit', zrows, dim4,

     $                                one, work( ipu+lnwin*ks ),

     $                                lnwin, work( ipw3 ), zrows )

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', zrows, dim4,

     $                                ks, one,

     $                                work( ipw2+zrows*(dim4)),

     $                                zrows,

     $                                work( ipu+lnwin*ks+dim4 ),

     $                                lnwin, one, work( ipw3 ),

     $                                zrows )

                                 CALL slamov( 'All', zrows, dim4,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

 290                    CONTINUE

                     END IF

*

                     IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0) THEN

                        IF ( lkbot.LT.n ) THEN

*

*                          Compute U21**T*H2 + U11**T*H1 on the upper

*                          side of the border.

*

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4.AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop, indx, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc4 )

                              CALL slamov( 'All', ks, hcols,

     $                             work( ipw1+dim4 ), lnwin,

     $                             work(ipw3), ks )

                              CALL strmm( 'Left', 'Upper', 'Transpose',

     $                             'Non-unit', ks, hcols, one,

     $                             work( ipu+dim4 ), lnwin,

     $                             work(ipw3), ks )

                              CALL sgemm( 'Transpose', 'No transpose',

     $                             ks, hcols, dim4, one, work(ipu),

     $                             lnwin, work(ipw1), lnwin,

     $                             one, work(ipw3), ks )

                              CALL slamov( 'All', ks, hcols,

     $                             work(ipw3), ks,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

*

*                          Compute U12**T*H1 + U22**T*H2 on the lower

*                          side of the border.

*

                           IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4.AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop+dim1, indx, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc4, csrc4 )

                              CALL slamov( 'All', dim4, hcols,

     $                             work( ipw1 ), lnwin,

     $                             work( ipw3 ), dim4 )

                              CALL strmm( 'Left', 'Lower', 'Transpose',

     $                             'Non-unit', dim4, hcols, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw3 ), dim4 )

                              CALL sgemm( 'Transpose', 'No Transpose',

     $                             dim4, hcols, ks, one,

     $                             work( ipu+lnwin*ks+dim4 ), lnwin,

     $                             work( ipw1+dim1 ), lnwin,

     $                             one, work( ipw3), dim4 )

                              CALL slamov( 'All', dim4, hcols,

     $                             work(ipw3), dim4,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

*

*                          Compute U21**T*H2 + U11**T*H1 on upper side

*                          of the border.

*

                           indxs = iceil(lkbot,nb)*nb+1

                           IF( mod(lkbot,nb).NE.0 ) THEN

                              indxe = min(n,indxs+(npcol-2)*nb)

                           ELSE

                              indxe = min(n,indxs+(npcol-1)*nb)

                           END IF

                           DO 300 indx = indxs, indxe, nb

                              IF( myrow.EQ.rsrc1 ) THEN

                                 CALL infog2l( lktop, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc1, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL slamov( 'All', ks, hcols,

     $                                   work( ipw1+dim4 ), lnwin,

     $                                   work(ipw3), ks )

                                    CALL strmm( 'Left', 'Upper',

     $                                   'Transpose', 'Non-unit',

     $                                   ks, hcols, one,

     $                                   work( ipu+dim4 ), lnwin,

     $                                   work(ipw3), ks )

                                    CALL sgemm( 'Transpose',

     $                                   'No transpose', ks, hcols,

     $                                   dim4, one, work(ipu), lnwin,

     $                                   work(ipw1), lnwin, one,

     $                                   work(ipw3), ks )

                                    CALL slamov( 'All', ks, hcols,

     $                                   work(ipw3), ks,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

*

*                             Compute U12**T*H1 + U22**T*H2 on lower

*                             side of border.

*

                              IF( myrow.EQ.rsrc4 ) THEN

                                 CALL infog2l( lktop+dim1, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc4, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL slamov( 'All', dim4, hcols,

     $                                   work( ipw1 ), lnwin,

     $                                   work( ipw3 ), dim4 )

                                    CALL strmm( 'Left', 'Lower',

     $                                   'Transpose','Non-unit',

     $                                   dim4, hcols, one,

     $                                   work( ipu+lnwin*ks ), lnwin,

     $                                   work( ipw3 ), dim4 )

                                    CALL sgemm( 'Transpose',

     $                                   'No Transpose', dim4, hcols,

     $                                   ks, one,

     $                                   work( ipu+lnwin*ks+dim4 ),

     $                                   lnwin, work( ipw1+dim1 ),

     $                                   lnwin, one, work( ipw3),

     $                                   dim4 )

                                    CALL slamov( 'All', dim4, hcols,

     $                                   work(ipw3), dim4,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

 300                       CONTINUE

                        END IF

                     END IF

                  END IF

*

*                 Update window information - mark processed windows as

*                 completed.

*

                  IF( dir.EQ.2 ) THEN

                     IF( lkbot.EQ.kbot ) THEN

                        lktop = kbot+1

                        lkbot = kbot+1

                        iwork( 1+(win-1)*5 ) = lktop

                        iwork( 2+(win-1)*5 ) = lkbot

                     ELSE

                        lktop = min( lktop + lnwin - lchain,

     $                       min( kbot, iceil( lkbot, nb )*nb ) -

     $                       lchain + 1 )

                        iwork( 1+(win-1)*5 ) = lktop

                        lkbot = min( max( lkbot + lnwin - lchain,

     $                       lktop + nwin - 1), min( kbot,

     $                       iceil( lkbot, nb )*nb ) )

                        iwork( 2+(win-1)*5 ) = lkbot

                     END IF

                     IF( iwork( 5+(win-1)*5 ).EQ.1 )

     $                    iwork( 5+(win-1)*5 ) = 2

                     iwork( 3+(win-1)*5 ) = rsrc4

                     iwork( 4+(win-1)*5 ) = csrc4

                  END IF

*

*                 If nothing was done for the WIN:th window, all

*                 processors come here and consider the next one

*                 instead.

*

 245              CONTINUE

 240           CONTINUE

 190        CONTINUE

 150     CONTINUE

 140     CONTINUE

*

*        Chased off bulges from first window?

*

         IF( nprocs.GT.1 )

     $      CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1,

     $           -1, -1, -1, -1, -1 )

*

*        If the bulges were chased off from the first window, remove it.

*

         IF( ichoff.GT.0 ) THEN

            DO 198 win = 2, anmwin

               iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )

               iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )

               iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )

               iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )

 198        CONTINUE

            anmwin = anmwin - 1

            ipiw = 6+(anmwin-1)*5

         END IF

*

*        If we have no more windows, return.

*

         IF( anmwin.LT.1 ) RETURN

*

*        Check for any more windows to bring over the border.

*

         winfin = 0

         DO 199 win = 1, anmwin

            winfin = winfin+iwork( 5+(win-1)*5 )

 199     CONTINUE

         IF( winfin.LT.2*anmwin ) GO TO 137

*

*        Zero out process mark for each window - this is legal now when

*        the process starts over with local bulge-chasing etc.

*

         DO 201 win = 1, anmwin

            iwork( 5+(win-1)*5 ) = 0

 201     CONTINUE

*

      END IF

*

*     Go back to local bulge-chase and see if there is more work to do.

*

      GO TO 20

*

*     End of PSLAQR5

*

      END