ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pstrord.f
Go to the documentation of this file.
00001       SUBROUTINE PSTRORD( COMPQ, SELECT, PARA, N, T, IT, JT,
00002      $     DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK,
00003      $     IWORK, LIWORK, INFO )
00004 *
00005 *     Contribution from the Department of Computing Science and HPC2N,
00006 *     Umea University, Sweden
00007 *
00008 *  -- ScaLAPACK computational routine (version 2.0.2) --
00009 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
00010 *     May 1 2012
00011 *
00012       IMPLICIT NONE
00013 *
00014 *     .. Scalar Arguments ..
00015       CHARACTER          COMPQ
00016       INTEGER            INFO, LIWORK, LWORK, M, N,
00017      $                   IT, JT, IQ, JQ
00018 *     ..
00019 *     .. Array Arguments ..
00020       INTEGER            SELECT( * )
00021       INTEGER            PARA( 6 ), DESCT( * ), DESCQ( * ), IWORK( * )
00022       REAL               Q( * ), T( * ), WI( * ), WORK( * ), WR( * )
00023 *     ..
00024 *
00025 *  Purpose
00026 *  =======
00027 *
00028 *  PSTRORD reorders the real Schur factorization of a real matrix
00029 *  A = Q*T*Q**T, so that a selected cluster of eigenvalues appears
00030 *  in the leading diagonal blocks of the upper quasi-triangular matrix
00031 *  T, and the leading columns of Q form an orthonormal basis of the
00032 *  corresponding right invariant subspace.
00033 *
00034 *  T must be in Schur form (as returned by PSLAHQR), that is, block
00035 *  upper triangular with 1-by-1 and 2-by-2 diagonal blocks.
00036 *
00037 *  This subroutine uses a delay and accumulate procedure for performing
00038 *  the off-diagonal updates (see references for details).
00039 *
00040 *  Notes
00041 *  =====
00042 *
00043 *  Each global data object is described by an associated description
00044 *  vector.  This vector stores the information required to establish
00045 *  the mapping between an object element and its corresponding process
00046 *  and memory location.
00047 *
00048 *  Let A be a generic term for any 2D block cyclicly distributed array.
00049 *  Such a global array has an associated description vector DESCA.
00050 *  In the following comments, the character _ should be read as
00051 *  "of the global array".
00052 *
00053 *  NOTATION        STORED IN      EXPLANATION
00054 *  --------------- -------------- --------------------------------------
00055 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00056 *                                 DTYPE_A = 1.
00057 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00058 *                                 the BLACS process grid A is distribu-
00059 *                                 ted over. The context itself is glo-
00060 *                                 bal, but the handle (the integer
00061 *                                 value) may vary.
00062 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00063 *                                 array A.
00064 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00065 *                                 array A.
00066 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00067 *                                 the rows of the array.
00068 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00069 *                                 the columns of the array.
00070 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00071 *                                 row of the array A is distributed.
00072 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00073 *                                 first column of the array A is
00074 *                                 distributed.
00075 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00076 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00077 *
00078 *  Let K be the number of rows or columns of a distributed matrix,
00079 *  and assume that its process grid has dimension p x q.
00080 *  LOCr( K ) denotes the number of elements of K that a process
00081 *  would receive if K were distributed over the p processes of its
00082 *  process column.
00083 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00084 *  process would receive if K were distributed over the q processes of
00085 *  its process row.
00086 *  The values of LOCr() and LOCc() may be determined via a call to the
00087 *  ScaLAPACK tool function, NUMROC:
00088 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00089 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00090 *  An upper bound for these quantities may be computed by:
00091 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00092 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00093 *
00094 *  Arguments
00095 *  =========
00096 *
00097 *
00098 *  COMPQ   (global input) CHARACTER*1
00099 *          = 'V': update the matrix Q of Schur vectors;
00100 *          = 'N': do not update Q.
00101 *
00102 *  SELECT  (global input/output) INTEGER array, dimension (N)
00103 *          SELECT specifies the eigenvalues in the selected cluster. To
00104 *          select a real eigenvalue w(j), SELECT(j) must be set to 1.
00105 *          To select a complex conjugate pair of eigenvalues
00106 *          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,
00107 *          either SELECT(j) or SELECT(j+1) or both must be set to 1;
00108 *          a complex conjugate pair of eigenvalues must be
00109 *          either both included in the cluster or both excluded.
00110 *          On output, the (partial) reordering is displayed.
00111 *
00112 *  PARA    (global input) INTEGER*6
00113 *          Block parameters (some should be replaced by calls to
00114 *          PILAENV and others by meaningful default values):
00115 *          PARA(1) = maximum number of concurrent computational windows
00116 *                    allowed in the algorithm;
00117 *                    0 < PARA(1) <= min(NPROW,NPCOL) must hold;
00118 *          PARA(2) = number of eigenvalues in each window;
00119 *                    0 < PARA(2) < PARA(3) must hold;
00120 *          PARA(3) = window size; PARA(2) < PARA(3) < DESCT(MB_)
00121 *                    must hold;
00122 *          PARA(4) = minimal percentage of flops required for
00123 *                    performing matrix-matrix multiplications instead
00124 *                    of pipelined orthogonal transformations;
00125 *                    0 <= PARA(4) <= 100 must hold;
00126 *          PARA(5) = width of block column slabs for row-wise
00127 *                    application of pipelined orthogonal
00128 *                    transformations in their factorized form;
00129 *                    0 < PARA(5) <= DESCT(MB_) must hold.
00130 *          PARA(6) = the maximum number of eigenvalues moved together
00131 *                    over a process border; in practice, this will be
00132 *                    approximately half of the cross border window size
00133 *                    0 < PARA(6) <= PARA(2) must hold;
00134 *
00135 *  N       (global input) INTEGER
00136 *          The order of the globally distributed matrix T. N >= 0.
00137 *
00138 *  T       (local input/output) REAL array,
00139 *          dimension (LLD_T,LOCc(N)).
00140 *          On entry, the local pieces of the global distributed
00141 *          upper quasi-triangular matrix T, in Schur form. On exit, T is
00142 *          overwritten by the local pieces of the reordered matrix T,
00143 *          again in Schur form, with the selected eigenvalues in the
00144 *          globally leading diagonal blocks.
00145 *
00146 *  IT      (global input) INTEGER
00147 *  JT      (global input) INTEGER
00148 *          The row and column index in the global array T indicating the
00149 *          first column of sub( T ). IT = JT = 1 must hold.
00150 *
00151 *  DESCT   (global and local input) INTEGER array of dimension DLEN_.
00152 *          The array descriptor for the global distributed matrix T.
00153 *
00154 *  Q       (local input/output) REAL array,
00155 *          dimension (LLD_Q,LOCc(N)).
00156 *          On entry, if COMPQ = 'V', the local pieces of the global
00157 *          distributed matrix Q of Schur vectors.
00158 *          On exit, if COMPQ = 'V', Q has been postmultiplied by the
00159 *          global orthogonal transformation matrix which reorders T; the
00160 *          leading M columns of Q form an orthonormal basis for the
00161 *          specified invariant subspace.
00162 *          If COMPQ = 'N', Q is not referenced.
00163 *
00164 *  IQ      (global input) INTEGER
00165 *  JQ      (global input) INTEGER
00166 *          The column index in the global array Q indicating the
00167 *          first column of sub( Q ). IQ = JQ = 1 must hold.
00168 *
00169 *  DESCQ   (global and local input) INTEGER array of dimension DLEN_.
00170 *          The array descriptor for the global distributed matrix Q.
00171 *
00172 *  WR      (global output) REAL array, dimension (N)
00173 *  WI      (global output) REAL array, dimension (N)
00174 *          The real and imaginary parts, respectively, of the reordered
00175 *          eigenvalues of T. The eigenvalues are in principle stored in
00176 *          the same order as on the diagonal of T, with WR(i) = T(i,i)
00177 *          and, if T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0
00178 *          and WI(i+1) = -WI(i).
00179 *          Note also that if a complex eigenvalue is sufficiently
00180 *          ill-conditioned, then its value may differ significantly
00181 *          from its value before reordering.
00182 *
00183 *  M       (global output) INTEGER
00184 *          The dimension of the specified invariant subspace.
00185 *          0 <= M <= N.
00186 *
00187 *  WORK    (local workspace/output) REAL array,
00188 *          dimension (LWORK)
00189 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
00190 *
00191 *  LWORK   (local input) INTEGER
00192 *          The dimension of the array WORK.
00193 *
00194 *          If LWORK = -1, then a workspace query is assumed; the routine
00195 *          only calculates the optimal size of the WORK array, returns
00196 *          this value as the first entry of the WORK array, and no error
00197 *          message related to LWORK is issued by PXERBLA.
00198 *
00199 *  IWORK   (local workspace/output) INTEGER array, dimension (LIWORK)
00200 *
00201 *  LIWORK  (local input) INTEGER
00202 *          The dimension of the array IWORK.
00203 *
00204 *          If LIWORK = -1, then a workspace query is assumed; the
00205 *          routine only calculates the optimal size of the IWORK array,
00206 *          returns this value as the first entry of the IWORK array, and
00207 *          no error message related to LIWORK is issued by PXERBLA.
00208 *
00209 *  INFO    (global output) INTEGER
00210 *          = 0: successful exit
00211 *          < 0: if INFO = -i, the i-th argument had an illegal value.
00212 *          If the i-th argument is an array and the j-entry had
00213 *          an illegal value, then INFO = -(i*1000+j), if the i-th
00214 *          argument is a scalar and had an illegal value, then INFO = -i.
00215 *          > 0: here we have several possibilites
00216 *            *) Reordering of T failed because some eigenvalues are too
00217 *               close to separate (the problem is very ill-conditioned);
00218 *               T may have been partially reordered, and WR and WI
00219 *               contain the eigenvalues in the same order as in T.
00220 *               On exit, INFO = {the index of T where the swap failed}.
00221 *            *) A 2-by-2 block to be reordered split into two 1-by-1
00222 *               blocks and the second block failed to swap with an
00223 *               adjacent block.
00224 *               On exit, INFO = {the index of T where the swap failed}.
00225 *            *) If INFO = N+1, there is no valid BLACS context (see the
00226 *               BLACS documentation for details).
00227 *          In a future release this subroutine may distinguish between
00228 *          the case 1 and 2 above.
00229 *
00230 *  Additional requirements
00231 *  =======================
00232 *
00233 *  The following alignment requirements must hold:
00234 *  (a) DESCT( MB_ ) = DESCT( NB_ ) = DESCQ( MB_ ) = DESCQ( NB_ )
00235 *  (b) DESCT( RSRC_ ) = DESCQ( RSRC_ )
00236 *  (c) DESCT( CSRC_ ) = DESCQ( CSRC_ )
00237 *
00238 *  All matrices must be blocked by a block factor larger than or
00239 *  equal to two (3). This is to simplify reordering across processor
00240 *  borders in the presence of 2-by-2 blocks.
00241 *
00242 *  Limitations
00243 *  ===========
00244 *
00245 *  This algorithm cannot work on submatrices of T and Q, i.e.,
00246 *  IT = JT = IQ = JQ = 1 must hold. This is however no limitation
00247 *  since PDLAHQR does not compute Schur forms of submatrices anyway.
00248 *
00249 *  References
00250 *  ==========
00251 *
00252 *  [1] Z. Bai and J. W. Demmel; On swapping diagonal blocks in real
00253 *      Schur form, Linear Algebra Appl., 186:73--95, 1993. Also as
00254 *      LAPACK Working Note 54.
00255 *
00256 *  [2] D. Kressner; Block algorithms for reordering standard and
00257 *      generalized Schur forms, ACM TOMS, 32(4):521-532, 2006.
00258 *      Also LAPACK Working Note 171.
00259 *
00260 *  [3] R. Granat, B. Kagstrom, and D. Kressner; Parallel eigenvalue
00261 *      reordering in real Schur form, Concurrency and Computations:
00262 *      Practice and Experience, 21(9):1225-1250, 2009. Also as
00263 *      LAPACK Working Note 192.
00264 *
00265 *  Parallel execution recommendations
00266 *  ==================================
00267 *
00268 *  Use a square grid, if possible, for maximum performance. The block
00269 *  parameters in PARA should be kept well below the data distribution
00270 *  block size. In particular, see [3] for recommended settings for
00271 *  these parameters.
00272 *
00273 *  In general, the parallel algorithm strives to perform as much work
00274 *  as possible without crossing the block borders on the main block
00275 *  diagonal.
00276 *
00277 *  Contributors
00278 *  ============
00279 *
00280 *  Implemented by Robert Granat, Dept. of Computing Science and HPC2N,
00281 *  Umea University, Sweden, March 2007,
00282 *  in collaboration with Bo Kagstrom and Daniel Kressner.
00283 *  Modified by Meiyue Shao, October 2011.
00284 *
00285 *  Revisions
00286 *  =========
00287 *
00288 *  Please send bug-reports to granat@cs.umu.se
00289 *
00290 *  Keywords
00291 *  ========
00292 *
00293 *  Real Schur form, eigenvalue reordering
00294 *
00295 *  =====================================================================
00296 *     ..
00297 *     .. Parameters ..
00298       CHARACTER          TOP
00299       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00300      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00301       REAL               ZERO, ONE
00302       PARAMETER          ( TOP = '1-Tree',
00303      $                     BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00304      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00305      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9,
00306      $                     ZERO = 0.0, ONE = 1.0 )
00307 *     ..
00308 *     .. Local Scalars ..
00309       LOGICAL            LQUERY, PAIR, SWAP, WANTQ,
00310      $                   ISHH, FIRST, SKIP1CR, BORDER, LASTWAIT
00311       INTEGER            NPROW, NPCOL, MYROW, MYCOL, NB, NPROCS,
00312      $                   IERR, DIM1, INDX, LLDT, TRSRC, TCSRC, ILOC1,
00313      $                   JLOC1, MYIERR, ICTXT,
00314      $                   RSRC1, CSRC1, ILOC3, JLOC3, TRSRC3,
00315      $                   TCSRC3, ILOC, JLOC, TRSRC4, TCSRC4,
00316      $                   FLOPS, I, ILO, IHI, J, K, KK, KKS,
00317      $                   KS, LIWMIN, LWMIN, MMULT, N1, N2,
00318      $                   NCB, NDTRAF, NITRAF, NWIN, NUMWIN, PDTRAF,
00319      $                   PITRAF, PDW, WINEIG, WINSIZ, LLDQ,
00320      $                   RSRC, CSRC, ILILO, ILIHI, ILSEL, IRSRC,
00321      $                   ICSRC, IPIW, IPW1, IPW2, IPW3, TIHI, TILO,
00322      $                   LIHI, WINDOW, LILO, LSEL, BUFFER,
00323      $                   NMWIN2, BUFFLEN, LROWS, LCOLS, ILOC2, JLOC2,
00324      $                   WNEICR, WINDOW0, RSRC4, CSRC4, LIHI4, RSRC3,
00325      $                   CSRC3, RSRC2, CSRC2, LIHIC, LIHI1, ILEN4,
00326      $                   SELI4, ILEN1, DIM4, IPW4, QROWS, TROWS,
00327      $                   TCOLS, IPW5, IPW6, IPW7, IPW8, JLOC4,
00328      $                   EAST, WEST, ILOC4, SOUTH, NORTH, INDXS,
00329      $                   ITT, JTT, ILEN, DLEN, INDXE, TRSRC1, TCSRC1,
00330      $                   TRSRC2, TCSRC2, ILOS, DIR, TLIHI, TLILO, TLSEL,
00331      $                   ROUND, LAST, WIN0S, WIN0E, WINE, MMAX, MMIN
00332       REAL               ELEM, ELEM1, ELEM2, ELEM3, ELEM4, SN, CS, TMP,
00333      $                   ELEM5
00334 *     ..
00335 *     .. Local Arrays ..
00336       INTEGER            IBUFF( 8 ), IDUM1( 1 ), IDUM2( 1 )
00337 *     ..
00338 *     .. External Functions ..
00339       LOGICAL            LSAME
00340       INTEGER            NUMROC, INDXG2P, INDXG2L
00341       EXTERNAL           LSAME, NUMROC, INDXG2P, INDXG2L
00342 *     ..
00343 *     .. External Subroutines ..
00344       EXTERNAL           PSLACPY, PXERBLA, PCHK1MAT, PCHK2MAT,
00345      $                   SGEMM, SLAMOV, ILACPY, CHK1MAT,
00346      $                   INFOG2L, DGSUM2D, SGESD2D, SGERV2D, SGEBS2D,
00347      $                   SGEBR2D, IGSUM2D, BLACS_GRIDINFO, IGEBS2D,
00348      $                   IGEBR2D, IGAMX2D, IGAMN2D, BSLAAPP, BDTREXC
00349 *     ..
00350 *     .. Intrinsic Functions ..
00351       INTRINSIC          ABS, MAX, SQRT, MIN
00352 *     ..
00353 *     .. Local Functions ..
00354       INTEGER            ICEIL
00355 *     ..
00356 *     .. Executable Statements ..
00357 *
00358 *     Get grid parameters.
00359 *
00360       ICTXT = DESCT( CTXT_ )
00361       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00362       NPROCS = NPROW*NPCOL
00363 *
00364 *     Test if grid is O.K., i.e., the context is valid.
00365 *
00366       INFO = 0
00367       IF( NPROW.EQ.-1 ) THEN
00368          INFO = N+1
00369       END IF
00370 *
00371 *     Check if workspace query.
00372 *
00373       LQUERY = LWORK.EQ.-1 .OR. LIWORK.EQ.-1
00374 *
00375 *     Test dimensions for local sanity.
00376 *
00377       IF( INFO.EQ.0 ) THEN
00378          CALL CHK1MAT( N, 5, N, 5, IT, JT, DESCT, 9, INFO )
00379       END IF
00380       IF( INFO.EQ.0 ) THEN
00381          CALL CHK1MAT( N, 5, N, 5, IQ, JQ, DESCQ, 13, INFO )
00382       END IF
00383 *
00384 *     Check the blocking sizes for alignment requirements.
00385 *
00386       IF( INFO.EQ.0 ) THEN
00387          IF( DESCT( MB_ ).NE.DESCT( NB_ ) ) INFO = -(1000*9 + MB_)
00388       END IF
00389       IF( INFO.EQ.0 ) THEN
00390          IF( DESCQ( MB_ ).NE.DESCQ( NB_ ) ) INFO = -(1000*13 + MB_)
00391       END IF
00392       IF( INFO.EQ.0 ) THEN
00393          IF( DESCT( MB_ ).NE.DESCQ( MB_ ) ) INFO = -(1000*9 + MB_)
00394       END IF
00395 *
00396 *     Check the blocking sizes for minimum sizes.
00397 *
00398       IF( INFO.EQ.0 ) THEN
00399          IF( N.NE.DESCT( MB_ ) .AND. DESCT( MB_ ).LT.3 )
00400      $      INFO = -(1000*9 + MB_)
00401          IF( N.NE.DESCQ( MB_ ) .AND. DESCQ( MB_ ).LT.3 )
00402      $      INFO = -(1000*13 + MB_)
00403       END IF
00404 *
00405 *     Check parameters in PARA.
00406 *
00407       NB = DESCT( MB_ )
00408       IF( INFO.EQ.0 ) THEN
00409          IF( PARA(1).LT.1 .OR. PARA(1).GT.MIN(NPROW,NPCOL) )
00410      $      INFO = -(1000 * 4 + 1)
00411          IF( PARA(2).LT.1 .OR. PARA(2).GE.PARA(3) )
00412      $      INFO = -(1000 * 4 + 2)
00413          IF( PARA(3).LT.1 .OR. PARA(3).GT.NB )
00414      $      INFO = -(1000 * 4 + 3)
00415          IF( PARA(4).LT.0 .OR. PARA(4).GT.100 )
00416      $      INFO = -(1000 * 4 + 4)
00417          IF( PARA(5).LT.1 .OR. PARA(5).GT.NB )
00418      $      INFO = -(1000 * 4 + 5)
00419          IF( PARA(6).LT.1 .OR. PARA(6).GT.PARA(2) )
00420      $      INFO = -(1000 * 4 + 6)
00421       END IF
00422 *
00423 *     Check requirements on IT, JT, IQ and JQ.
00424 *
00425       IF( INFO.EQ.0 ) THEN
00426          IF( IT.NE.1 ) INFO = -6
00427          IF( JT.NE.IT ) INFO = -7
00428          IF( IQ.NE.1 ) INFO = -10
00429          IF( JQ.NE.IQ ) INFO = -11
00430       END IF
00431 *
00432 *     Test input parameters for global sanity.
00433 *
00434       IF( INFO.EQ.0 ) THEN
00435          CALL PCHK1MAT( N, 5, N, 5, IT, JT, DESCT, 9, 0, IDUM1,
00436      $        IDUM2, INFO )
00437       END IF
00438       IF( INFO.EQ.0 ) THEN
00439          CALL PCHK1MAT( N, 5, N, 5, IQ, JQ, DESCQ, 13, 0, IDUM1,
00440      $        IDUM2, INFO )
00441       END IF
00442       IF( INFO.EQ.0 ) THEN
00443          CALL PCHK2MAT( N, 5, N, 5, IT, JT, DESCT, 9, N, 5, N, 5,
00444      $        IQ, JQ, DESCQ, 13, 0, IDUM1, IDUM2, INFO )
00445       END IF
00446 *
00447 *     Decode and test the input parameters.
00448 *
00449       IF( INFO.EQ.0 .OR. LQUERY ) THEN
00450 *
00451          WANTQ = LSAME( COMPQ, 'V' )
00452          IF( N.LT.0 ) THEN
00453             INFO = -4
00454          ELSE
00455 *
00456 *           Extract local leading dimension.
00457 *
00458             LLDT = DESCT( LLD_ )
00459             LLDQ = DESCQ( LLD_ )
00460 *
00461 *           Check the SELECT vector for consistency and set M to the
00462 *           dimension of the specified invariant subspace.
00463 *
00464             M = 0
00465             DO 10 K = 1, N
00466                IF( K.LT.N ) THEN
00467                   CALL INFOG2L( K+1, K, DESCT, NPROW, NPCOL,
00468      $                 MYROW, MYCOL, ITT, JTT, TRSRC, TCSRC )
00469                   IF( MYROW.EQ.TRSRC .AND. MYCOL.EQ.TCSRC ) THEN
00470                      ELEM = T( (JTT-1)*LLDT + ITT )
00471                      IF( ELEM.NE.ZERO ) THEN
00472                         IF( SELECT(K).NE.0 .AND.
00473      $                       SELECT(K+1).EQ.0 ) THEN
00474 *                           INFO = -2
00475                            SELECT(K+1) = 1
00476                         ELSEIF( SELECT(K).EQ.0 .AND.
00477      $                          SELECT(K+1).NE.0 ) THEN
00478 *                           INFO = -2
00479                            SELECT(K) = 1
00480                         END IF
00481                      END IF
00482                   END IF
00483                END IF
00484                IF( SELECT(K).NE.0 ) M = M + 1
00485  10         CONTINUE
00486             MMAX = M
00487             MMIN = M
00488             IF( NPROCS.GT.1 )
00489      $         CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, MMAX, 1, -1,
00490      $              -1, -1, -1, -1 )
00491             IF( NPROCS.GT.1 )
00492      $         CALL IGAMN2D( ICTXT, 'All', TOP, 1, 1, MMIN, 1, -1,
00493      $              -1, -1, -1, -1 )
00494             IF( MMAX.GT.MMIN ) THEN
00495                M = MMAX
00496                IF( NPROCS.GT.1 )
00497      $            CALL IGAMX2D( ICTXT, 'All', TOP, N, 1, SELECT, N,
00498      $                 -1, -1, -1, -1, -1 )
00499             END IF
00500 *
00501 *           Compute needed workspace.
00502 *
00503             N1 = M
00504             N2 = N - M
00505 *
00506             TROWS = NUMROC( N, NB, MYROW, DESCT(RSRC_), NPROW )
00507             TCOLS = NUMROC( N, NB, MYCOL, DESCT(CSRC_), NPCOL )
00508             LWMIN = N + 7*NB**2 + 2*TROWS*PARA( 3 ) + TCOLS*PARA( 3 ) +
00509      $           MAX( TROWS*PARA( 3 ), TCOLS*PARA( 3 ) )
00510             LIWMIN = 5*PARA( 1 ) + PARA( 2 )*PARA( 3 ) -
00511      $           PARA( 2 ) * ( PARA( 2 ) + 1 ) / 2
00512 *
00513             IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
00514                INFO = -17
00515             ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
00516                INFO = -19
00517             END IF
00518          END IF
00519       END IF
00520 *
00521 *     Global maximum on info.
00522 *
00523       IF( NPROCS.GT.1 )
00524      $   CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, -1, -1, -1,
00525      $        -1, -1 )
00526 *
00527 *     Return if some argument is incorrect.
00528 *
00529       IF( INFO.NE.0 .AND. .NOT.LQUERY ) THEN
00530          M = 0
00531          CALL PXERBLA( ICTXT, 'PSTRORD', -INFO )
00532          RETURN
00533       ELSEIF( LQUERY ) THEN
00534          WORK( 1 ) = FLOAT(LWMIN)
00535          IWORK( 1 ) = LIWMIN
00536          RETURN
00537       END IF
00538 *
00539 *     Quick return if possible.
00540 *
00541       IF( M.EQ.N .OR. M.EQ.0 ) GO TO 545
00542 *
00543 *     Set parameters.
00544 *
00545       NUMWIN = PARA( 1 )
00546       WINEIG = MAX( PARA( 2 ), 2 )
00547       WINSIZ = MIN( MAX( PARA( 3 ), PARA( 2 )*2 ), NB )
00548       MMULT  = PARA( 4 )
00549       NCB    = PARA( 5 )
00550       WNEICR = PARA( 6 )
00551 *
00552 *     Insert some pointers into INTEGER workspace.
00553 *
00554 *     Information about all the active windows is stored
00555 *     in IWORK( 1:5*NUMWIN ). Each processor has a copy.
00556 *       LILO: start position
00557 *       LIHI: stop position
00558 *       LSEL: number of selected eigenvalues
00559 *       RSRC: processor id (row)
00560 *       CSRC: processor id (col)
00561 *     IWORK( IPIW+ ) contain information of orthogonal transformations.
00562 *
00563       ILILO = 1
00564       ILIHI = ILILO + NUMWIN
00565       ILSEL = ILIHI + NUMWIN
00566       IRSRC = ILSEL + NUMWIN
00567       ICSRC = IRSRC + NUMWIN
00568       IPIW  = ICSRC + NUMWIN
00569 *
00570 *     Insert some pointers into REAL workspace - for now we
00571 *     only need two pointers.
00572 *
00573       IPW1 = 1
00574       IPW2 = IPW1 + NB
00575 *
00576 *     Collect the selected blocks at the top-left corner of T.
00577 *
00578 *     Globally: ignore eigenvalues that are already in order.
00579 *     ILO is a global variable and is kept updated to be consistent
00580 *     throughout the process mesh.
00581 *
00582       ILO = 0
00583  40   CONTINUE
00584       ILO = ILO + 1
00585       IF( ILO.LE.N ) THEN
00586          IF( SELECT(ILO).NE.0 ) GO TO 40
00587       END IF
00588 *
00589 *     Globally: start the collection at the top of the matrix. Here,
00590 *     IHI is a global variable and is kept updated to be consistent
00591 *     throughout the process mesh.
00592 *
00593       IHI = N
00594 *
00595 *     Globally:  While ( ILO <= M ) do
00596  50   CONTINUE
00597 *
00598       IF( ILO.LE.M ) THEN
00599 *
00600 *        Depending on the value of ILO, find the diagonal block index J,
00601 *        such that T(1+(J-1)*NB:1+J*NB,1+(J-1)*NB:1+J*NB) contains the
00602 *        first unsorted eigenvalue. Check that J does not point to a
00603 *        block with only one selected eigenvalue in the last position
00604 *        which belongs to a splitted 2-by-2 block.
00605 *
00606          ILOS = ILO - 1
00607  52      CONTINUE
00608          ILOS = ILOS + 1
00609          IF( SELECT(ILOS).EQ.0 ) GO TO 52
00610          IF( ILOS.LT.N ) THEN
00611             IF( SELECT(ILOS+1).NE.0 .AND. MOD(ILOS,NB).EQ.0 ) THEN
00612                CALL PSELGET( 'All', TOP, ELEM, T, ILOS+1, ILOS, DESCT )
00613                IF( ELEM.NE.ZERO ) GO TO 52
00614             END IF
00615          END IF
00616          J = ICEIL(ILOS,NB)
00617 *
00618 *        Globally: Set start values of LILO and LIHI for all processes.
00619 *        Choose also the number of selected eigenvalues at top of each
00620 *        diagonal block such that the number of eigenvalues which remain
00621 *        to be reordered is an integer multiple of WINEIG.
00622 *
00623 *        All the information is saved into the INTEGER workspace such
00624 *        that all processors are aware of each others operations.
00625 *
00626 *        Compute the number of concurrent windows.
00627 *
00628          NMWIN2 = (ICEIL(IHI,NB)*NB - (ILO-MOD(ILO,NB)+1)+1) / NB
00629          NMWIN2 = MIN( MIN( NUMWIN, NMWIN2 ), ICEIL(N,NB) - J + 1 )
00630 *
00631 *        For all windows, set LSEL = 0 and find a proper start value of
00632 *        LILO such that LILO points at the first non-selected entry in
00633 *        the corresponding diagonal block of T.
00634 *
00635          DO 80 K = 1, NMWIN2
00636             IWORK( ILSEL+K-1) = 0
00637             IWORK( ILILO+K-1) = MAX( ILO, (J-1)*NB+(K-1)*NB+1 )
00638             LILO = IWORK( ILILO+K-1 )
00639  82         CONTINUE
00640             IF( SELECT(LILO).NE.0 .AND. LILO.LT.(J+K-1)*NB ) THEN
00641                LILO = LILO + 1
00642                IF( LILO.LE.N ) GO TO 82
00643             END IF
00644             IWORK( ILILO+K-1 ) = LILO
00645 *
00646 *           Fix each LILO to ensure that no 2-by-2 block is cut in top
00647 *           of the submatrix (LILO:LIHI,LILO:LIHI).
00648 *
00649             LILO = IWORK(ILILO+K-1)
00650             IF( LILO.GT.NB ) THEN
00651                CALL PSELGET( 'All', TOP, ELEM, T, LILO, LILO-1, DESCT )
00652                IF( ELEM.NE.ZERO ) THEN
00653                   IF( LILO.LT.(J+K-1)*NB ) THEN
00654                      IWORK(ILILO+K-1) = IWORK(ILILO+K-1) + 1
00655                   ELSE
00656                      IWORK(ILILO+K-1) = IWORK(ILILO+K-1) - 1
00657                   END IF
00658                END IF
00659             END IF
00660 *
00661 *           Set a proper LIHI value for each window. Also find the
00662 *           processors corresponding to the corresponding windows.
00663 *
00664             IWORK( ILIHI+K-1 ) =  IWORK( ILILO+K-1 )
00665             IWORK( IRSRC+K-1 ) = INDXG2P( IWORK(ILILO+K-1), NB, MYROW,
00666      $           DESCT( RSRC_ ), NPROW )
00667             IWORK( ICSRC+K-1 ) = INDXG2P( IWORK(ILILO+K-1), NB, MYCOL,
00668      $           DESCT( CSRC_ ), NPCOL )
00669             TILO = IWORK(ILILO+K-1)
00670             TIHI = MIN( N, ICEIL( TILO, NB ) * NB )
00671             DO 90 KK = TIHI, TILO, -1
00672                IF( SELECT(KK).NE.0 ) THEN
00673                   IWORK(ILIHI+K-1) = MAX(IWORK(ILIHI+K-1) , KK )
00674                   IWORK(ILSEL+K-1) = IWORK(ILSEL+K-1) + 1
00675                   IF( IWORK(ILSEL+K-1).GT.WINEIG ) THEN
00676                      IWORK(ILIHI+K-1) = KK
00677                      IWORK(ILSEL+K-1) = 1
00678                   END IF
00679                END IF
00680  90         CONTINUE
00681 *
00682 *           Fix each LIHI to avoid that bottom of window cuts 2-by-2
00683 *           block. We exclude such a block if located on block (process)
00684 *           border and on window border or if an inclusion would cause
00685 *           violation on the maximum number of eigenvalues to reorder
00686 *           inside each window. If only on window border, we include it.
00687 *           The excluded block is included automatically later when a
00688 *           subcluster is reordered into the block from South-East.
00689 *
00690             LIHI = IWORK(ILIHI+K-1)
00691             IF( LIHI.LT.N ) THEN
00692                CALL PSELGET( 'All', TOP, ELEM, T, LIHI+1, LIHI, DESCT )
00693                IF( ELEM.NE.ZERO ) THEN
00694                   IF( ICEIL( LIHI, NB ) .NE. ICEIL( LIHI+1, NB ) .OR.
00695      $                 IWORK( ILSEL+K-1 ).EQ.WINEIG ) THEN
00696                      IWORK( ILIHI+K-1 ) = IWORK( ILIHI+K-1 ) - 1
00697                      IF( IWORK( ILSEL+K-1 ).GT.2 )
00698      $                  IWORK( ILSEL+K-1 ) = IWORK( ILSEL+K-1 ) - 1
00699                   ELSE
00700                      IWORK( ILIHI+K-1 ) = IWORK( ILIHI+K-1 ) + 1
00701                      IF( SELECT(LIHI+1).NE.0 )
00702      $                  IWORK( ILSEL+K-1 ) = IWORK( ILSEL+K-1 ) + 1
00703                   END IF
00704                END IF
00705             END IF
00706  80      CONTINUE
00707 *
00708 *        Fix the special cases of LSEL = 0 and LILO = LIHI for each
00709 *        window by assuring that the stop-condition for local reordering
00710 *        is fulfilled directly. Do this by setting LIHI = startposition
00711 *        for the corresponding block and LILO = LIHI + 1.
00712 *
00713          DO 85 K = 1, NMWIN2
00714             LILO = IWORK( ILILO + K - 1 )
00715             LIHI = IWORK( ILIHI + K - 1 )
00716             LSEL = IWORK( ILSEL + K - 1 )
00717             IF( LSEL.EQ.0 .OR. LILO.EQ.LIHI ) THEN
00718                LIHI = IWORK( ILIHI + K - 1 )
00719                IWORK( ILIHI + K - 1 ) = (ICEIL(LIHI,NB)-1)*NB + 1
00720                IWORK( ILILO + K - 1 ) = IWORK( ILIHI + K - 1 ) + 1
00721             END IF
00722  85      CONTINUE
00723 *
00724 *        Associate all processors with the first computational window
00725 *        that should be activated, if possible.
00726 *
00727          LILO = IHI
00728          LIHI = ILO
00729          LSEL = M
00730          FIRST = .TRUE.
00731          DO 95 WINDOW = 1, NMWIN2
00732             RSRC = IWORK(IRSRC+WINDOW-1)
00733             CSRC = IWORK(ICSRC+WINDOW-1)
00734             IF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN
00735                TLILO = IWORK( ILILO + WINDOW - 1 )
00736                TLIHI = IWORK( ILIHI + WINDOW - 1 )
00737                TLSEL = IWORK( ILSEL + WINDOW - 1 )
00738                IF( (.NOT. ( LIHI .GE. LILO + LSEL ) ) .AND.
00739      $              ( (TLIHI .GE. TLILO + TLSEL) .OR. FIRST ) ) THEN
00740                   IF( FIRST ) FIRST = .FALSE.
00741                   LILO = TLILO
00742                   LIHI = TLIHI
00743                   LSEL = TLSEL
00744                   GO TO 97
00745                END IF
00746             END IF
00747  95      CONTINUE
00748  97      CONTINUE
00749 *
00750 *        Exclude all processors that are not involved in any
00751 *        computational window right now.
00752 *
00753          IERR = 0
00754          IF( LILO.EQ.IHI .AND. LIHI.EQ.ILO .AND. LSEL.EQ.M )
00755      $      GO TO 114
00756 *
00757 *        Make sure all processors associated with a compuational window
00758 *        enter the local reordering the first time.
00759 *
00760          FIRST = .TRUE.
00761 *
00762 *        Globally for all computational windows:
00763 *        While ( LIHI >= LILO + LSEL ) do
00764          ROUND = 1
00765  130     CONTINUE
00766          IF( FIRST .OR. ( LIHI .GE. LILO + LSEL ) ) THEN
00767 *
00768 *           Perform computations in parallel: loop through all
00769 *           compuational windows, do local reordering and accumulate
00770 *           transformations, broadcast them in the corresponding block
00771 *           row and columns and compute the corresponding updates.
00772 *
00773             DO 110 WINDOW = 1, NMWIN2
00774                RSRC = IWORK(IRSRC+WINDOW-1)
00775                CSRC = IWORK(ICSRC+WINDOW-1)
00776 *
00777 *              The process on the block diagonal computes the
00778 *              reordering.
00779 *
00780                IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN
00781                   LILO = IWORK(ILILO+WINDOW-1)
00782                   LIHI = IWORK(ILIHI+WINDOW-1)
00783                   LSEL = IWORK(ILSEL+WINDOW-1)
00784 *
00785 *                 Compute the local value of I -- start position.
00786 *
00787                   I = MAX( LILO, LIHI - WINSIZ + 1 )
00788 *
00789 *                 Fix my I to avoid that top of window cuts a 2-by-2
00790 *                 block.
00791 *
00792                   IF( I.GT.LILO ) THEN
00793                      CALL INFOG2L( I, I-1, DESCT, NPROW, NPCOL, MYROW,
00794      $                    MYCOL, ILOC, JLOC, RSRC, CSRC )
00795                      IF( T( LLDT*(JLOC-1) + ILOC ).NE.ZERO )
00796      $                  I = I + 1
00797                   END IF
00798 *
00799 *                 Compute local indicies for submatrix to operate on.
00800 *
00801                   CALL INFOG2L( I, I, DESCT, NPROW, NPCOL,
00802      $                 MYROW, MYCOL, ILOC1, JLOC1, RSRC, CSRC )
00803 *
00804 *                 The active window is ( I:LIHI, I:LIHI ). Reorder
00805 *                 eigenvalues within this window and pipeline
00806 *                 transformations.
00807 *
00808                   NWIN = LIHI - I + 1
00809                   KS = 0
00810                   PITRAF = IPIW
00811                   PDTRAF = IPW2
00812 *
00813                   PAIR = .FALSE.
00814                   DO 140 K = I, LIHI
00815                      IF( PAIR ) THEN
00816                         PAIR = .FALSE.
00817                      ELSE
00818                         SWAP = SELECT( K ).NE.0
00819                         IF( K.LT.LIHI ) THEN
00820                            CALL INFOG2L( K+1, K, DESCT, NPROW, NPCOL,
00821      $                          MYROW, MYCOL, ILOC, JLOC, RSRC, CSRC )
00822                            IF( T( LLDT*(JLOC-1) + ILOC ).NE.ZERO )
00823      $                        PAIR = .TRUE.
00824                         END IF
00825                         IF( SWAP ) THEN
00826                            KS = KS + 1
00827 *
00828 *                       Swap the K-th block to position I+KS-1.
00829 *
00830                            IERR = 0
00831                            KK  = K - I + 1
00832                            KKS = KS
00833                            IF( KK.NE.KS ) THEN
00834                               NITRAF = LIWORK - PITRAF + 1
00835                               NDTRAF = LWORK - PDTRAF + 1
00836                               CALL BSTREXC( NWIN,
00837      $                             T(LLDT*(JLOC1-1) + ILOC1), LLDT, KK,
00838      $                             KKS, NITRAF, IWORK( PITRAF ), NDTRAF,
00839      $                             WORK( PDTRAF ), WORK(IPW1), IERR )
00840                               PITRAF = PITRAF + NITRAF
00841                               PDTRAF = PDTRAF + NDTRAF
00842 *
00843 *                             Update array SELECT.
00844 *
00845                               IF ( PAIR ) THEN
00846                                  DO 150 J = I+KK-1, I+KKS, -1
00847                                     SELECT(J+1) = SELECT(J-1)
00848  150                             CONTINUE
00849                                  SELECT(I+KKS-1) = 1
00850                                  SELECT(I+KKS) = 1
00851                               ELSE
00852                                  DO 160 J = I+KK-1, I+KKS, -1
00853                                     SELECT(J) = SELECT(J-1)
00854  160                             CONTINUE
00855                                  SELECT(I+KKS-1) = 1
00856                               END IF
00857 *
00858                               IF ( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
00859 *
00860 *                                Some blocks are too close to swap:
00861 *                                prepare to leave in a clean fashion. If
00862 *                                IERR.EQ.2, we must update SELECT to
00863 *                                account for the fact that the 2 by 2
00864 *                                block to be reordered did split and the
00865 *                                first part of this block is already
00866 *                                reordered.
00867 *
00868                                  IF ( IERR.EQ.2 ) THEN
00869                                     SELECT( I+KKS-3 ) = 1
00870                                     SELECT( I+KKS-1 ) = 0
00871                                     KKS = KKS + 1
00872                                  END IF
00873 *
00874 *                                Update off-diagonal blocks immediately.
00875 *
00876                                  GO TO 170
00877                               END IF
00878                               KS = KKS
00879                            END IF
00880                            IF( PAIR )
00881      $                        KS = KS + 1
00882                         END IF
00883                      END IF
00884  140              CONTINUE
00885                END IF
00886  110        CONTINUE
00887  170        CONTINUE
00888 *
00889 *           The on-diagonal processes save their information from the
00890 *           local reordering in the integer buffer. This buffer is
00891 *           broadcasted to updating processors, see below.
00892 *
00893             DO 175 WINDOW = 1, NMWIN2
00894                RSRC = IWORK(IRSRC+WINDOW-1)
00895                CSRC = IWORK(ICSRC+WINDOW-1)
00896                IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN
00897                   IBUFF( 1 ) = I
00898                   IBUFF( 2 ) = NWIN
00899                   IBUFF( 3 ) = PITRAF
00900                   IBUFF( 4 ) = KS
00901                   IBUFF( 5 ) = PDTRAF
00902                   IBUFF( 6 ) = NDTRAF
00903                   ILEN = PITRAF - IPIW
00904                   DLEN = PDTRAF - IPW2
00905                   IBUFF( 7 ) = ILEN
00906                   IBUFF( 8 ) = DLEN
00907                END IF
00908  175        CONTINUE
00909 *
00910 *           For the updates with respect to the local reordering, we
00911 *           organize the updates in two phases where the update
00912 *           "direction" (controlled by the DIR variable below) is first
00913 *           chosen to be the corresponding rows, then the corresponding
00914 *           columns.
00915 *
00916             DO 1111 DIR = 1, 2
00917 *
00918 *           Broadcast information about the reordering and the
00919 *           accumulated transformations: I, NWIN, PITRAF, NITRAF,
00920 *           PDTRAF, NDTRAF. If no broadcast is performed, use an
00921 *           artificial value of KS to prevent updating indicies for
00922 *           windows already finished (use KS = -1).
00923 *
00924             DO 111 WINDOW = 1, NMWIN2
00925                RSRC = IWORK(IRSRC+WINDOW-1)
00926                CSRC = IWORK(ICSRC+WINDOW-1)
00927                IF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN
00928                   LILO = IWORK(ILILO+WINDOW-1)
00929                   LIHI = IWORK(ILIHI+WINDOW-1)
00930                   LSEL = IWORK(ILSEL+WINDOW-1)
00931                END IF
00932                IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN
00933                   IF( NPCOL.GT.1 .AND. DIR.EQ.1 )
00934      $               CALL IGEBS2D( ICTXT, 'Row', TOP, 8, 1, IBUFF, 8 )
00935                   IF( NPROW.GT.1 .AND. DIR.EQ.2 )
00936      $                 CALL IGEBS2D( ICTXT, 'Col', TOP, 8, 1, IBUFF, 8 )
00937                ELSEIF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN
00938                   IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. MYROW.EQ.RSRC )
00939      $                 THEN
00940                      IF( FIRST .OR. (LIHI .GE. LILO + LSEL) ) THEN
00941                         CALL IGEBR2D( ICTXT, 'Row', TOP, 8, 1, IBUFF, 8,
00942      $                       RSRC, CSRC )
00943                         I = IBUFF( 1 )
00944                         NWIN = IBUFF( 2 )
00945                         PITRAF = IBUFF( 3 )
00946                         KS = IBUFF( 4 )
00947                         PDTRAF = IBUFF( 5 )
00948                         NDTRAF = IBUFF( 6 )
00949                         ILEN = IBUFF( 7 )
00950                         DLEN = IBUFF( 8 )
00951                      ELSE
00952                         ILEN = 0
00953                         DLEN = 0
00954                         KS = -1
00955                      END IF
00956                   END IF
00957                   IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. MYCOL.EQ.CSRC )
00958      $                 THEN
00959                      IF( FIRST .OR. (LIHI .GE. LILO + LSEL) ) THEN
00960                         CALL IGEBR2D( ICTXT, 'Col', TOP, 8, 1, IBUFF, 8,
00961      $                       RSRC, CSRC )
00962                         I = IBUFF( 1 )
00963                         NWIN = IBUFF( 2 )
00964                         PITRAF = IBUFF( 3 )
00965                         KS = IBUFF( 4 )
00966                         PDTRAF = IBUFF( 5 )
00967                         NDTRAF = IBUFF( 6 )
00968                         ILEN = IBUFF( 7 )
00969                         DLEN = IBUFF( 8 )
00970                      ELSE
00971                         ILEN = 0
00972                         DLEN = 0
00973                         KS = -1
00974                      END IF
00975                   END IF
00976                END IF
00977 *
00978 *              Broadcast the accumulated transformations - copy all
00979 *              information from IWORK(IPIW:PITRAF-1) and
00980 *              WORK(IPW2:PDTRAF-1) to a buffer and broadcast this
00981 *              buffer in the corresponding block row and column.  On
00982 *              arrival, copy the information back to the correct part of
00983 *              the workspace. This step is avoided if no computations
00984 *              were performed at the diagonal processor, i.e.,
00985 *              BUFFLEN = 0.
00986 *
00987                IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN
00988                   BUFFER = PDTRAF
00989                   BUFFLEN = DLEN + ILEN
00990                   IF( BUFFLEN.NE.0 ) THEN
00991                      DO 180 INDX = 1, ILEN
00992                         WORK( BUFFER+INDX-1 ) =
00993      $                       FLOAT( IWORK(IPIW+INDX-1) )
00994  180                 CONTINUE
00995                      CALL SLAMOV( 'All', DLEN, 1, WORK( IPW2 ),
00996      $                    DLEN, WORK(BUFFER+ILEN), DLEN )
00997                      IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN
00998                         CALL SGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1,
00999      $                       WORK(BUFFER), BUFFLEN )
01000                      END IF
01001                      IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN
01002                         CALL SGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1,
01003      $                       WORK(BUFFER), BUFFLEN )
01004                      END IF
01005                   END IF
01006                ELSEIF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN
01007                   IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. MYROW.EQ.RSRC )
01008      $                 THEN
01009                      BUFFER = PDTRAF
01010                      BUFFLEN = DLEN + ILEN
01011                      IF( BUFFLEN.NE.0 ) THEN
01012                         CALL SGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1,
01013      $                       WORK(BUFFER), BUFFLEN, RSRC, CSRC )
01014                      END IF
01015                   END IF
01016                   IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. MYCOL.EQ.CSRC )
01017      $                 THEN
01018                      BUFFER = PDTRAF
01019                      BUFFLEN = DLEN + ILEN
01020                      IF( BUFFLEN.NE.0 ) THEN
01021                         CALL SGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1,
01022      $                       WORK(BUFFER), BUFFLEN, RSRC, CSRC )
01023                      END IF
01024                   END IF
01025                   IF((NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC).OR.
01026      $                 (NPROW.GT.1.AND.DIR.EQ.2.AND.MYCOL.EQ.CSRC ) )
01027      $                 THEN
01028                      IF( BUFFLEN.NE.0 ) THEN
01029                         DO 190 INDX = 1, ILEN
01030                            IWORK(IPIW+INDX-1) =
01031      $                          INT(WORK( BUFFER+INDX-1 ))
01032  190                    CONTINUE
01033                         CALL SLAMOV( 'All', DLEN, 1,
01034      $                       WORK( BUFFER+ILEN ), DLEN,
01035      $                       WORK( IPW2 ), DLEN )
01036                      END IF
01037                   END IF
01038                END IF
01039  111        CONTINUE
01040 *
01041 *           Now really perform the updates by applying the orthogonal
01042 *           transformations to the out-of-window parts of T and Q. This
01043 *           step is avoided if no reordering was performed by the on-
01044 *           diagonal processor from the beginning, i.e., BUFFLEN = 0.
01045 *
01046 *           Count number of operations to decide whether to use
01047 *           matrix-matrix multiplications for updating off-diagonal
01048 *           parts or not.
01049 *
01050             DO 112 WINDOW = 1, NMWIN2
01051                RSRC = IWORK(IRSRC+WINDOW-1)
01052                CSRC = IWORK(ICSRC+WINDOW-1)
01053 *
01054                IF( (MYROW.EQ.RSRC .AND. DIR.EQ.1 ).OR.
01055      $              (MYCOL.EQ.CSRC .AND. DIR.EQ.2 ) ) THEN
01056                   LILO = IWORK(ILILO+WINDOW-1)
01057                   LIHI = IWORK(ILIHI+WINDOW-1)
01058                   LSEL = IWORK(ILSEL+WINDOW-1)
01059 *
01060 *                 Skip update part for current WINDOW if BUFFLEN = 0.
01061 *
01062                   IF( BUFFLEN.EQ.0 ) GO TO 295
01063 *
01064                   NITRAF = PITRAF - IPIW
01065                   ISHH = .FALSE.
01066                   FLOPS = 0
01067                   DO 200 K = 1, NITRAF
01068                      IF( IWORK( IPIW + K - 1 ).LE.NWIN ) THEN
01069                         FLOPS = FLOPS + 6
01070                      ELSE
01071                         FLOPS = FLOPS + 11
01072                         ISHH = .TRUE.
01073                      END IF
01074  200              CONTINUE
01075 *
01076 *                 Compute amount of work space necessary for performing
01077 *                 matrix-matrix multiplications.
01078 *
01079                   PDW = BUFFER
01080                   IPW3 = PDW + NWIN*NWIN
01081                ELSE
01082                   FLOPS = 0
01083                END IF
01084 *
01085                IF( FLOPS.NE.0 .AND.
01086      $              ( FLOPS*100 ) / ( 2*NWIN*NWIN ) .GE. MMULT ) THEN
01087 *
01088 *                 Update off-diagonal blocks and Q using matrix-matrix
01089 *                 multiplications; if there are no Householder
01090 *                 reflectors it is preferable to take the triangular
01091 *                 block structure of the transformation matrix into
01092 *                 account.
01093 *
01094                   CALL SLASET( 'All', NWIN, NWIN, ZERO, ONE,
01095      $                 WORK( PDW ), NWIN )
01096                   CALL BSLAAPP( 1, NWIN, NWIN, NCB, WORK( PDW ), NWIN,
01097      $                 NITRAF, IWORK(IPIW), WORK( IPW2 ), WORK(IPW3) )
01098 *
01099                   IF( ISHH ) THEN
01100 *
01101 *                    Loop through the local blocks of the distributed
01102 *                    matrices T and Q and update them according to the
01103 *                    performed reordering.
01104 *
01105 *                    Update the columns of T and Q affected by the
01106 *                    reordering.
01107 *
01108                      IF( DIR.EQ.2 ) THEN
01109                         DO 210 INDX = 1, I-1, NB
01110                            CALL INFOG2L( INDX, I, DESCT, NPROW, NPCOL,
01111      $                          MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01112                            IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 )
01113      $                          THEN
01114                               LROWS = MIN(NB,I-INDX)
01115                               CALL SGEMM( 'No transpose',
01116      $                             'No transpose', LROWS, NWIN, NWIN,
01117      $                             ONE, T((JLOC-1)*LLDT+ILOC), LLDT,
01118      $                             WORK( PDW ), NWIN, ZERO,
01119      $                             WORK(IPW3), LROWS )
01120                               CALL SLAMOV( 'All', LROWS, NWIN,
01121      $                             WORK(IPW3), LROWS,
01122      $                             T((JLOC-1)*LLDT+ILOC), LLDT )
01123                            END IF
01124  210                    CONTINUE
01125                         IF( WANTQ ) THEN
01126                            DO 220 INDX = 1, N, NB
01127                               CALL INFOG2L( INDX, I, DESCQ, NPROW,
01128      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
01129      $                             RSRC1, CSRC1 )
01130                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01131      $                             THEN
01132                                  LROWS = MIN(NB,N-INDX+1)
01133                                  CALL SGEMM( 'No transpose',
01134      $                                'No transpose', LROWS, NWIN, NWIN,
01135      $                                ONE, Q((JLOC-1)*LLDQ+ILOC), LLDQ,
01136      $                                WORK( PDW ), NWIN, ZERO,
01137      $                                WORK(IPW3), LROWS )
01138                                  CALL SLAMOV( 'All', LROWS, NWIN,
01139      $                                WORK(IPW3), LROWS,
01140      $                                Q((JLOC-1)*LLDQ+ILOC), LLDQ )
01141                               END IF
01142  220                       CONTINUE
01143                         END IF
01144                      END IF
01145 *
01146 *                    Update the rows of T affected by the reordering
01147 *
01148                      IF( DIR.EQ.1 ) THEN
01149                         IF( LIHI.LT.N ) THEN
01150                            IF( MOD(LIHI,NB).GT.0 ) THEN
01151                               INDX = LIHI + 1
01152                               CALL INFOG2L( I, INDX, DESCT, NPROW,
01153      $                            NPCOL, MYROW, MYCOL, ILOC, JLOC,
01154      $                            RSRC1, CSRC1 )
01155                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01156      $                             THEN
01157                                  LCOLS = MOD( MIN( NB-MOD(LIHI,NB),
01158      $                                N-LIHI ), NB )
01159                                  CALL SGEMM( 'Transpose',
01160      $                                'No Transpose', NWIN, LCOLS, NWIN,
01161      $                                ONE, WORK( PDW ), NWIN,
01162      $                                T((JLOC-1)*LLDT+ILOC), LLDT, ZERO,
01163      $                                WORK(IPW3), NWIN )
01164                                  CALL SLAMOV( 'All', NWIN, LCOLS,
01165      $                                WORK(IPW3), NWIN,
01166      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
01167                               END IF
01168                            END IF
01169                            INDXS = ICEIL(LIHI,NB)*NB + 1
01170                            DO 230 INDX = INDXS, N, NB
01171                               CALL INFOG2L( I, INDX, DESCT, NPROW,
01172      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
01173      $                             RSRC1, CSRC1 )
01174                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01175      $                             THEN
01176                                  LCOLS = MIN( NB, N-INDX+1 )
01177                                  CALL SGEMM( 'Transpose',
01178      $                                'No Transpose', NWIN, LCOLS, NWIN,
01179      $                                ONE, WORK( PDW ), NWIN,
01180      $                                T((JLOC-1)*LLDT+ILOC), LLDT, ZERO,
01181      $                                WORK(IPW3), NWIN )
01182                                  CALL SLAMOV( 'All', NWIN, LCOLS,
01183      $                                WORK(IPW3), NWIN,
01184      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
01185                               END IF
01186  230                       CONTINUE
01187                         END IF
01188                      END IF
01189                   ELSE
01190 *
01191 *                    The NWIN-by-NWIN matrix U containing the
01192 *                    accumulated orthogonal transformations has the
01193 *                    following structure:
01194 *
01195 *                                  [ U11  U12 ]
01196 *                              U = [          ],
01197 *                                  [ U21  U22 ]
01198 *
01199 *                    where U21 is KS-by-KS upper triangular and U12 is
01200 *                    (NWIN-KS)-by-(NWIN-KS) lower triangular.
01201 *
01202 *                    Update the columns of T and Q affected by the
01203 *                    reordering.
01204 *
01205 *                    Compute T2*U21 + T1*U11 in workspace.
01206 *
01207                      IF( DIR.EQ.2 ) THEN
01208                         DO 240 INDX = 1, I-1, NB
01209                            CALL INFOG2L( INDX, I, DESCT, NPROW, NPCOL,
01210      $                          MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01211                            IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 )
01212      $                          THEN
01213                               JLOC1 = INDXG2L( I+NWIN-KS, NB, MYCOL,
01214      $                             DESCT( CSRC_ ), NPCOL )
01215                               LROWS = MIN(NB,I-INDX)
01216                               CALL SLAMOV( 'All', LROWS, KS,
01217      $                             T((JLOC1-1)*LLDT+ILOC ), LLDT,
01218      $                             WORK(IPW3), LROWS )
01219                               CALL STRMM( 'Right', 'Upper',
01220      $                              'No transpose',
01221      $                             'Non-unit', LROWS, KS, ONE,
01222      $                             WORK( PDW+NWIN-KS ), NWIN,
01223      $                             WORK(IPW3), LROWS )
01224                               CALL SGEMM( 'No transpose',
01225      $                             'No transpose', LROWS, KS, NWIN-KS,
01226      $                             ONE, T((JLOC-1)*LLDT+ILOC), LLDT,
01227      $                             WORK( PDW ), NWIN, ONE, WORK(IPW3),
01228      $                             LROWS )
01229 *
01230 *                             Compute T1*U12 + T2*U22 in workspace.
01231 *
01232                               CALL SLAMOV( 'All', LROWS, NWIN-KS,
01233      $                             T((JLOC-1)*LLDT+ILOC), LLDT,
01234      $                             WORK( IPW3+KS*LROWS ), LROWS )
01235                               CALL STRMM( 'Right', 'Lower',
01236      $                             'No transpose', 'Non-unit',
01237      $                             LROWS, NWIN-KS, ONE,
01238      $                             WORK( PDW+NWIN*KS ), NWIN,
01239      $                             WORK( IPW3+KS*LROWS ), LROWS )
01240                               CALL SGEMM( 'No transpose',
01241      $                             'No transpose', LROWS, NWIN-KS, KS,
01242      $                             ONE, T((JLOC1-1)*LLDT+ILOC), LLDT,
01243      $                             WORK( PDW+NWIN*KS+NWIN-KS ), NWIN,
01244      $                             ONE, WORK( IPW3+KS*LROWS ), LROWS )
01245 *
01246 *                             Copy workspace to T.
01247 *
01248                               CALL SLAMOV( 'All', LROWS, NWIN,
01249      $                             WORK(IPW3), LROWS,
01250      $                             T((JLOC-1)*LLDT+ILOC), LLDT )
01251                            END IF
01252  240                    CONTINUE
01253                         IF( WANTQ ) THEN
01254 *
01255 *                          Compute Q2*U21 + Q1*U11 in workspace.
01256 *
01257                            DO 250 INDX = 1, N, NB
01258                               CALL INFOG2L( INDX, I, DESCQ, NPROW,
01259      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
01260      $                             RSRC1, CSRC1 )
01261                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01262      $                             THEN
01263                                  JLOC1 = INDXG2L( I+NWIN-KS, NB,
01264      $                                MYCOL, DESCQ( CSRC_ ), NPCOL )
01265                                  LROWS = MIN(NB,N-INDX+1)
01266                                  CALL SLAMOV( 'All', LROWS, KS,
01267      $                                Q((JLOC1-1)*LLDQ+ILOC ), LLDQ,
01268      $                                WORK(IPW3), LROWS )
01269                                  CALL STRMM( 'Right', 'Upper',
01270      $                                'No transpose', 'Non-unit',
01271      $                                LROWS, KS, ONE,
01272      $                                WORK( PDW+NWIN-KS ), NWIN,
01273      $                                WORK(IPW3), LROWS )
01274                                  CALL SGEMM( 'No transpose',
01275      $                                'No transpose', LROWS, KS,
01276      $                                NWIN-KS, ONE,
01277      $                                Q((JLOC-1)*LLDQ+ILOC), LLDQ,
01278      $                                WORK( PDW ), NWIN, ONE,
01279      $                                WORK(IPW3), LROWS )
01280 *
01281 *                                Compute Q1*U12 + Q2*U22 in workspace.
01282 *
01283                                  CALL SLAMOV( 'All', LROWS, NWIN-KS,
01284      $                                Q((JLOC-1)*LLDQ+ILOC), LLDQ,
01285      $                                WORK( IPW3+KS*LROWS ), LROWS)
01286                                  CALL STRMM( 'Right', 'Lower',
01287      $                                'No transpose', 'Non-unit',
01288      $                                LROWS, NWIN-KS, ONE,
01289      $                                WORK( PDW+NWIN*KS ), NWIN,
01290      $                                WORK( IPW3+KS*LROWS ), LROWS)
01291                                  CALL SGEMM( 'No transpose',
01292      $                                'No transpose', LROWS, NWIN-KS,
01293      $                                KS, ONE, Q((JLOC1-1)*LLDQ+ILOC),
01294      $                                LLDQ, WORK(PDW+NWIN*KS+NWIN-KS),
01295      $                                NWIN, ONE, WORK( IPW3+KS*LROWS ),
01296      $                                LROWS )
01297 *
01298 *                                Copy workspace to Q.
01299 *
01300                                  CALL SLAMOV( 'All', LROWS, NWIN,
01301      $                                WORK(IPW3), LROWS,
01302      $                                Q((JLOC-1)*LLDQ+ILOC), LLDQ )
01303                               END IF
01304  250                       CONTINUE
01305                         END IF
01306                      END IF
01307 *
01308                      IF( DIR.EQ.1 ) THEN
01309                         IF ( LIHI.LT.N ) THEN
01310 *
01311 *                          Compute U21**T*T2 + U11**T*T1 in workspace.
01312 *
01313                            IF( MOD(LIHI,NB).GT.0 ) THEN
01314                               INDX = LIHI + 1
01315                               CALL INFOG2L( I, INDX, DESCT, NPROW,
01316      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
01317      $                             RSRC1, CSRC1 )
01318                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01319      $                             THEN
01320                                  ILOC1 = INDXG2L( I+NWIN-KS, NB, MYROW,
01321      $                                DESCT( RSRC_ ), NPROW )
01322                                  LCOLS = MOD( MIN( NB-MOD(LIHI,NB),
01323      $                                N-LIHI ), NB )
01324                                  CALL SLAMOV( 'All', KS, LCOLS,
01325      $                                T((JLOC-1)*LLDT+ILOC1), LLDT,
01326      $                                WORK(IPW3), NWIN )
01327                                  CALL STRMM( 'Left', 'Upper',
01328      $                                'Transpose', 'Non-unit', KS,
01329      $                                LCOLS, ONE, WORK( PDW+NWIN-KS ),
01330      $                                NWIN, WORK(IPW3), NWIN )
01331                                  CALL SGEMM( 'Transpose',
01332      $                                'No transpose', KS, LCOLS,
01333      $                                NWIN-KS, ONE, WORK(PDW), NWIN,
01334      $                                T((JLOC-1)*LLDT+ILOC), LLDT, ONE,
01335      $                                WORK(IPW3), NWIN )
01336 *
01337 *                                Compute U12**T*T1 + U22**T*T2 in
01338 *                                workspace.
01339 *
01340                                  CALL SLAMOV( 'All', NWIN-KS, LCOLS,
01341      $                                T((JLOC-1)*LLDT+ILOC), LLDT,
01342      $                                WORK( IPW3+KS ), NWIN )
01343                                  CALL STRMM( 'Left', 'Lower',
01344      $                                'Transpose', 'Non-unit',
01345      $                                NWIN-KS, LCOLS, ONE,
01346      $                                WORK( PDW+NWIN*KS ), NWIN,
01347      $                                WORK( IPW3+KS ), NWIN )
01348                                  CALL SGEMM( 'Transpose',
01349      $                                'No Transpose', NWIN-KS, LCOLS,
01350      $                                KS, ONE,
01351      $                                WORK( PDW+NWIN*KS+NWIN-KS ),
01352      $                                NWIN, T((JLOC-1)*LLDT+ILOC1),
01353      $                                LLDT, ONE, WORK( IPW3+KS ),
01354      $                                NWIN )
01355 *
01356 *                                Copy workspace to T.
01357 *
01358                                  CALL SLAMOV( 'All', NWIN, LCOLS,
01359      $                                WORK(IPW3), NWIN,
01360      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
01361                               END IF
01362                            END IF
01363                            INDXS = ICEIL(LIHI,NB)*NB + 1
01364                            DO 260 INDX = INDXS, N, NB
01365                               CALL INFOG2L( I, INDX, DESCT, NPROW,
01366      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
01367      $                             RSRC1, CSRC1 )
01368                               IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 )
01369      $                             THEN
01370 *
01371 *                                Compute U21**T*T2 + U11**T*T1 in
01372 *                                workspace.
01373 *
01374                                  ILOC1 = INDXG2L( I+NWIN-KS, NB,
01375      $                                MYROW, DESCT( RSRC_ ), NPROW )
01376                                  LCOLS = MIN( NB, N-INDX+1 )
01377                                  CALL SLAMOV( 'All', KS, LCOLS,
01378      $                                T((JLOC-1)*LLDT+ILOC1), LLDT,
01379      $                                WORK(IPW3), NWIN )
01380                                  CALL STRMM( 'Left', 'Upper',
01381      $                                'Transpose', 'Non-unit', KS,
01382      $                                LCOLS, ONE,
01383      $                                WORK( PDW+NWIN-KS ), NWIN,
01384      $                                WORK(IPW3), NWIN )
01385                                  CALL SGEMM( 'Transpose',
01386      $                                'No transpose', KS, LCOLS,
01387      $                                NWIN-KS, ONE, WORK(PDW), NWIN,
01388      $                                T((JLOC-1)*LLDT+ILOC), LLDT, ONE,
01389      $                                WORK(IPW3), NWIN )
01390 *
01391 *                                Compute U12**T*T1 + U22**T*T2 in
01392 *                                workspace.
01393 *
01394                                  CALL SLAMOV( 'All', NWIN-KS, LCOLS,
01395      $                                T((JLOC-1)*LLDT+ILOC), LLDT,
01396      $                                WORK( IPW3+KS ), NWIN )
01397                                  CALL STRMM( 'Left', 'Lower',
01398      $                                'Transpose', 'Non-unit',
01399      $                                NWIN-KS, LCOLS, ONE,
01400      $                                WORK( PDW+NWIN*KS ), NWIN,
01401      $                                WORK( IPW3+KS ), NWIN )
01402                                  CALL SGEMM( 'Transpose',
01403      $                                'No Transpose', NWIN-KS, LCOLS,
01404      $                                KS, ONE,
01405      $                                WORK( PDW+NWIN*KS+NWIN-KS ),
01406      $                                NWIN, T((JLOC-1)*LLDT+ILOC1),
01407      $                                LLDT, ONE, WORK(IPW3+KS), NWIN )
01408 *
01409 *                                Copy workspace to T.
01410 *
01411                                  CALL SLAMOV( 'All', NWIN, LCOLS,
01412      $                                WORK(IPW3), NWIN,
01413      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
01414                               END IF
01415  260                       CONTINUE
01416                         END IF
01417                      END IF
01418                   END IF
01419                ELSEIF( FLOPS.NE.0 ) THEN
01420 *
01421 *                 Update off-diagonal blocks and Q using the pipelined
01422 *                 elementary transformations.
01423 *
01424                   IF( DIR.EQ.2 ) THEN
01425                      DO 270 INDX = 1, I-1, NB
01426                         CALL INFOG2L( INDX, I, DESCT, NPROW, NPCOL,
01427      $                       MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01428                         IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01429                            LROWS = MIN(NB,I-INDX)
01430                            CALL BSLAAPP( 1, LROWS, NWIN, NCB,
01431      $                          T((JLOC-1)*LLDT+ILOC ), LLDT, NITRAF,
01432      $                          IWORK(IPIW), WORK( IPW2 ),
01433      $                          WORK(IPW3) )
01434                         END IF
01435  270                 CONTINUE
01436                      IF( WANTQ ) THEN
01437                         DO 280 INDX = 1, N, NB
01438                            CALL INFOG2L( INDX, I, DESCQ, NPROW, NPCOL,
01439      $                          MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01440                            IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 )
01441      $                          THEN
01442                               LROWS = MIN(NB,N-INDX+1)
01443                               CALL BSLAAPP( 1, LROWS, NWIN, NCB,
01444      $                             Q((JLOC-1)*LLDQ+ILOC), LLDQ, NITRAF,
01445      $                             IWORK(IPIW), WORK( IPW2 ),
01446      $                             WORK(IPW3) )
01447                            END IF
01448  280                    CONTINUE
01449                      END IF
01450                   END IF
01451                   IF( DIR.EQ.1 ) THEN
01452                      IF( LIHI.LT.N ) THEN
01453                         IF( MOD(LIHI,NB).GT.0 ) THEN
01454                            INDX = LIHI + 1
01455                            CALL INFOG2L( I, INDX, DESCT, NPROW, NPCOL,
01456      $                          MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01457                            IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 )
01458      $                          THEN
01459                               LCOLS = MOD( MIN( NB-MOD(LIHI,NB),
01460      $                             N-LIHI ), NB )
01461                               CALL BSLAAPP( 0, NWIN, LCOLS, NCB,
01462      $                             T((JLOC-1)*LLDT+ILOC), LLDT, NITRAF,
01463      $                             IWORK(IPIW), WORK( IPW2 ),
01464      $                             WORK(IPW3) )
01465                            END IF
01466                         END IF
01467                         INDXS = ICEIL(LIHI,NB)*NB + 1
01468                         DO 290 INDX = INDXS, N, NB
01469                            CALL INFOG2L( I, INDX, DESCT, NPROW, NPCOL,
01470      $                          MYROW, MYCOL, ILOC, JLOC, RSRC1, CSRC1 )
01471                            IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 )
01472      $                          THEN
01473                               LCOLS = MIN( NB, N-INDX+1 )
01474                               CALL BSLAAPP( 0, NWIN, LCOLS, NCB,
01475      $                             T((JLOC-1)*LLDT+ILOC), LLDT, NITRAF,
01476      $                             IWORK(IPIW), WORK( IPW2 ),
01477      $                             WORK(IPW3) )
01478                            END IF
01479  290                    CONTINUE
01480                      END IF
01481                   END IF
01482                END IF
01483 *
01484 *              If I was not involved in the updates for the current
01485 *              window or the window was fully processed, I go here and
01486 *              try again for the next window.
01487 *
01488  295           CONTINUE
01489 *
01490 *              Update LIHI and LIHI depending on the number of
01491 *              eigenvalues really moved - for on-diagonal processes we
01492 *              do this update only once since each on-diagonal process
01493 *              is only involved with one window at one time. The
01494 *              indicies are updated in three cases:
01495 *                1) When some reordering was really performed
01496 *                   -- indicated by BUFFLEN > 0.
01497 *                2) When no selected eigenvalues was found in the
01498 *                   current window -- indicated by KS = 0.
01499 *                3) When some selected eigenvalues was found in the
01500 *                   current window but no one of them was moved
01501 *                   (KS > 0 and BUFFLEN = 0)
01502 *              False index updating is avoided by sometimes setting
01503 *              KS = -1. This will affect processors involved in more
01504 *              than one window and where the first one ends up with
01505 *              KS = 0 and for the second one is done already.
01506 *
01507                IF( MYROW.EQ.RSRC.AND.MYCOL.EQ.CSRC ) THEN
01508                   IF( DIR.EQ.2 ) THEN
01509                      IF( BUFFLEN.NE.0 .OR. KS.EQ.0 .OR.
01510      $                    ( BUFFLEN.EQ.0 .AND. KS.GT.0 ) )
01511      $                  LIHI = I + KS - 1
01512                      IWORK( ILIHI+WINDOW-1 ) = LIHI
01513                      IF( .NOT. LIHI.GE.LILO+LSEL ) THEN
01514                         LILO = LILO + LSEL
01515                         IWORK( ILILO+WINDOW-1 ) = LILO
01516                      END IF
01517                   END IF
01518                ELSEIF( MYROW.EQ.RSRC .AND. DIR.EQ.1 ) THEN
01519                   IF( BUFFLEN.NE.0 .OR. KS.EQ.0 .OR.
01520      $                 ( BUFFLEN.EQ.0 .AND. KS.GT.0 ) )
01521      $               LIHI = I + KS - 1
01522                   IWORK( ILIHI+WINDOW-1 ) = LIHI
01523                   IF( .NOT. LIHI.GE.LILO+LSEL ) THEN
01524                      LILO = LILO + LSEL
01525                      IWORK( ILILO+WINDOW-1 ) = LILO
01526                   END IF
01527                ELSEIF( MYCOL.EQ.CSRC .AND. DIR.EQ.2 ) THEN
01528                   IF( BUFFLEN.NE.0 .OR. KS.EQ.0 .OR.
01529      $                 ( BUFFLEN.EQ.0 .AND. KS.GT.0 ) )
01530      $               LIHI = I + KS - 1
01531                   IWORK( ILIHI+WINDOW-1 ) = LIHI
01532                   IF( .NOT. LIHI.GE.LILO+LSEL ) THEN
01533                      LILO = LILO + LSEL
01534                      IWORK( ILILO+WINDOW-1 ) = LILO
01535                   END IF
01536                END IF
01537 *
01538  112        CONTINUE
01539 *
01540 *           End of direction loop for updates with respect to local
01541 *           reordering.
01542 *
01543  1111       CONTINUE
01544 *
01545 *           Associate each process with one of the corresponding
01546 *           computational windows such that the test for another round
01547 *           of local reordering is carried out properly. Since the
01548 *           column updates were computed after the row updates, it is
01549 *           sufficient to test for changing the association to the
01550 *           window in the corresponding process row.
01551 *
01552             DO 113 WINDOW = 1, NMWIN2
01553                RSRC = IWORK( IRSRC + WINDOW - 1 )
01554                IF( MYROW.EQ.RSRC .AND. (.NOT. LIHI.GE.LILO+LSEL ) ) THEN
01555                   LILO = IWORK( ILILO + WINDOW - 1 )
01556                   LIHI = IWORK( ILIHI + WINDOW - 1 )
01557                   LSEL = IWORK( ILSEL + WINDOW - 1 )
01558                END IF
01559  113        CONTINUE
01560 *
01561 *           End While ( LIHI >= LILO + LSEL )
01562             ROUND = ROUND + 1
01563             IF( FIRST ) FIRST = .FALSE.
01564             GO TO 130
01565          END IF
01566 *
01567 *        All processors excluded from the local reordering go here.
01568 *
01569  114     CONTINUE
01570 *
01571 *        Barrier to collect the processes before proceeding.
01572 *
01573          CALL BLACS_BARRIER( ICTXT, 'All' )
01574 *
01575 *        Compute global maximum of IERR so that we know if some process
01576 *        experienced a failure in the reordering.
01577 *
01578          MYIERR = IERR
01579          IF( NPROCS.GT.1 )
01580      $      CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, IERR, 1, -1,
01581      $           -1, -1, -1, -1 )
01582 *
01583          IF( IERR.NE.0 ) THEN
01584 *
01585 *           When calling BDTREXC, the block at position I+KKS-1 failed
01586 *           to swap.
01587 *
01588             IF( MYIERR.NE.0 ) INFO = MAX(1,I+KKS-1)
01589             IF( NPROCS.GT.1 )
01590      $         CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, -1,
01591      $              -1, -1, -1, -1 )
01592             GO TO 300
01593          END IF
01594 *
01595 *        Now, for each compuational window, move the selected
01596 *        eigenvalues across the process border. Do this by forming the
01597 *        processors into groups of four working together to bring the
01598 *        window over the border. The processes are numbered as follows
01599 *
01600 *                1 | 2
01601 *                --+--
01602 *                3 | 4
01603 *
01604 *        where '|' and '-' denotes the process (and block) borders.
01605 *        This implies that the cluster to be reordered over the border
01606 *        is held by process 4, process 1 will receive the cluster after
01607 *        the reordering, process 3 holds the local (2,1)th element of a
01608 *        2-by-2 diagonal block located on the block border and process 2
01609 *        holds the closest off-diagonal part of the window that is
01610 *        affected by the cross-border reordering.
01611 *
01612 *        The active window is now ( I : LIHI[4], I : LIHI[4] ), where
01613 *        I = MAX( ILO, LIHI - 2*MOD(LIHI,NB) ). If this active window is
01614 *        too large compared to the value of PARA( 6 ), it will be
01615 *        truncated in both ends such that a maximum of PARA( 6 )
01616 *        eigenvalues is reordered across the border this time.
01617 *
01618 *        The active window will be collected and built in workspace at
01619 *        process 1 and 4, which both compute the reordering and return
01620 *        the updated parts to the corresponding processes 2-3. Next, the
01621 *        accumulated transformations are broadcasted for updates in the
01622 *        block rows and column that corresponds to the process rows and
01623 *        columns where process 1 and 4 reside.
01624 *
01625 *        The off-diagonal blocks are updated by the processes receiving
01626 *        from the broadcasts of the orthogonal transformations. Since
01627 *        the active window is split over the process borders, the
01628 *        updates of T and Q requires that stripes of block rows of
01629 *        columns are exchanged between neighboring processes in the
01630 *        corresponding process rows and columns.
01631 *
01632 *        First, form each group of processors involved in the
01633 *        crossborder reordering. Do this in two (or three) phases:
01634 *        1) Reorder each odd window over the border.
01635 *        2) Reorder each even window over the border.
01636 *        3) Reorder the last odd window over the border, if it was not
01637 *           processed in the first phase.
01638 *
01639 *        When reordering the odd windows over the border, we must make
01640 *        sure that no process row or column is involved in both the
01641 *        first and the last window at the same time. This happens when
01642 *        the total number of windows is odd, greater than one and equal
01643 *        to the minumum process mesh dimension. Therefore the last odd
01644 *        window may be reordered over the border at last.
01645 *
01646          LASTWAIT = NMWIN2.GT.1 .AND. MOD(NMWIN2,2).EQ.1 .AND.
01647      $        NMWIN2.EQ.MIN(NPROW,NPCOL)
01648 *
01649          LAST = 0
01650  308     CONTINUE
01651          IF( LASTWAIT ) THEN
01652             IF( LAST.EQ.0 ) THEN
01653                WIN0S = 1
01654                WIN0E = 2
01655                WINE = NMWIN2 - 1
01656             ELSE
01657                WIN0S = NMWIN2
01658                WIN0E = NMWIN2
01659                WINE = NMWIN2
01660             END IF
01661          ELSE
01662             WIN0S = 1
01663             WIN0E = 2
01664             WINE = NMWIN2
01665          END IF
01666          DO 310 WINDOW0 = WIN0S, WIN0E
01667             DO 320 WINDOW = WINDOW0, WINE, 2
01668 *
01669 *              Define the process holding the down-right part of the
01670 *              window.
01671 *
01672                RSRC4 = IWORK(IRSRC+WINDOW-1)
01673                CSRC4 = IWORK(ICSRC+WINDOW-1)
01674 *
01675 *              Define the other processes in the group of four.
01676 *
01677                RSRC3 = RSRC4
01678                CSRC3 = MOD( CSRC4 - 1 + NPCOL, NPCOL )
01679                RSRC2 = MOD( RSRC4 - 1 + NPROW, NPROW )
01680                CSRC2 = CSRC4
01681                RSRC1 = RSRC2
01682                CSRC1 = CSRC3
01683                IF( ( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) .OR.
01684      $             ( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) .OR.
01685      $             ( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) .OR.
01686      $             ( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) ) THEN
01687 *
01688 *                 Compute the correct active window - for reordering
01689 *                 into a block that has not been active at all before,
01690 *                 we try to reorder as many of our eigenvalues over the
01691 *                 border as possible without knowing of the situation on
01692 *                 the other side - this may cause very few eigenvalues
01693 *                 to be reordered over the border this time (perhaps not
01694 *                 any) but this should be an initial problem.  Anyway,
01695 *                 the bottom-right position of the block will be at
01696 *                 position LIHIC.
01697 *
01698                   IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
01699                      LIHI4 = ( IWORK( ILILO + WINDOW - 1 ) +
01700      $                    IWORK( ILIHI + WINDOW - 1 ) ) / 2
01701                      LIHIC = MIN(LIHI4,(ICEIL(LIHI4,NB)-1)*NB+WNEICR)
01702 *
01703 *                    Fix LIHIC to avoid that bottom of window cuts
01704 *                    2-by-2 block and make sure all processors in the
01705 *                    group knows about the correct value.
01706 *
01707                      IF( (.NOT. LIHIC.LE.NB) .AND. LIHIC.LT.N ) THEN
01708                         ILOC = INDXG2L( LIHIC+1, NB, MYROW,
01709      $                       DESCT( RSRC_ ), NPROW )
01710                         JLOC = INDXG2L( LIHIC, NB, MYCOL,
01711      $                       DESCT( CSRC_ ), NPCOL )
01712                         IF( T( (JLOC-1)*LLDT+ILOC ).NE.ZERO ) THEN
01713                            IF( MOD( LIHIC, NB ).EQ.1 .OR.
01714      $                          ( MOD( LIHIC, NB ).EQ.2 .AND.
01715      $                          SELECT(LIHIC-2).EQ.0 ) )
01716      $                          THEN
01717                               LIHIC = LIHIC + 1
01718                            ELSE
01719                               LIHIC = LIHIC - 1
01720                            END IF
01721                         END IF
01722                      END IF
01723                      IF( RSRC4.NE.RSRC1 .OR. CSRC4.NE.CSRC1 )
01724      $                  CALL IGESD2D( ICTXT, 1, 1, LIHIC, 1, RSRC1,
01725      $                       CSRC1 )
01726                      IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 )
01727      $                  CALL IGESD2D( ICTXT, 1, 1, LIHIC, 1, RSRC2,
01728      $                       CSRC2 )
01729                      IF( RSRC4.NE.RSRC3 .OR. CSRC4.NE.CSRC3 )
01730      $                  CALL IGESD2D( ICTXT, 1, 1, LIHIC, 1, RSRC3,
01731      $                       CSRC3 )
01732                   END IF
01733                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01734                      IF( RSRC4.NE.RSRC1 .OR. CSRC4.NE.CSRC1 )
01735      $                  CALL IGERV2D( ICTXT, 1, 1, LIHIC, 1, RSRC4,
01736      $                       CSRC4 )
01737                   END IF
01738                   IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
01739                      IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 )
01740      $                  CALL IGERV2D( ICTXT, 1, 1, LIHIC, 1, RSRC4,
01741      $                       CSRC4 )
01742                   END IF
01743                   IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
01744                      IF( RSRC4.NE.RSRC3 .OR. CSRC4.NE.CSRC3 )
01745      $                  CALL IGERV2D( ICTXT, 1, 1, LIHIC, 1, RSRC4,
01746      $                       CSRC4 )
01747                   END IF
01748 *
01749 *                 Avoid going over the border with the first window if
01750 *                 it resides in the block where the last global position
01751 *                 T(ILO,ILO) is or ILO has been updated to point to a
01752 *                 position right of T(LIHIC,LIHIC).
01753 *
01754                   SKIP1CR = WINDOW.EQ.1 .AND.
01755      $                 ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
01756 *
01757 *                 Decide I, where to put top of window, such that top of
01758 *                 window does not cut 2-by-2 block. Make sure that we do
01759 *                 not end up in a situation where a 2-by-2 block
01760 *                 splitted on the border is left in its original place
01761 *                 -- this can cause infinite loops.
01762 *                 Remedy: make sure that the part of the window that
01763 *                 resides left to the border is at least of dimension
01764 *                 two (2) in case we have 2-by-2 blocks in top of the
01765 *                 cross border window.
01766 *
01767 *                 Also make sure all processors in the group knows about
01768 *                 the correct value of I. When skipping the crossborder
01769 *                 reordering, just set I = LIHIC.
01770 *
01771                   IF( .NOT. SKIP1CR ) THEN
01772                      IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01773                         IF( WINDOW.EQ.1 ) THEN
01774                            LIHI1 = ILO
01775                         ELSE
01776                            LIHI1 = IWORK( ILIHI + WINDOW - 2 )
01777                         END IF
01778                         I = MAX( LIHI1,
01779      $                       MIN( LIHIC-2*MOD(LIHIC,NB) + 1,
01780      $                       (ICEIL(LIHIC,NB)-1)*NB - 1  ) )
01781                         ILOC = INDXG2L( I, NB, MYROW, DESCT( RSRC_ ),
01782      $                       NPROW )
01783                         JLOC = INDXG2L( I-1, NB, MYCOL, DESCT( CSRC_ ),
01784      $                       NPCOL )
01785                         IF( T( (JLOC-1)*LLDT+ILOC ).NE.ZERO )
01786      $                     I = I - 1
01787                         IF( RSRC1.NE.RSRC4 .OR. CSRC1.NE.CSRC4 )
01788      $                     CALL IGESD2D( ICTXT, 1, 1, I, 1, RSRC4,
01789      $                          CSRC4 )
01790                         IF( RSRC1.NE.RSRC2 .OR. CSRC1.NE.CSRC2 )
01791      $                     CALL IGESD2D( ICTXT, 1, 1, I, 1, RSRC2,
01792      $                          CSRC2 )
01793                         IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 )
01794      $                     CALL IGESD2D( ICTXT, 1, 1, I, 1, RSRC3,
01795      $                          CSRC3 )
01796                      END IF
01797                      IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
01798                         IF( RSRC1.NE.RSRC2 .OR. CSRC1.NE.CSRC2 )
01799      $                     CALL IGERV2D( ICTXT, 1, 1, I, 1, RSRC1,
01800      $                          CSRC1 )
01801                      END IF
01802                      IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
01803                         IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 )
01804      $                     CALL IGERV2D( ICTXT, 1, 1, I, 1, RSRC1,
01805      $                          CSRC1 )
01806                      END IF
01807                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
01808                         IF( RSRC1.NE.RSRC4 .OR. CSRC1.NE.CSRC4 )
01809      $                     CALL IGERV2D( ICTXT, 1, 1, I, 1, RSRC1,
01810      $                          CSRC1 )
01811                      END IF
01812                   ELSE
01813                      I = LIHIC
01814                   END IF
01815 *
01816 *                 Finalize computation of window size: active window is
01817 *                 now (I:LIHIC,I:LIHIC).
01818 *
01819                   NWIN = LIHIC - I + 1
01820                   KS = 0
01821 *
01822 *                 Skip rest of this part if appropriate.
01823 *
01824                   IF( SKIP1CR ) GO TO 360
01825 *
01826 *                 Divide workspace -- put active window in
01827 *                 WORK(IPW2:IPW2+NWIN**2-1) and orthogonal
01828 *                 transformations in WORK(IPW3:...).
01829 *
01830                   CALL SLASET( 'All', NWIN, NWIN, ZERO, ZERO,
01831      $                 WORK( IPW2 ), NWIN )
01832 *
01833                   PITRAF = IPIW
01834                   IPW3 = IPW2 + NWIN*NWIN
01835                   PDTRAF = IPW3
01836 *
01837 *                 Exchange the current view of SELECT for the active
01838 *                 window between process 1 and 4 to make sure that
01839 *                 exactly the same job is performed for both processes.
01840 *
01841                   IF( RSRC1.NE.RSRC4 .OR. CSRC1.NE.CSRC4 ) THEN
01842                      ILEN4 = MOD(LIHIC,NB)
01843                      SELI4 = ICEIL(I,NB)*NB+1
01844                      ILEN1 = NWIN - ILEN4
01845                      IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01846                         CALL IGESD2D( ICTXT, ILEN1, 1, SELECT(I),
01847      $                       ILEN1, RSRC4, CSRC4 )
01848                         CALL IGERV2D( ICTXT, ILEN4, 1, SELECT(SELI4),
01849      $                       ILEN4, RSRC4, CSRC4 )
01850                      END IF
01851                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
01852                         CALL IGESD2D( ICTXT, ILEN4, 1, SELECT(SELI4),
01853      $                       ILEN4, RSRC1, CSRC1 )
01854                         CALL IGERV2D( ICTXT, ILEN1, 1, SELECT(I),
01855      $                       ILEN1, RSRC1, CSRC1 )
01856                      END IF
01857                   END IF
01858 *
01859 *                 Form the active window by a series of point-to-point
01860 *                 sends and receives.
01861 *
01862                   DIM1 = NB - MOD(I-1,NB)
01863                   DIM4 = NWIN - DIM1
01864                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01865                      ILOC = INDXG2L( I, NB, MYROW, DESCT( RSRC_ ),
01866      $                    NPROW )
01867                      JLOC = INDXG2L( I, NB, MYCOL, DESCT( CSRC_ ),
01868      $                    NPCOL )
01869                      CALL SLAMOV( 'All', DIM1, DIM1,
01870      $                    T((JLOC-1)*LLDT+ILOC), LLDT, WORK(IPW2),
01871      $                    NWIN )
01872                      IF( RSRC1.NE.RSRC4 .OR. CSRC1.NE.CSRC4 ) THEN
01873                         CALL SGESD2D( ICTXT, DIM1, DIM1,
01874      $                       WORK(IPW2), NWIN, RSRC4, CSRC4 )
01875                         CALL SGERV2D( ICTXT, DIM4, DIM4,
01876      $                       WORK(IPW2+DIM1*NWIN+DIM1), NWIN, RSRC4,
01877      $                       CSRC4 )
01878                      END IF
01879                   END IF
01880                   IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
01881                      ILOC = INDXG2L( I+DIM1, NB, MYROW, DESCT( RSRC_ ),
01882      $                    NPROW )
01883                      JLOC = INDXG2L( I+DIM1, NB, MYCOL, DESCT( CSRC_ ),
01884      $                    NPCOL )
01885                      CALL SLAMOV( 'All', DIM4, DIM4,
01886      $                    T((JLOC-1)*LLDT+ILOC), LLDT,
01887      $                    WORK(IPW2+DIM1*NWIN+DIM1), NWIN )
01888                      IF( RSRC4.NE.RSRC1 .OR. CSRC4.NE.CSRC1 ) THEN
01889                         CALL SGESD2D( ICTXT, DIM4, DIM4,
01890      $                       WORK(IPW2+DIM1*NWIN+DIM1), NWIN, RSRC1,
01891      $                       CSRC1 )
01892                         CALL SGERV2D( ICTXT, DIM1, DIM1,
01893      $                       WORK(IPW2), NWIN, RSRC1, CSRC1 )
01894                      END IF
01895                   END IF
01896                   IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
01897                      ILOC = INDXG2L( I, NB, MYROW, DESCT( RSRC_ ),
01898      $                    NPROW )
01899                      JLOC = INDXG2L( I+DIM1, NB, MYCOL, DESCT( CSRC_ ),
01900      $                    NPCOL )
01901                      CALL SLAMOV( 'All', DIM1, DIM4,
01902      $                    T((JLOC-1)*LLDT+ILOC), LLDT,
01903      $                    WORK(IPW2+DIM1*NWIN), NWIN )
01904                      IF( RSRC2.NE.RSRC1 .OR. CSRC2.NE.CSRC1 ) THEN
01905                         CALL SGESD2D( ICTXT, DIM1, DIM4,
01906      $                       WORK(IPW2+DIM1*NWIN), NWIN, RSRC1, CSRC1 )
01907                      END IF
01908                   END IF
01909                   IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
01910                      IF( RSRC2.NE.RSRC4 .OR. CSRC2.NE.CSRC4 ) THEN
01911                         CALL SGESD2D( ICTXT, DIM1, DIM4,
01912      $                       WORK(IPW2+DIM1*NWIN), NWIN, RSRC4, CSRC4 )
01913                      END IF
01914                   END IF
01915                   IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
01916                      ILOC = INDXG2L( I+DIM1, NB, MYROW, DESCT( RSRC_ ),
01917      $                    NPROW )
01918                      JLOC = INDXG2L( I+DIM1-1, NB, MYCOL,
01919      $                    DESCT( CSRC_ ), NPCOL )
01920                      CALL SLAMOV( 'All', 1, 1,
01921      $                    T((JLOC-1)*LLDT+ILOC), LLDT,
01922      $                    WORK(IPW2+(DIM1-1)*NWIN+DIM1), NWIN )
01923                      IF( RSRC3.NE.RSRC1 .OR. CSRC3.NE.CSRC1 ) THEN
01924                         CALL SGESD2D( ICTXT, 1, 1,
01925      $                       WORK(IPW2+(DIM1-1)*NWIN+DIM1), NWIN,
01926      $                       RSRC1, CSRC1 )
01927                      END IF
01928                   END IF
01929                   IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
01930                      IF( RSRC3.NE.RSRC4 .OR. CSRC3.NE.CSRC4 ) THEN
01931                         CALL SGESD2D( ICTXT, 1, 1,
01932      $                       WORK(IPW2+(DIM1-1)*NWIN+DIM1), NWIN,
01933      $                       RSRC4, CSRC4 )
01934                      END IF
01935                   END IF
01936                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
01937                      IF( RSRC1.NE.RSRC2 .OR. CSRC1.NE.CSRC2 ) THEN
01938                         CALL SGERV2D( ICTXT, DIM1, DIM4,
01939      $                       WORK(IPW2+DIM1*NWIN), NWIN, RSRC2,
01940      $                       CSRC2 )
01941                      END IF
01942                      IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 ) THEN
01943                         CALL SGERV2D( ICTXT, 1, 1,
01944      $                       WORK(IPW2+(DIM1-1)*NWIN+DIM1), NWIN,
01945      $                       RSRC3, CSRC3 )
01946                      END IF
01947                   END IF
01948                   IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
01949                      IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 ) THEN
01950                         CALL SGERV2D( ICTXT, DIM1, DIM4,
01951      $                       WORK(IPW2+DIM1*NWIN), NWIN, RSRC2,
01952      $                       CSRC2 )
01953                      END IF
01954                      IF( RSRC4.NE.RSRC3 .OR. CSRC4.NE.CSRC3 ) THEN
01955                         CALL SGERV2D( ICTXT, 1, 1,
01956      $                       WORK(IPW2+(DIM1-1)*NWIN+DIM1), NWIN,
01957      $                       RSRC3, CSRC3 )
01958                      END IF
01959                   END IF
01960 *
01961 *                 Compute the reordering (just as in the total local
01962 *                 case) and accumulate the transformations (ONLY
01963 *                 ON-DIAGONAL PROCESSES).
01964 *
01965                   IF( ( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) .OR.
01966      $                ( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) ) THEN
01967                      PAIR = .FALSE.
01968                      DO 330 K = I, LIHIC
01969                         IF( PAIR ) THEN
01970                            PAIR = .FALSE.
01971                         ELSE
01972                            SWAP = SELECT( K ).NE.0
01973                            IF( K.LT.LIHIC ) THEN
01974                               ELEM = WORK(IPW2+(K-I)*NWIN+K-I+1)
01975                               IF( ELEM.NE.ZERO )
01976      $                           PAIR = .TRUE.
01977                            END IF
01978                            IF( SWAP ) THEN
01979                               KS = KS + 1
01980 *
01981 *                             Swap the K-th block to position I+KS-1.
01982 *
01983                               IERR = 0
01984                               KK  = K - I + 1
01985                               KKS = KS
01986                               IF( KK.NE.KS ) THEN
01987                                  NITRAF = LIWORK - PITRAF + 1
01988                                  NDTRAF = LWORK - PDTRAF + 1
01989                                  CALL BSTREXC( NWIN, WORK(IPW2), NWIN,
01990      $                                KK, KKS, NITRAF, IWORK( PITRAF ),
01991      $                                NDTRAF, WORK( PDTRAF ),
01992      $                                WORK(IPW1), IERR )
01993                                  PITRAF = PITRAF + NITRAF
01994                                  PDTRAF = PDTRAF + NDTRAF
01995 *
01996 *                                Update array SELECT.
01997 *
01998                                  IF ( PAIR ) THEN
01999                                     DO 340 J = I+KK-1, I+KKS, -1
02000                                        SELECT(J+1) = SELECT(J-1)
02001  340                                CONTINUE
02002                                     SELECT(I+KKS-1) = 1
02003                                     SELECT(I+KKS) = 1
02004                                  ELSE
02005                                     DO 350 J = I+KK-1, I+KKS, -1
02006                                        SELECT(J) = SELECT(J-1)
02007  350                                CONTINUE
02008                                     SELECT(I+KKS-1) = 1
02009                                  END IF
02010 *
02011                                  IF ( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
02012 *
02013                                     IF ( IERR.EQ.2 ) THEN
02014                                        SELECT( I+KKS-3 ) = 1
02015                                        SELECT( I+KKS-1 ) = 0
02016                                        KKS = KKS + 1
02017                                     END IF
02018 *
02019                                     GO TO 360
02020                                  END IF
02021                                  KS = KKS
02022                               END IF
02023                               IF( PAIR )
02024      $                           KS = KS + 1
02025                            END IF
02026                         END IF
02027  330                 CONTINUE
02028                   END IF
02029  360              CONTINUE
02030 *
02031 *                 Save information about the reordering.
02032 *
02033                   IF( ( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) .OR.
02034      $                 ( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) ) THEN
02035                      IBUFF( 1 ) = I
02036                      IBUFF( 2 ) = NWIN
02037                      IBUFF( 3 ) = PITRAF
02038                      IBUFF( 4 ) = KS
02039                      IBUFF( 5 ) = PDTRAF
02040                      IBUFF( 6 ) = NDTRAF
02041                      ILEN = PITRAF - IPIW + 1
02042                      DLEN = PDTRAF - IPW3 + 1
02043                      IBUFF( 7 ) = ILEN
02044                      IBUFF( 8 ) = DLEN
02045 *
02046 *                    Put reordered data back into global matrix if a
02047 *                    reordering took place.
02048 *
02049                      IF( .NOT. SKIP1CR ) THEN
02050                         IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
02051                            ILOC = INDXG2L( I, NB, MYROW, DESCT( RSRC_ ),
02052      $                          NPROW )
02053                            JLOC = INDXG2L( I, NB, MYCOL, DESCT( CSRC_ ),
02054      $                          NPCOL )
02055                            CALL SLAMOV( 'All', DIM1, DIM1, WORK(IPW2),
02056      $                          NWIN, T((JLOC-1)*LLDT+ILOC), LLDT )
02057                         END IF
02058                         IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02059                            ILOC = INDXG2L( I+DIM1, NB, MYROW,
02060      $                          DESCT( RSRC_ ), NPROW )
02061                            JLOC = INDXG2L( I+DIM1, NB, MYCOL,
02062      $                          DESCT( CSRC_ ), NPCOL )
02063                            CALL SLAMOV( 'All', DIM4, DIM4,
02064      $                          WORK(IPW2+DIM1*NWIN+DIM1), NWIN,
02065      $                          T((JLOC-1)*LLDT+ILOC), LLDT )
02066                         END IF
02067                      END IF
02068                   END IF
02069 *
02070 *                 Break if appropriate -- IBUFF(3:8) may now contain
02071 *                 nonsens, but that's no problem. The processors outside
02072 *                 the cross border group only needs to know about I and
02073 *                 NWIN to get a correct value of SKIP1CR (see below) and
02074 *                 to skip the cross border updates if necessary.
02075 *
02076                   IF( WINDOW.EQ.1 .AND. SKIP1CR ) GO TO 325
02077 *
02078 *                 Return reordered data to process 2 and 3.
02079 *
02080                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
02081                      IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 ) THEN
02082                         CALL SGESD2D( ICTXT, 1, 1,
02083      $                       WORK( IPW2+(DIM1-1)*NWIN+DIM1 ), NWIN,
02084      $                       RSRC3, CSRC3 )
02085                      END IF
02086                   END IF
02087                   IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02088                      IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 ) THEN
02089                         CALL SGESD2D( ICTXT, DIM1, DIM4,
02090      $                       WORK( IPW2+DIM1*NWIN), NWIN, RSRC2,
02091      $                       CSRC2 )
02092                      END IF
02093                   END IF
02094                   IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
02095                      ILOC = INDXG2L( I, NB, MYROW, DESCT( RSRC_ ),
02096      $                    NPROW )
02097                      JLOC = INDXG2L( I+DIM1, NB, MYCOL,
02098      $                    DESCT( CSRC_ ), NPCOL )
02099                      IF( RSRC2.NE.RSRC4 .OR. CSRC2.NE.CSRC4 ) THEN
02100                         CALL SGERV2D( ICTXT, DIM1, DIM4,
02101      $                       WORK(IPW2+DIM1*NWIN), NWIN, RSRC4, CSRC4 )
02102                      END IF
02103                      CALL SLAMOV( 'All', DIM1, DIM4,
02104      $                    WORK( IPW2+DIM1*NWIN ), NWIN,
02105      $                    T((JLOC-1)*LLDT+ILOC), LLDT )
02106                   END IF
02107                   IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
02108                      ILOC = INDXG2L( I+DIM1, NB, MYROW,
02109      $                    DESCT( RSRC_ ), NPROW )
02110                      JLOC = INDXG2L( I+DIM1-1, NB, MYCOL,
02111      $                    DESCT( CSRC_ ), NPCOL )
02112                      IF( RSRC3.NE.RSRC1 .OR. CSRC3.NE.CSRC1 ) THEN
02113                         CALL SGERV2D( ICTXT, 1, 1,
02114      $                       WORK( IPW2+(DIM1-1)*NWIN+DIM1 ), NWIN,
02115      $                       RSRC1, CSRC1 )
02116                      END IF
02117                      T((JLOC-1)*LLDT+ILOC) =
02118      $                    WORK( IPW2+(DIM1-1)*NWIN+DIM1 )
02119                   END IF
02120                END IF
02121 *
02122  325           CONTINUE
02123 *
02124  320        CONTINUE
02125 *
02126 *           For the crossborder updates, we use the same directions as
02127 *           in the local reordering case above.
02128 *
02129             DO 2222 DIR = 1, 2
02130 *
02131 *              Broadcast information about the reordering.
02132 *
02133                DO 321 WINDOW = WINDOW0, WINE, 2
02134                   RSRC4 = IWORK(IRSRC+WINDOW-1)
02135                   CSRC4 = IWORK(ICSRC+WINDOW-1)
02136                   RSRC1 = MOD( RSRC4 - 1 + NPROW, NPROW )
02137                   CSRC1 = MOD( CSRC4 - 1 + NPCOL, NPCOL )
02138                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
02139                      IF( NPCOL.GT.1 .AND. DIR.EQ.1 )
02140      $                  CALL IGEBS2D( ICTXT, 'Row', TOP, 8, 1,
02141      $                       IBUFF, 8 )
02142                      IF( NPROW.GT.1 .AND. DIR.EQ.2 )
02143      $                  CALL IGEBS2D( ICTXT, 'Col', TOP, 8, 1,
02144      $                       IBUFF, 8 )
02145                      SKIP1CR = WINDOW.EQ.1 .AND.
02146      $                    ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02147                   ELSEIF( MYROW.EQ.RSRC1 .OR. MYCOL.EQ.CSRC1 ) THEN
02148                      IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND.
02149      $                    MYROW.EQ.RSRC1 ) THEN
02150                         CALL IGEBR2D( ICTXT, 'Row', TOP, 8, 1,
02151      $                       IBUFF, 8, RSRC1, CSRC1 )
02152                         I = IBUFF( 1 )
02153                         NWIN = IBUFF( 2 )
02154                         PITRAF = IBUFF( 3 )
02155                         KS = IBUFF( 4 )
02156                         PDTRAF = IBUFF( 5 )
02157                         NDTRAF = IBUFF( 6 )
02158                         ILEN = IBUFF( 7 )
02159                         DLEN = IBUFF( 8 )
02160                         BUFFLEN = ILEN + DLEN
02161                         IPW3 = IPW2 + NWIN*NWIN
02162                         DIM1 = NB - MOD(I-1,NB)
02163                         DIM4 = NWIN - DIM1
02164                         LIHIC = NWIN + I - 1
02165                         SKIP1CR = WINDOW.EQ.1 .AND.
02166      $                       ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02167                      END IF
02168                      IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND.
02169      $                    MYCOL.EQ.CSRC1 ) THEN
02170                         CALL IGEBR2D( ICTXT, 'Col', TOP, 8, 1,
02171      $                       IBUFF, 8, RSRC1, CSRC1 )
02172                         I = IBUFF( 1 )
02173                         NWIN = IBUFF( 2 )
02174                         PITRAF = IBUFF( 3 )
02175                         KS = IBUFF( 4 )
02176                         PDTRAF = IBUFF( 5 )
02177                         NDTRAF = IBUFF( 6 )
02178                         ILEN = IBUFF( 7 )
02179                         DLEN = IBUFF( 8 )
02180                         BUFFLEN = ILEN + DLEN
02181                         IPW3 = IPW2 + NWIN*NWIN
02182                         DIM1 = NB - MOD(I-1,NB)
02183                         DIM4 = NWIN - DIM1
02184                         LIHIC = NWIN + I - 1
02185                         SKIP1CR = WINDOW.EQ.1 .AND.
02186      $                       ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02187                      END IF
02188                   END IF
02189                   IF( RSRC1.NE.RSRC4 ) THEN
02190                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02191                         IF( NPCOL.GT.1 .AND. DIR.EQ.1 )
02192      $                     CALL IGEBS2D( ICTXT, 'Row', TOP, 8, 1,
02193      $                          IBUFF, 8 )
02194                         SKIP1CR = WINDOW.EQ.1 .AND.
02195      $                       ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02196                      ELSEIF( MYROW.EQ.RSRC4 ) THEN
02197                         IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN
02198                            CALL IGEBR2D( ICTXT, 'Row', TOP, 8, 1,
02199      $                          IBUFF, 8, RSRC4, CSRC4 )
02200                            I = IBUFF( 1 )
02201                            NWIN = IBUFF( 2 )
02202                            PITRAF = IBUFF( 3 )
02203                            KS = IBUFF( 4 )
02204                            PDTRAF = IBUFF( 5 )
02205                            NDTRAF = IBUFF( 6 )
02206                            ILEN = IBUFF( 7 )
02207                            DLEN = IBUFF( 8 )
02208                            BUFFLEN = ILEN + DLEN
02209                            IPW3 = IPW2 + NWIN*NWIN
02210                            DIM1 = NB - MOD(I-1,NB)
02211                            DIM4 = NWIN - DIM1
02212                            LIHIC = NWIN + I - 1
02213                            SKIP1CR = WINDOW.EQ.1 .AND.
02214      $                          ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02215                         END IF
02216                      END IF
02217                   END IF
02218                   IF( CSRC1.NE.CSRC4 ) THEN
02219                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02220                         IF( NPROW.GT.1 .AND. DIR.EQ.2 )
02221      $                     CALL IGEBS2D( ICTXT, 'Col', TOP, 8, 1,
02222      $                          IBUFF, 8 )
02223                         SKIP1CR = WINDOW.EQ.1 .AND.
02224      $                       ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02225                      ELSEIF( MYCOL.EQ.CSRC4 ) THEN
02226                         IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN
02227                            CALL IGEBR2D( ICTXT, 'Col', TOP, 8, 1,
02228      $                          IBUFF, 8, RSRC4, CSRC4 )
02229                            I = IBUFF( 1 )
02230                            NWIN = IBUFF( 2 )
02231                            PITRAF = IBUFF( 3 )
02232                            KS = IBUFF( 4 )
02233                            PDTRAF = IBUFF( 5 )
02234                            NDTRAF = IBUFF( 6 )
02235                            ILEN = IBUFF( 7 )
02236                            DLEN = IBUFF( 8 )
02237                            BUFFLEN = ILEN + DLEN
02238                            IPW3 = IPW2 + NWIN*NWIN
02239                            DIM1 = NB - MOD(I-1,NB)
02240                            DIM4 = NWIN - DIM1
02241                            LIHIC = NWIN + I - 1
02242                            SKIP1CR = WINDOW.EQ.1 .AND.
02243      $                          ICEIL(LIHIC,NB).LE.ICEIL(ILO,NB)
02244                         END IF
02245                      END IF
02246                   END IF
02247 *
02248 *                 Skip rest of broadcasts and updates if appropriate.
02249 *
02250                   IF( SKIP1CR ) GO TO 326
02251 *
02252 *                 Broadcast the orthogonal transformations.
02253 *
02254                   IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
02255                      BUFFER = PDTRAF
02256                      BUFFLEN = DLEN + ILEN
02257                      IF( (NPROW.GT.1 .AND. DIR.EQ.2) .OR.
02258      $                   (NPCOL.GT.1 .AND. DIR.EQ.1) ) THEN
02259                         DO 370 INDX = 1, ILEN
02260                            WORK( BUFFER+INDX-1 ) =
02261      $                          FLOAT( IWORK(IPIW+INDX-1) )
02262  370                    CONTINUE
02263                         CALL SLAMOV( 'All', DLEN, 1, WORK( IPW3 ),
02264      $                       DLEN, WORK(BUFFER+ILEN), DLEN )
02265                      END IF
02266                      IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN
02267                         CALL SGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1,
02268      $                       WORK(BUFFER), BUFFLEN )
02269                      END IF
02270                      IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN
02271                         CALL SGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1,
02272      $                       WORK(BUFFER), BUFFLEN )
02273                      END IF
02274                   ELSEIF( MYROW.EQ.RSRC1 .OR. MYCOL.EQ.CSRC1 ) THEN
02275                      IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND.
02276      $                    MYROW.EQ.RSRC1 ) THEN
02277                         BUFFER = PDTRAF
02278                         BUFFLEN = DLEN + ILEN
02279                         CALL SGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1,
02280      $                       WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 )
02281                      END IF
02282                      IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND.
02283      $                    MYCOL.EQ.CSRC1 ) THEN
02284                         BUFFER = PDTRAF
02285                         BUFFLEN = DLEN + ILEN
02286                         CALL SGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1,
02287      $                       WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 )
02288                      END IF
02289                      IF( (NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC1)
02290      $                    .OR. (NPROW.GT.1.AND.DIR.EQ.2.AND.
02291      $                    MYCOL.EQ.CSRC1) ) THEN
02292                         DO 380 INDX = 1, ILEN
02293                            IWORK(IPIW+INDX-1) =
02294      $                          INT( WORK( BUFFER+INDX-1 ) )
02295  380                    CONTINUE
02296                         CALL SLAMOV( 'All', DLEN, 1,
02297      $                       WORK( BUFFER+ILEN ), DLEN,
02298      $                       WORK( IPW3 ), DLEN )
02299                      END IF
02300                   END IF
02301                   IF( RSRC1.NE.RSRC4 ) THEN
02302                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02303                         BUFFER = PDTRAF
02304                         BUFFLEN = DLEN + ILEN
02305                         IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN
02306                            DO 390 INDX = 1, ILEN
02307                               WORK( BUFFER+INDX-1 ) =
02308      $                             FLOAT( IWORK(IPIW+INDX-1) )
02309  390                       CONTINUE
02310                            CALL SLAMOV( 'All', DLEN, 1, WORK( IPW3 ),
02311      $                          DLEN, WORK(BUFFER+ILEN), DLEN )
02312                            CALL SGEBS2D( ICTXT, 'Row', TOP, BUFFLEN,
02313      $                          1, WORK(BUFFER), BUFFLEN )
02314                         END IF
02315                      ELSEIF( MYROW.EQ.RSRC4 .AND. DIR.EQ.1 .AND.
02316      $                    NPCOL.GT.1 ) THEN
02317                         BUFFER = PDTRAF
02318                         BUFFLEN = DLEN + ILEN
02319                         CALL SGEBR2D( ICTXT, 'Row', TOP, BUFFLEN,
02320      $                       1, WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 )
02321                         DO 400 INDX = 1, ILEN
02322                            IWORK(IPIW+INDX-1) =
02323      $                          INT( WORK( BUFFER+INDX-1 ) )
02324  400                    CONTINUE
02325                         CALL SLAMOV( 'All', DLEN, 1,
02326      $                       WORK( BUFFER+ILEN ), DLEN,
02327      $                       WORK( IPW3 ), DLEN )
02328                      END IF
02329                   END IF
02330                   IF( CSRC1.NE.CSRC4 ) THEN
02331                      IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
02332                         BUFFER = PDTRAF
02333                         BUFFLEN = DLEN + ILEN
02334                         IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN
02335                            DO 395 INDX = 1, ILEN
02336                               WORK( BUFFER+INDX-1 ) =
02337      $                             FLOAT( IWORK(IPIW+INDX-1) )
02338  395                       CONTINUE
02339                            CALL SLAMOV( 'All', DLEN, 1, WORK( IPW3 ),
02340      $                          DLEN, WORK(BUFFER+ILEN), DLEN )
02341                            CALL SGEBS2D( ICTXT, 'Col', TOP, BUFFLEN,
02342      $                          1, WORK(BUFFER), BUFFLEN )
02343                         END IF
02344                      ELSEIF( MYCOL.EQ.CSRC4 .AND. DIR.EQ.2 .AND.
02345      $                    NPROW.GT.1 ) THEN
02346                         BUFFER = PDTRAF
02347                         BUFFLEN = DLEN + ILEN
02348                         CALL SGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1,
02349      $                       WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 )
02350                         DO 402 INDX = 1, ILEN
02351                            IWORK(IPIW+INDX-1) =
02352      $                          INT( WORK( BUFFER+INDX-1 ) )
02353  402                    CONTINUE
02354                         CALL SLAMOV( 'All', DLEN, 1,
02355      $                       WORK( BUFFER+ILEN ), DLEN,
02356      $                       WORK( IPW3 ), DLEN )
02357                      END IF
02358                   END IF
02359 *
02360  326              CONTINUE
02361 *
02362  321           CONTINUE
02363 *
02364 *              Compute crossborder updates.
02365 *
02366                DO 322 WINDOW = WINDOW0, WINE, 2
02367                   IF( WINDOW.EQ.1 .AND. SKIP1CR ) GO TO 327
02368                   RSRC4 = IWORK(IRSRC+WINDOW-1)
02369                   CSRC4 = IWORK(ICSRC+WINDOW-1)
02370                   RSRC1 = MOD( RSRC4 - 1 + NPROW, NPROW )
02371                   CSRC1 = MOD( CSRC4 - 1 + NPCOL, NPCOL )
02372 *
02373 *                 Prepare workspaces for updates:
02374 *                   IPW3 holds now the orthogonal transformations
02375 *                   IPW4 holds the explicit orthogonal matrix, if formed
02376 *                   IPW5 holds the crossborder block column of T
02377 *                   IPW6 holds the crossborder block row of T
02378 *                   IPW7 holds the crossborder block column of Q
02379 *                        (if WANTQ=.TRUE.)
02380 *                   IPW8 points to the leftover workspace used as lhs in
02381 *                        matrix multiplications
02382 *
02383                   IF( ((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2)
02384      $                 .OR. ((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND.
02385      $                 DIR.EQ.1)) THEN
02386                      IPW4 = BUFFER
02387                      IF( DIR.EQ.2 ) THEN
02388                         IF( WANTQ ) THEN
02389                            QROWS = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ),
02390      $                          NPROW )
02391                         ELSE
02392                            QROWS = 0
02393                         END IF
02394                         TROWS = NUMROC( I-1, NB, MYROW, DESCT( RSRC_ ),
02395      $                       NPROW )
02396                      ELSE
02397                         QROWS = 0
02398                         TROWS = 0
02399                      END IF
02400                      IF( DIR.EQ.1 ) THEN
02401                         TCOLS = NUMROC( N - (I+DIM1-1), NB, MYCOL,
02402      $                       CSRC4, NPCOL )
02403                         IF( MYCOL.EQ.CSRC4 ) TCOLS = TCOLS - DIM4
02404                      ELSE
02405                         TCOLS = 0
02406                      END IF
02407                      IPW5 = IPW4 + NWIN*NWIN
02408                      IPW6 = IPW5 + TROWS * NWIN
02409                      IF( WANTQ ) THEN
02410                         IPW7 = IPW6 + NWIN * TCOLS
02411                         IPW8 = IPW7 + QROWS * NWIN
02412                      ELSE
02413                         IPW8 = IPW6 + NWIN * TCOLS
02414                      END IF
02415                   END IF
02416 *
02417 *                 Let each process row and column involved in the updates
02418 *                 exchange data in T and Q with their neighbours.
02419 *
02420                   IF( DIR.EQ.2 ) THEN
02421                      IF( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC4 ) THEN
02422                         DO 410 INDX = 1, NPROW
02423                            IF( MYCOL.EQ.CSRC1 ) THEN
02424                               CALL INFOG2L( 1+(INDX-1)*NB, I, DESCT,
02425      $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
02426      $                             JLOC1, RSRC, CSRC1 )
02427                               IF( MYROW.EQ.RSRC ) THEN
02428                                  CALL SLAMOV( 'All', TROWS, DIM1,
02429      $                                T((JLOC1-1)*LLDT+ILOC), LLDT,
02430      $                                WORK(IPW5), TROWS )
02431                                  IF( NPCOL.GT.1 ) THEN
02432                                     EAST = MOD( MYCOL + 1, NPCOL )
02433                                     CALL SGESD2D( ICTXT, TROWS, DIM1,
02434      $                                   WORK(IPW5), TROWS, RSRC,
02435      $                                   EAST )
02436                                     CALL SGERV2D( ICTXT, TROWS, DIM4,
02437      $                                   WORK(IPW5+TROWS*DIM1), TROWS,
02438      $                                   RSRC, EAST )
02439                                  END IF
02440                               END IF
02441                            END IF
02442                            IF( MYCOL.EQ.CSRC4 ) THEN
02443                               CALL INFOG2L( 1+(INDX-1)*NB, I+DIM1,
02444      $                             DESCT, NPROW, NPCOL, MYROW, MYCOL,
02445      $                             ILOC, JLOC4, RSRC, CSRC4 )
02446                               IF( MYROW.EQ.RSRC ) THEN
02447                                  CALL SLAMOV( 'All', TROWS, DIM4,
02448      $                                T((JLOC4-1)*LLDT+ILOC), LLDT,
02449      $                                WORK(IPW5+TROWS*DIM1), TROWS )
02450                                  IF( NPCOL.GT.1 ) THEN
02451                                     WEST = MOD( MYCOL-1+NPCOL, NPCOL )
02452                                     CALL SGESD2D( ICTXT, TROWS, DIM4,
02453      $                                   WORK(IPW5+TROWS*DIM1), TROWS,
02454      $                                   RSRC, WEST )
02455                                     CALL SGERV2D( ICTXT, TROWS, DIM1,
02456      $                                   WORK(IPW5), TROWS, RSRC,
02457      $                                   WEST )
02458                                  END IF
02459                               END IF
02460                            END IF
02461  410                    CONTINUE
02462                      END IF
02463                   END IF
02464 *
02465                   IF( DIR.EQ.1 ) THEN
02466                      IF( MYROW.EQ.RSRC1 .OR. MYROW.EQ.RSRC4 ) THEN
02467                         DO 420 INDX = 1, NPCOL
02468                            IF( MYROW.EQ.RSRC1 ) THEN
02469                               IF( INDX.EQ.1 ) THEN
02470                                  CALL INFOG2L( I, LIHIC+1, DESCT, NPROW,
02471      $                                NPCOL, MYROW, MYCOL, ILOC1, JLOC,
02472      $                                RSRC1, CSRC )
02473                               ELSE
02474                                  CALL INFOG2L( I,
02475      $                                (ICEIL(LIHIC,NB)+(INDX-2))*NB+1,
02476      $                                DESCT, NPROW, NPCOL, MYROW, MYCOL,
02477      $                                ILOC1, JLOC, RSRC1, CSRC )
02478                               END IF
02479                               IF( MYCOL.EQ.CSRC ) THEN
02480                                  CALL SLAMOV( 'All', DIM1, TCOLS,
02481      $                                T((JLOC-1)*LLDT+ILOC1), LLDT,
02482      $                                WORK(IPW6), NWIN )
02483                                  IF( NPROW.GT.1 ) THEN
02484                                     SOUTH = MOD( MYROW + 1, NPROW )
02485                                     CALL SGESD2D( ICTXT, DIM1, TCOLS,
02486      $                                   WORK(IPW6), NWIN, SOUTH,
02487      $                                   CSRC )
02488                                     CALL SGERV2D( ICTXT, DIM4, TCOLS,
02489      $                                   WORK(IPW6+DIM1), NWIN, SOUTH,
02490      $                                   CSRC )
02491                                  END IF
02492                               END IF
02493                            END IF
02494                            IF( MYROW.EQ.RSRC4 ) THEN
02495                               IF( INDX.EQ.1 ) THEN
02496                                  CALL INFOG2L( I+DIM1, LIHIC+1, DESCT,
02497      $                                NPROW, NPCOL, MYROW, MYCOL, ILOC4,
02498      $                                JLOC, RSRC4, CSRC )
02499                               ELSE
02500                                  CALL INFOG2L( I+DIM1,
02501      $                                (ICEIL(LIHIC,NB)+(INDX-2))*NB+1,
02502      $                                DESCT, NPROW, NPCOL, MYROW, MYCOL,
02503      $                                ILOC4, JLOC, RSRC4, CSRC )
02504                               END IF
02505                               IF( MYCOL.EQ.CSRC ) THEN
02506                                  CALL SLAMOV( 'All', DIM4, TCOLS,
02507      $                                T((JLOC-1)*LLDT+ILOC4), LLDT,
02508      $                                WORK(IPW6+DIM1), NWIN )
02509                                  IF( NPROW.GT.1 ) THEN
02510                                     NORTH = MOD( MYROW-1+NPROW, NPROW )
02511                                     CALL SGESD2D( ICTXT, DIM4, TCOLS,
02512      $                                   WORK(IPW6+DIM1), NWIN, NORTH,
02513      $                                   CSRC )
02514                                     CALL SGERV2D( ICTXT, DIM1, TCOLS,
02515      $                                   WORK(IPW6), NWIN, NORTH,
02516      $                                   CSRC )
02517                                  END IF
02518                               END IF
02519                            END IF
02520  420                    CONTINUE
02521                      END IF
02522                   END IF
02523 *
02524                   IF( DIR.EQ.2 ) THEN
02525                      IF( WANTQ ) THEN
02526                         IF( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC4 ) THEN
02527                            DO 430 INDX = 1, NPROW
02528                               IF( MYCOL.EQ.CSRC1 ) THEN
02529                                  CALL INFOG2L( 1+(INDX-1)*NB, I, DESCQ,
02530      $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
02531      $                                JLOC1, RSRC, CSRC1 )
02532                                  IF( MYROW.EQ.RSRC ) THEN
02533                                     CALL SLAMOV( 'All', QROWS, DIM1,
02534      $                                   Q((JLOC1-1)*LLDQ+ILOC), LLDQ,
02535      $                                   WORK(IPW7), QROWS )
02536                                     IF( NPCOL.GT.1 ) THEN
02537                                        EAST = MOD( MYCOL + 1, NPCOL )
02538                                        CALL SGESD2D( ICTXT, QROWS, DIM1,
02539      $                                      WORK(IPW7), QROWS, RSRC,
02540      $                                      EAST )
02541                                        CALL SGERV2D( ICTXT, QROWS, DIM4,
02542      $                                      WORK(IPW7+QROWS*DIM1),
02543      $                                      QROWS, RSRC, EAST )
02544                                     END IF
02545                                  END IF
02546                               END IF
02547                               IF( MYCOL.EQ.CSRC4 ) THEN
02548                                  CALL INFOG2L( 1+(INDX-1)*NB, I+DIM1,
02549      $                                DESCQ, NPROW, NPCOL, MYROW, MYCOL,
02550      $                                ILOC, JLOC4, RSRC, CSRC4 )
02551                                  IF( MYROW.EQ.RSRC ) THEN
02552                                     CALL SLAMOV( 'All', QROWS, DIM4,
02553      $                                   Q((JLOC4-1)*LLDQ+ILOC), LLDQ,
02554      $                                   WORK(IPW7+QROWS*DIM1), QROWS )
02555                                     IF( NPCOL.GT.1 ) THEN
02556                                        WEST = MOD( MYCOL-1+NPCOL,
02557      $                                      NPCOL )
02558                                        CALL SGESD2D( ICTXT, QROWS, DIM4,
02559      $                                      WORK(IPW7+QROWS*DIM1),
02560      $                                      QROWS, RSRC, WEST )
02561                                        CALL SGERV2D( ICTXT, QROWS, DIM1,
02562      $                                      WORK(IPW7), QROWS, RSRC,
02563      $                                      WEST )
02564                                     END IF
02565                                  END IF
02566                               END IF
02567  430                       CONTINUE
02568                         END IF
02569                      END IF
02570                   END IF
02571 *
02572  327              CONTINUE
02573 *
02574  322           CONTINUE
02575 *
02576                DO 323 WINDOW = WINDOW0, WINE, 2
02577                   RSRC4 = IWORK(IRSRC+WINDOW-1)
02578                   CSRC4 = IWORK(ICSRC+WINDOW-1)
02579                   RSRC1 = MOD( RSRC4 - 1 + NPROW, NPROW )
02580                   CSRC1 = MOD( CSRC4 - 1 + NPCOL, NPCOL )
02581                   FLOPS = 0
02582                   IF( ((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2)
02583      $                 .OR. ((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND.
02584      $                 DIR.EQ.1) ) THEN
02585 *
02586 *                    Skip this part of the updates if appropriate.
02587 *
02588                      IF( WINDOW.EQ.1 .AND. SKIP1CR ) GO TO 328
02589 *
02590 *                    Count number of operations to decide whether to use
02591 *                    matrix-matrix multiplications for updating
02592 *                    off-diagonal parts or not.
02593 *
02594                      NITRAF = PITRAF - IPIW
02595                      ISHH = .FALSE.
02596                      DO 405 K = 1, NITRAF
02597                         IF( IWORK( IPIW + K - 1 ).LE.NWIN ) THEN
02598                            FLOPS = FLOPS + 6
02599                         ELSE
02600                            FLOPS = FLOPS + 11
02601                            ISHH = .TRUE.
02602                         END IF
02603  405                 CONTINUE
02604 *
02605 *                    Perform updates in parallel.
02606 *
02607                      IF( FLOPS.NE.0 .AND.
02608      $                    ( 2*FLOPS*100 )/( 2*NWIN*NWIN ) .GE. MMULT )
02609      $                    THEN
02610 *
02611                         CALL SLASET( 'All', NWIN, NWIN, ZERO, ONE,
02612      $                       WORK( IPW4 ), NWIN )
02613                         WORK(IPW8) = FLOAT(MYROW)
02614                         WORK(IPW8+1) = FLOAT(MYCOL)
02615                         CALL BSLAAPP( 1, NWIN, NWIN, NCB, WORK( IPW4 ),
02616      $                       NWIN, NITRAF, IWORK(IPIW), WORK( IPW3 ),
02617      $                       WORK(IPW8) )
02618 *
02619 *                       Test if sparsity structure of orthogonal matrix
02620 *                       can be exploited (see below).
02621 *
02622                         IF( ISHH .OR. DIM1.NE.KS .OR. DIM4.NE.KS ) THEN
02623 *
02624 *                          Update the columns of T and Q affected by the
02625 *                          reordering.
02626 *
02627                            IF( DIR.EQ.2 ) THEN
02628                               DO 440 INDX = 1, MIN(I-1,1+(NPROW-1)*NB),
02629      $                             NB
02630                                  IF( MYCOL.EQ.CSRC1 ) THEN
02631                                     CALL INFOG2L( INDX, I, DESCT, NPROW,
02632      $                                   NPCOL, MYROW, MYCOL, ILOC,
02633      $                                   JLOC, RSRC, CSRC1 )
02634                                     IF( MYROW.EQ.RSRC ) THEN
02635                                        CALL SGEMM( 'No transpose',
02636      $                                      'No transpose', TROWS, DIM1,
02637      $                                      NWIN, ONE, WORK( IPW5 ),
02638      $                                      TROWS, WORK( IPW4 ), NWIN,
02639      $                                      ZERO, WORK(IPW8), TROWS )
02640                                        CALL SLAMOV( 'All', TROWS, DIM1,
02641      $                                      WORK(IPW8), TROWS,
02642      $                                      T((JLOC-1)*LLDT+ILOC),
02643      $                                      LLDT )
02644                                     END IF
02645                                  END IF
02646                                  IF( MYCOL.EQ.CSRC4 ) THEN
02647                                     CALL INFOG2L( INDX, I+DIM1, DESCT,
02648      $                                   NPROW, NPCOL, MYROW, MYCOL,
02649      $                                   ILOC, JLOC, RSRC, CSRC4 )
02650                                     IF( MYROW.EQ.RSRC ) THEN
02651                                        CALL SGEMM( 'No transpose',
02652      $                                      'No transpose', TROWS, DIM4,
02653      $                                      NWIN, ONE, WORK( IPW5 ),
02654      $                                      TROWS,
02655      $                                      WORK( IPW4+NWIN*DIM1 ),
02656      $                                      NWIN, ZERO, WORK(IPW8),
02657      $                                      TROWS )
02658                                        CALL SLAMOV( 'All', TROWS, DIM4,
02659      $                                      WORK(IPW8), TROWS,
02660      $                                      T((JLOC-1)*LLDT+ILOC),
02661      $                                      LLDT )
02662                                     END IF
02663                                  END IF
02664  440                          CONTINUE
02665 *
02666                               IF( WANTQ ) THEN
02667                                  DO 450 INDX = 1, MIN(N,1+(NPROW-1)*NB),
02668      $                                NB
02669                                     IF( MYCOL.EQ.CSRC1 ) THEN
02670                                        CALL INFOG2L( INDX, I, DESCQ,
02671      $                                      NPROW, NPCOL, MYROW, MYCOL,
02672      $                                      ILOC, JLOC, RSRC, CSRC1 )
02673                                        IF( MYROW.EQ.RSRC ) THEN
02674                                           CALL SGEMM( 'No transpose',
02675      $                                         'No transpose', QROWS,
02676      $                                         DIM1, NWIN, ONE,
02677      $                                         WORK( IPW7 ), QROWS,
02678      $                                         WORK( IPW4 ), NWIN,
02679      $                                         ZERO, WORK(IPW8),
02680      $                                         QROWS )
02681                                           CALL SLAMOV( 'All', QROWS,
02682      $                                         DIM1, WORK(IPW8), QROWS,
02683      $                                         Q((JLOC-1)*LLDQ+ILOC),
02684      $                                         LLDQ )
02685                                        END IF
02686                                     END IF
02687                                     IF( MYCOL.EQ.CSRC4 ) THEN
02688                                        CALL INFOG2L( INDX, I+DIM1,
02689      $                                      DESCQ, NPROW, NPCOL, MYROW,
02690      $                                      MYCOL, ILOC, JLOC, RSRC,
02691      $                                      CSRC4 )
02692                                        IF( MYROW.EQ.RSRC ) THEN
02693                                           CALL SGEMM( 'No transpose',
02694      $                                         'No transpose', QROWS,
02695      $                                         DIM4, NWIN, ONE,
02696      $                                         WORK( IPW7 ), QROWS,
02697      $                                         WORK( IPW4+NWIN*DIM1 ),
02698      $                                         NWIN, ZERO, WORK(IPW8),
02699      $                                         QROWS )
02700                                           CALL SLAMOV( 'All', QROWS,
02701      $                                         DIM4, WORK(IPW8), QROWS,
02702      $                                         Q((JLOC-1)*LLDQ+ILOC),
02703      $                                         LLDQ )
02704                                        END IF
02705                                     END IF
02706  450                             CONTINUE
02707                               END IF
02708                            END IF
02709 *
02710 *                          Update the rows of T affected by the
02711 *                          reordering.
02712 *
02713                            IF( DIR.EQ.1 ) THEN
02714                               IF ( LIHIC.LT.N ) THEN
02715                                  IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC4
02716      $                               .AND.MOD(LIHIC,NB).NE.0 ) THEN
02717                                     INDX = LIHIC + 1
02718                                     CALL INFOG2L( I, INDX, DESCT, NPROW,
02719      $                                   NPCOL, MYROW, MYCOL, ILOC,
02720      $                                   JLOC, RSRC1, CSRC4 )
02721                                     CALL SGEMM( 'Transpose',
02722      $                                   'No Transpose', DIM1, TCOLS,
02723      $                                   NWIN, ONE, WORK(IPW4), NWIN,
02724      $                                   WORK( IPW6 ), NWIN, ZERO,
02725      $                                   WORK(IPW8), DIM1 )
02726                                     CALL SLAMOV( 'All', DIM1, TCOLS,
02727      $                                   WORK(IPW8), DIM1,
02728      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
02729                                  END IF
02730                                  IF( MYROW.EQ.RSRC4.AND.MYCOL.EQ.CSRC4
02731      $                               .AND.MOD(LIHIC,NB).NE.0 ) THEN
02732                                     INDX = LIHIC + 1
02733                                     CALL INFOG2L( I+DIM1, INDX, DESCT,
02734      $                                   NPROW, NPCOL, MYROW, MYCOL,
02735      $                                   ILOC, JLOC, RSRC4, CSRC4 )
02736                                     CALL SGEMM( 'Transpose',
02737      $                                  'No Transpose', DIM4, TCOLS,
02738      $                                   NWIN, ONE,
02739      $                                   WORK( IPW4+DIM1*NWIN ), NWIN,
02740      $                                   WORK( IPW6), NWIN, ZERO,
02741      $                                   WORK(IPW8), DIM4 )
02742                                     CALL SLAMOV( 'All', DIM4, TCOLS,
02743      $                                   WORK(IPW8), DIM4,
02744      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
02745                                  END IF
02746                                  INDXS = ICEIL(LIHIC,NB)*NB + 1
02747                                  INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
02748                                  DO 460 INDX = INDXS, INDXE, NB
02749                                     IF( MYROW.EQ.RSRC1 ) THEN
02750                                        CALL INFOG2L( I, INDX, DESCT,
02751      $                                      NPROW, NPCOL, MYROW, MYCOL,
02752      $                                      ILOC, JLOC, RSRC1, CSRC )
02753                                        IF( MYCOL.EQ.CSRC ) THEN
02754                                           CALL SGEMM( 'Transpose',
02755      $                                         'No Transpose', DIM1,
02756      $                                         TCOLS, NWIN, ONE,
02757      $                                         WORK( IPW4 ), NWIN,
02758      $                                         WORK( IPW6 ), NWIN,
02759      $                                         ZERO, WORK(IPW8), DIM1 )
02760                                           CALL SLAMOV( 'All', DIM1,
02761      $                                         TCOLS, WORK(IPW8), DIM1,
02762      $                                         T((JLOC-1)*LLDT+ILOC),
02763      $                                         LLDT )
02764                                        END IF
02765                                     END IF
02766                                     IF( MYROW.EQ.RSRC4 ) THEN
02767                                        CALL INFOG2L( I+DIM1, INDX,
02768      $                                      DESCT, NPROW, NPCOL, MYROW,
02769      $                                      MYCOL, ILOC, JLOC, RSRC4,
02770      $                                      CSRC )
02771                                        IF( MYCOL.EQ.CSRC ) THEN
02772                                           CALL SGEMM( 'Transpose',
02773      $                                         'No Transpose', DIM4,
02774      $                                         TCOLS, NWIN, ONE,
02775      $                                         WORK( IPW4+NWIN*DIM1 ),
02776      $                                         NWIN, WORK( IPW6 ),
02777      $                                         NWIN, ZERO, WORK(IPW8),
02778      $                                         DIM4 )
02779                                           CALL SLAMOV( 'All', DIM4,
02780      $                                         TCOLS, WORK(IPW8), DIM4,
02781      $                                         T((JLOC-1)*LLDT+ILOC),
02782      $                                         LLDT )
02783                                        END IF
02784                                     END IF
02785  460                             CONTINUE
02786                               END IF
02787                            END IF
02788                         ELSE
02789 *
02790 *                          The NWIN-by-NWIN matrix U containing the
02791 *                          accumulated orthogonal transformations has
02792 *                          the following structure:
02793 *
02794 *                                        [ U11  U12 ]
02795 *                                    U = [          ],
02796 *                                        [ U21  U22 ]
02797 *
02798 *                          where U21 is KS-by-KS upper triangular and
02799 *                          U12 is (NWIN-KS)-by-(NWIN-KS) lower
02800 *                          triangular. For reordering over the border
02801 *                          the structure is only exploited when the
02802 *                          border cuts the columns of U conformally with
02803 *                          the structure itself. This happens exactly
02804 *                          when all eigenvalues in the subcluster was
02805 *                          moved to the other side of the border and
02806 *                          fits perfectly in their new positions, i.e.,
02807 *                          the reordering stops when the last eigenvalue
02808 *                          to cross the border is reordered to the
02809 *                          position closest to the border. Tested by
02810 *                          checking is KS = DIM1 = DIM4 (see above).
02811 *                          This should hold quite often. But this branch
02812 *                          is entered only if all involved eigenvalues
02813 *                          are real.
02814 *
02815 *                          Update the columns of T and Q affected by the
02816 *                          reordering.
02817 *
02818 *                          Compute T2*U21 + T1*U11 on the left side of
02819 *                          the border.
02820 *
02821                            IF( DIR.EQ.2 ) THEN
02822                               INDXE = MIN(I-1,1+(NPROW-1)*NB)
02823                               DO 470 INDX = 1, INDXE, NB
02824                                  IF( MYCOL.EQ.CSRC1 ) THEN
02825                                     CALL INFOG2L( INDX, I, DESCT, NPROW,
02826      $                                   NPCOL, MYROW, MYCOL, ILOC,
02827      $                                   JLOC, RSRC, CSRC1 )
02828                                     IF( MYROW.EQ.RSRC ) THEN
02829                                        CALL SLAMOV( 'All', TROWS, KS,
02830      $                                      WORK( IPW5+TROWS*DIM4),
02831      $                                      TROWS, WORK(IPW8), TROWS )
02832                                        CALL STRMM( 'Right', 'Upper',
02833      $                                      'No transpose',
02834      $                                      'Non-unit', TROWS, KS,
02835      $                                      ONE, WORK( IPW4+DIM4 ),
02836      $                                      NWIN, WORK(IPW8), TROWS )
02837                                        CALL SGEMM( 'No transpose',
02838      $                                      'No transpose', TROWS, KS,
02839      $                                      DIM4, ONE, WORK( IPW5 ),
02840      $                                      TROWS, WORK( IPW4 ), NWIN,
02841      $                                      ONE, WORK(IPW8), TROWS )
02842                                        CALL SLAMOV( 'All', TROWS, KS,
02843      $                                      WORK(IPW8), TROWS,
02844      $                                      T((JLOC-1)*LLDT+ILOC),
02845      $                                      LLDT )
02846                                     END IF
02847                                  END IF
02848 *
02849 *                                Compute T1*U12 + T2*U22 on the right
02850 *                                side of the border.
02851 *
02852                                  IF( MYCOL.EQ.CSRC4 ) THEN
02853                                     CALL INFOG2L( INDX, I+DIM1, DESCT,
02854      $                                   NPROW, NPCOL, MYROW, MYCOL,
02855      $                                   ILOC, JLOC, RSRC, CSRC4 )
02856                                     IF( MYROW.EQ.RSRC ) THEN
02857                                        CALL SLAMOV( 'All', TROWS, DIM4,
02858      $                                      WORK(IPW5), TROWS,
02859      $                                      WORK( IPW8 ), TROWS )
02860                                        CALL STRMM( 'Right', 'Lower',
02861      $                                      'No transpose',
02862      $                                      'Non-unit', TROWS, DIM4,
02863      $                                      ONE, WORK( IPW4+NWIN*KS ),
02864      $                                      NWIN, WORK( IPW8 ), TROWS )
02865                                        CALL SGEMM( 'No transpose',
02866      $                                      'No transpose', TROWS, DIM4,
02867      $                                      KS, ONE,
02868      $                                      WORK( IPW5+TROWS*DIM4),
02869      $                                      TROWS,
02870      $                                      WORK( IPW4+NWIN*KS+DIM4 ),
02871      $                                      NWIN, ONE, WORK( IPW8 ),
02872      $                                      TROWS )
02873                                        CALL SLAMOV( 'All', TROWS, DIM4,
02874      $                                      WORK(IPW8), TROWS,
02875      $                                      T((JLOC-1)*LLDT+ILOC),
02876      $                                      LLDT )
02877                                     END IF
02878                                  END IF
02879  470                          CONTINUE
02880                               IF( WANTQ ) THEN
02881 *
02882 *                                Compute Q2*U21 + Q1*U11 on the left
02883 *                                side of border.
02884 *
02885                                  INDXE = MIN(N,1+(NPROW-1)*NB)
02886                                  DO 480 INDX = 1, INDXE, NB
02887                                     IF( MYCOL.EQ.CSRC1 ) THEN
02888                                        CALL INFOG2L( INDX, I, DESCQ,
02889      $                                      NPROW, NPCOL, MYROW, MYCOL,
02890      $                                      ILOC, JLOC, RSRC, CSRC1 )
02891                                        IF( MYROW.EQ.RSRC ) THEN
02892                                           CALL SLAMOV( 'All', QROWS, KS,
02893      $                                         WORK( IPW7+QROWS*DIM4),
02894      $                                         QROWS, WORK(IPW8),
02895      $                                         QROWS )
02896                                           CALL STRMM( 'Right', 'Upper',
02897      $                                         'No transpose',
02898      $                                         'Non-unit', QROWS,
02899      $                                         KS, ONE,
02900      $                                         WORK( IPW4+DIM4 ), NWIN,
02901      $                                         WORK(IPW8), QROWS )
02902                                           CALL SGEMM( 'No transpose',
02903      $                                         'No transpose', QROWS,
02904      $                                         KS, DIM4, ONE,
02905      $                                         WORK( IPW7 ), QROWS,
02906      $                                         WORK( IPW4 ), NWIN, ONE,
02907      $                                         WORK(IPW8), QROWS )
02908                                           CALL SLAMOV( 'All', QROWS, KS,
02909      $                                         WORK(IPW8), QROWS,
02910      $                                         Q((JLOC-1)*LLDQ+ILOC),
02911      $                                         LLDQ )
02912                                        END IF
02913                                     END IF
02914 *
02915 *                                   Compute Q1*U12 + Q2*U22 on the right
02916 *                                   side of border.
02917 *
02918                                     IF( MYCOL.EQ.CSRC4 ) THEN
02919                                        CALL INFOG2L( INDX, I+DIM1,
02920      $                                      DESCQ, NPROW, NPCOL, MYROW,
02921      $                                      MYCOL, ILOC, JLOC, RSRC,
02922      $                                      CSRC4 )
02923                                        IF( MYROW.EQ.RSRC ) THEN
02924                                           CALL SLAMOV( 'All', QROWS,
02925      $                                         DIM4, WORK(IPW7), QROWS,
02926      $                                         WORK( IPW8 ), QROWS )
02927                                           CALL STRMM( 'Right', 'Lower',
02928      $                                         'No transpose',
02929      $                                         'Non-unit', QROWS,
02930      $                                         DIM4, ONE,
02931      $                                         WORK( IPW4+NWIN*KS ),
02932      $                                         NWIN, WORK( IPW8 ),
02933      $                                         QROWS )
02934                                           CALL SGEMM( 'No transpose',
02935      $                                         'No transpose', QROWS,
02936      $                                         DIM4, KS, ONE,
02937      $                                         WORK(IPW7+QROWS*(DIM4)),
02938      $                                         QROWS,
02939      $                                         WORK(IPW4+NWIN*KS+DIM4),
02940      $                                         NWIN, ONE, WORK( IPW8 ),
02941      $                                         QROWS )
02942                                           CALL SLAMOV( 'All', QROWS,
02943      $                                         DIM4, WORK(IPW8), QROWS,
02944      $                                         Q((JLOC-1)*LLDQ+ILOC),
02945      $                                         LLDQ )
02946                                        END IF
02947                                     END IF
02948  480                             CONTINUE
02949                               END IF
02950                            END IF
02951 *
02952                            IF( DIR.EQ.1 ) THEN
02953                               IF ( LIHIC.LT.N ) THEN
02954 *
02955 *                                Compute U21**T*T2 + U11**T*T1 on the
02956 *                                upper side of the border.
02957 *
02958                                  IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC4
02959      $                               .AND.MOD(LIHIC,NB).NE.0 ) THEN
02960                                     INDX = LIHIC + 1
02961                                     CALL INFOG2L( I, INDX, DESCT, NPROW,
02962      $                                   NPCOL, MYROW, MYCOL, ILOC,
02963      $                                   JLOC, RSRC1, CSRC4 )
02964                                     CALL SLAMOV( 'All', KS, TCOLS,
02965      $                                   WORK( IPW6+DIM4 ), NWIN,
02966      $                                   WORK(IPW8), KS )
02967                                     CALL STRMM( 'Left', 'Upper',
02968      $                                   'Transpose', 'Non-unit',
02969      $                                   KS, TCOLS, ONE,
02970      $                                   WORK( IPW4+DIM4 ), NWIN,
02971      $                                   WORK(IPW8), KS )
02972                                     CALL SGEMM( 'Transpose',
02973      $                                   'No transpose', KS, TCOLS,
02974      $                                   DIM4, ONE, WORK(IPW4), NWIN,
02975      $                                   WORK(IPW6), NWIN, ONE,
02976      $                                   WORK(IPW8), KS )
02977                                     CALL SLAMOV( 'All', KS, TCOLS,
02978      $                                   WORK(IPW8), KS,
02979      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
02980                                  END IF
02981 *
02982 *                                Compute U12**T*T1 + U22**T*T2 on the
02983 *                                lower side of the border.
02984 *
02985                                  IF( MYROW.EQ.RSRC4.AND.MYCOL.EQ.CSRC4
02986      $                               .AND.MOD(LIHIC,NB).NE.0 ) THEN
02987                                     INDX = LIHIC + 1
02988                                     CALL INFOG2L( I+DIM1, INDX, DESCT,
02989      $                                   NPROW, NPCOL, MYROW, MYCOL,
02990      $                                   ILOC, JLOC, RSRC4, CSRC4 )
02991                                     CALL SLAMOV( 'All', DIM4, TCOLS,
02992      $                                   WORK( IPW6 ), NWIN,
02993      $                                   WORK( IPW8 ), DIM4 )
02994                                     CALL STRMM( 'Left', 'Lower',
02995      $                                   'Transpose', 'Non-unit',
02996      $                                   DIM4, TCOLS, ONE,
02997      $                                   WORK( IPW4+NWIN*KS ), NWIN,
02998      $                                   WORK( IPW8 ), DIM4 )
02999                                     CALL SGEMM( 'Transpose',
03000      $                                   'No Transpose', DIM4, TCOLS,
03001      $                                   KS, ONE,
03002      $                                   WORK( IPW4+NWIN*KS+DIM4 ),
03003      $                                   NWIN, WORK( IPW6+DIM1 ), NWIN,
03004      $                                   ONE, WORK( IPW8), DIM4 )
03005                                     CALL SLAMOV( 'All', DIM4, TCOLS,
03006      $                                   WORK(IPW8), DIM4,
03007      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
03008                                  END IF
03009 *
03010 *                                Compute U21**T*T2 + U11**T*T1 on upper
03011 *                                side on border.
03012 *
03013                                  INDXS = ICEIL(LIHIC,NB)*NB+1
03014                                  INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
03015                                  DO 490 INDX = INDXS, INDXE, NB
03016                                     IF( MYROW.EQ.RSRC1 ) THEN
03017                                        CALL INFOG2L( I, INDX, DESCT,
03018      $                                      NPROW, NPCOL, MYROW, MYCOL,
03019      $                                      ILOC, JLOC, RSRC1, CSRC )
03020                                        IF( MYCOL.EQ.CSRC ) THEN
03021                                           CALL SLAMOV( 'All', KS, TCOLS,
03022      $                                         WORK( IPW6+DIM4 ), NWIN,
03023      $                                         WORK(IPW8), KS )
03024                                           CALL STRMM( 'Left', 'Upper',
03025      $                                         'Transpose',
03026      $                                         'Non-unit', KS,
03027      $                                         TCOLS, ONE,
03028      $                                         WORK( IPW4+DIM4 ), NWIN,
03029      $                                         WORK(IPW8), KS )
03030                                           CALL SGEMM( 'Transpose',
03031      $                                         'No transpose', KS,
03032      $                                         TCOLS, DIM4, ONE,
03033      $                                         WORK(IPW4), NWIN,
03034      $                                         WORK(IPW6), NWIN, ONE,
03035      $                                         WORK(IPW8), KS )
03036                                           CALL SLAMOV( 'All', KS, TCOLS,
03037      $                                         WORK(IPW8), KS,
03038      $                                         T((JLOC-1)*LLDT+ILOC),
03039      $                                         LLDT )
03040                                        END IF
03041                                     END IF
03042 *
03043 *                                   Compute U12**T*T1 + U22**T*T2 on
03044 *                                   lower side of border.
03045 *
03046                                     IF( MYROW.EQ.RSRC4 ) THEN
03047                                        CALL INFOG2L( I+DIM1, INDX,
03048      $                                      DESCT, NPROW, NPCOL, MYROW,
03049      $                                      MYCOL, ILOC, JLOC, RSRC4,
03050      $                                      CSRC )
03051                                        IF( MYCOL.EQ.CSRC ) THEN
03052                                           CALL SLAMOV( 'All', DIM4,
03053      $                                         TCOLS, WORK( IPW6 ),
03054      $                                         NWIN, WORK( IPW8 ),
03055      $                                         DIM4 )
03056                                           CALL STRMM( 'Left', 'Lower',
03057      $                                         'Transpose',
03058      $                                         'Non-unit', DIM4,
03059      $                                         TCOLS, ONE,
03060      $                                         WORK( IPW4+NWIN*KS ),
03061      $                                         NWIN, WORK( IPW8 ),
03062      $                                         DIM4 )
03063                                           CALL SGEMM( 'Transpose',
03064      $                                         'No Transpose', DIM4,
03065      $                                         TCOLS, KS, ONE,
03066      $                                         WORK(IPW4+NWIN*KS+DIM4),
03067      $                                         NWIN, WORK( IPW6+DIM1 ),
03068      $                                         NWIN, ONE, WORK( IPW8),
03069      $                                         DIM4 )
03070                                           CALL SLAMOV( 'All', DIM4,
03071      $                                         TCOLS, WORK(IPW8), DIM4,
03072      $                                         T((JLOC-1)*LLDT+ILOC),
03073      $                                         LLDT )
03074                                        END IF
03075                                     END IF
03076  490                             CONTINUE
03077                               END IF
03078                            END IF
03079                         END IF
03080                      ELSEIF( FLOPS.NE.0 ) THEN
03081 *
03082 *                       Update off-diagonal blocks and Q using the
03083 *                       pipelined elementary transformations. Now we
03084 *                       have a delicate problem: how to do this without
03085 *                       redundant work? For now, we let the processes
03086 *                       involved compute the whole crossborder block
03087 *                       rows and column saving only the part belonging
03088 *                       to the corresponding side of the border. To make
03089 *                       this a realistic alternative, we have modified
03090 *                       the ratio r_flops (see Reference [2] above) to
03091 *                       give more favor to the ordinary matrix
03092 *                       multiplication.
03093 *
03094                         IF( DIR.EQ.2 ) THEN
03095                            INDXE =  MIN(I-1,1+(NPROW-1)*NB)
03096                            DO 500 INDX = 1, INDXE, NB
03097                               CALL INFOG2L( INDX, I, DESCT, NPROW,
03098      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
03099      $                             RSRC, CSRC )
03100                               IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03101      $                             THEN
03102                                  CALL BSLAAPP( 1, TROWS, NWIN, NCB,
03103      $                                WORK(IPW5), TROWS, NITRAF,
03104      $                                IWORK(IPIW), WORK( IPW3 ),
03105      $                                WORK(IPW8) )
03106                                  CALL SLAMOV( 'All', TROWS, DIM1,
03107      $                                WORK(IPW5), TROWS,
03108      $                                T((JLOC-1)*LLDT+ILOC ), LLDT )
03109                               END IF
03110                               CALL INFOG2L( INDX, I+DIM1, DESCT, NPROW,
03111      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
03112      $                             RSRC, CSRC )
03113                               IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03114      $                             THEN
03115                                  IF( NPCOL.GT.1 )
03116      $                                CALL BSLAAPP( 1, TROWS, NWIN, NCB,
03117      $                                WORK(IPW5), TROWS, NITRAF,
03118      $                                IWORK(IPIW), WORK( IPW3 ),
03119      $                                WORK(IPW8) )
03120                                  CALL SLAMOV( 'All', TROWS, DIM4,
03121      $                                WORK(IPW5+TROWS*DIM1), TROWS,
03122      $                                T((JLOC-1)*LLDT+ILOC ), LLDT )
03123                               END IF
03124  500                       CONTINUE
03125                            IF( WANTQ ) THEN
03126                               INDXE = MIN(N,1+(NPROW-1)*NB)
03127                               DO 510 INDX = 1, INDXE, NB
03128                                  CALL INFOG2L( INDX, I, DESCQ, NPROW,
03129      $                                NPCOL, MYROW, MYCOL, ILOC, JLOC,
03130      $                                RSRC, CSRC )
03131                                  IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03132      $                                THEN
03133                                     CALL BSLAAPP( 1, QROWS, NWIN, NCB,
03134      $                                   WORK(IPW7), QROWS, NITRAF,
03135      $                                   IWORK(IPIW), WORK( IPW3 ),
03136      $                                   WORK(IPW8) )
03137                                     CALL SLAMOV( 'All', QROWS, DIM1,
03138      $                                   WORK(IPW7), QROWS,
03139      $                                   Q((JLOC-1)*LLDQ+ILOC ), LLDQ )
03140                                  END IF
03141                                  CALL INFOG2L( INDX, I+DIM1, DESCQ,
03142      $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
03143      $                                JLOC, RSRC, CSRC )
03144                                  IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03145      $                                THEN
03146                                     IF( NPCOL.GT.1 )
03147      $                                   CALL BSLAAPP( 1, QROWS, NWIN,
03148      $                                   NCB, WORK(IPW7), QROWS,
03149      $                                   NITRAF, IWORK(IPIW),
03150      $                                   WORK( IPW3 ), WORK(IPW8) )
03151                                     CALL SLAMOV( 'All', QROWS, DIM4,
03152      $                                   WORK(IPW7+QROWS*DIM1), QROWS,
03153      $                                   Q((JLOC-1)*LLDQ+ILOC ), LLDQ )
03154                                  END IF
03155  510                          CONTINUE
03156                            END IF
03157                         END IF
03158 *
03159                         IF( DIR.EQ.1 ) THEN
03160                            IF( LIHIC.LT.N ) THEN
03161                               INDX = LIHIC + 1
03162                               CALL INFOG2L( I, INDX, DESCT, NPROW,
03163      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
03164      $                             RSRC, CSRC )
03165                               IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC.AND.
03166      $                            MOD(LIHIC,NB).NE.0 ) THEN
03167                                  CALL BSLAAPP( 0, NWIN, TCOLS, NCB,
03168      $                                WORK( IPW6 ), NWIN, NITRAF,
03169      $                                IWORK(IPIW), WORK( IPW3 ),
03170      $                                WORK(IPW8) )
03171                                  CALL SLAMOV( 'All', DIM1, TCOLS,
03172      $                                WORK( IPW6 ), NWIN,
03173      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
03174                               END IF
03175                               CALL INFOG2L( I+DIM1, INDX, DESCT, NPROW,
03176      $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
03177      $                             RSRC, CSRC )
03178                               IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC.AND.
03179      $                             MOD(LIHIC,NB).NE.0 ) THEN
03180                                  IF( NPROW.GT.1 )
03181      $                                CALL BSLAAPP( 0, NWIN, TCOLS, NCB,
03182      $                                WORK( IPW6 ), NWIN, NITRAF,
03183      $                                IWORK(IPIW), WORK( IPW3 ),
03184      $                                WORK(IPW8) )
03185                                  CALL SLAMOV( 'All', DIM4, TCOLS,
03186      $                                WORK( IPW6+DIM1 ), NWIN,
03187      $                                T((JLOC-1)*LLDT+ILOC), LLDT )
03188                               END IF
03189                               INDXS = ICEIL(LIHIC,NB)*NB + 1
03190                               INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
03191                               DO 520 INDX = INDXS, INDXE, NB
03192                                  CALL INFOG2L( I, INDX, DESCT, NPROW,
03193      $                                NPCOL, MYROW, MYCOL, ILOC, JLOC,
03194      $                                RSRC, CSRC )
03195                                  IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03196      $                                THEN
03197                                     CALL BSLAAPP( 0, NWIN, TCOLS, NCB,
03198      $                                   WORK(IPW6), NWIN, NITRAF,
03199      $                                   IWORK(IPIW), WORK( IPW3 ),
03200      $                                   WORK(IPW8) )
03201                                     CALL SLAMOV( 'All', DIM1, TCOLS,
03202      $                                   WORK( IPW6 ), NWIN,
03203      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
03204                                  END IF
03205                                  CALL INFOG2L( I+DIM1, INDX, DESCT,
03206      $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
03207      $                                JLOC, RSRC, CSRC )
03208                                  IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC )
03209      $                                THEN
03210                                     IF( NPROW.GT.1 )
03211      $                                   CALL BSLAAPP( 0, NWIN, TCOLS,
03212      $                                   NCB, WORK(IPW6), NWIN, NITRAF,
03213      $                                   IWORK(IPIW), WORK( IPW3 ),
03214      $                                   WORK(IPW8) )
03215                                     CALL SLAMOV( 'All', DIM4, TCOLS,
03216      $                                   WORK( IPW6+DIM1 ), NWIN,
03217      $                                   T((JLOC-1)*LLDT+ILOC), LLDT )
03218                                  END IF
03219  520                          CONTINUE
03220                            END IF
03221                         END IF
03222                      END IF
03223                   END IF
03224 *
03225  328              CONTINUE
03226 *
03227  323           CONTINUE
03228 *
03229 *              End of loops over directions (DIR).
03230 *
03231  2222       CONTINUE
03232 *
03233 *           End of loops over diagonal blocks for reordering over the
03234 *           block diagonal.
03235 *
03236  310     CONTINUE
03237          LAST = LAST + 1
03238          IF( LASTWAIT .AND. LAST.LT.2 ) GO TO 308
03239 *
03240 *        Barrier to collect the processes before proceeding.
03241 *
03242          CALL BLACS_BARRIER( ICTXT, 'All' )
03243 *
03244 *        Compute global maximum of IERR so that we know if some process
03245 *        experienced a failure in the reordering.
03246 *
03247          MYIERR = IERR
03248          IF( NPROCS.GT.1 )
03249      $      CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, IERR, 1, -1,
03250      $           -1, -1, -1, -1 )
03251 *
03252          IF( IERR.NE.0 ) THEN
03253 *
03254 *           When calling BDTREXC, the block at position I+KKS-1 failed
03255 *           to swap.
03256 *
03257             IF( MYIERR.NE.0 ) INFO = MAX(1,I+KKS-1)
03258             IF( NPROCS.GT.1 )
03259      $         CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, -1,
03260      $              -1, -1, -1, -1 )
03261             GO TO 300
03262          END IF
03263 *
03264 *        Do a global update of the SELECT vector.
03265 *
03266          DO 530 K = 1, N
03267             RSRC = INDXG2P( K, NB, MYROW, DESCT( RSRC_ ), NPROW )
03268             CSRC = INDXG2P( K, NB, MYCOL, DESCT( CSRC_ ), NPCOL )
03269             IF( MYROW.NE.RSRC .OR. MYCOL.NE.CSRC )
03270      $         SELECT( K ) = 0
03271  530     CONTINUE
03272          IF( NPROCS.GT.1 )
03273      $      CALL IGSUM2D( ICTXT, 'All', TOP, N, 1, SELECT, N, -1, -1 )
03274 *
03275 *        Find the global minumum of ILO and IHI.
03276 *
03277          ILO = ILO - 1
03278  523     CONTINUE
03279          ILO = ILO + 1
03280          IF( ILO.LE.N ) THEN
03281             IF( SELECT(ILO).NE.0 ) GO TO 523
03282          END IF
03283          IHI = IHI + 1
03284  527     CONTINUE
03285          IHI = IHI - 1
03286          IF( IHI.GE.1 ) THEN
03287             IF( SELECT(IHI).EQ.0 ) GO TO 527
03288          END IF
03289 *
03290 *        End While ( ILO <= M )
03291          GO TO 50
03292       END IF
03293 *
03294  300  CONTINUE
03295 *
03296 *     In case an error occured, do an additional global update of
03297 *     SELECT.
03298 *
03299       IF( INFO.NE.0 ) THEN
03300          DO 540 K = 1, N
03301             RSRC = INDXG2P( K, NB, MYROW, DESCT( RSRC_ ), NPROW )
03302             CSRC = INDXG2P( K, NB, MYCOL, DESCT( CSRC_ ), NPCOL )
03303             IF( MYROW.NE.RSRC .OR. MYCOL.NE.CSRC )
03304      $           SELECT( K ) = 0
03305  540     CONTINUE
03306          IF( NPROCS.GT.1 )
03307      $        CALL IGSUM2D( ICTXT, 'All', TOP, N, 1, SELECT, N, -1, -1 )
03308       END IF
03309 *
03310  545  CONTINUE
03311 *
03312 *     Store the output eigenvalues in WR and WI: first let all the
03313 *     processes compute the eigenvalue inside their diagonal blocks in
03314 *     parallel, except for the eigenvalue located next to a block
03315 *     border. After that, compute all eigenvalues located next to the
03316 *     block borders. Finally, do a global summation over WR and WI so
03317 *     that all processors receive the result. Notice: real eigenvalues
03318 *     extracted from a non-canonical 2-by-2 block are not stored in
03319 *     any particular order.
03320 *
03321       DO 550 K = 1, N
03322          WR( K ) = ZERO
03323          WI( K ) = ZERO
03324  550  CONTINUE
03325 *
03326 *     Loop 560: extract eigenvalues from the blocks which are not laid
03327 *     out across a border of the processor mesh, except for those 1x1
03328 *     blocks on the border.
03329 *
03330       PAIR = .FALSE.
03331       DO 560 K = 1, N
03332          IF( .NOT. PAIR ) THEN
03333             BORDER = ( K.NE.N .AND. MOD( K, NB ).EQ.0 ) .OR.
03334      %           ( K.NE.1 .AND. MOD( K, NB ).EQ.1 )
03335             IF( .NOT. BORDER ) THEN
03336                CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
03337      $              ILOC1, JLOC1, TRSRC1, TCSRC1 )
03338                IF( MYROW.EQ.TRSRC1 .AND. MYCOL.EQ.TCSRC1 ) THEN
03339                   ELEM1 = T((JLOC1-1)*LLDT+ILOC1)
03340                   IF( K.LT.N ) THEN
03341                      ELEM3 = T((JLOC1-1)*LLDT+ILOC1+1)
03342                   ELSE
03343                      ELEM3 = ZERO
03344                   END IF
03345                   IF( ELEM3.NE.ZERO ) THEN
03346                      ELEM2 = T((JLOC1)*LLDT+ILOC1)
03347                      ELEM4 = T((JLOC1)*LLDT+ILOC1+1)
03348                      CALL SLANV2( ELEM1, ELEM2, ELEM3, ELEM4,
03349      $                    WR( K ), WI( K ), WR( K+1 ), WI( K+1 ), SN,
03350      $                    CS )
03351                      PAIR = .TRUE.
03352                   ELSE
03353                      IF( K.GT.1 ) THEN
03354                         TMP = T((JLOC1-2)*LLDT+ILOC1)
03355                         IF( TMP.NE.ZERO ) THEN
03356                            ELEM1 = T((JLOC1-2)*LLDT+ILOC1-1)
03357                            ELEM2 = T((JLOC1-1)*LLDT+ILOC1-1)
03358                            ELEM3 = T((JLOC1-2)*LLDT+ILOC1)
03359                            ELEM4 = T((JLOC1-1)*LLDT+ILOC1)
03360                            CALL SLANV2( ELEM1, ELEM2, ELEM3, ELEM4,
03361      $                          WR( K-1 ), WI( K-1 ), WR( K ),
03362      $                          WI( K ), SN, CS )
03363                         ELSE
03364                            WR( K ) = ELEM1
03365                         END IF
03366                      ELSE
03367                         WR( K ) = ELEM1
03368                      END IF
03369                   END IF
03370                END IF
03371             END IF
03372          ELSE
03373             PAIR = .FALSE.
03374          END IF
03375  560  CONTINUE
03376 *
03377 *     Loop 570: extract eigenvalues from the blocks which are laid
03378 *     out across a border of the processor mesh. The processors are
03379 *     numbered as below:
03380 *
03381 *                1 | 2
03382 *                --+--
03383 *                3 | 4
03384 *
03385       DO 570 K = NB, N-1, NB
03386          CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
03387      $        ILOC1, JLOC1, TRSRC1, TCSRC1 )
03388          CALL INFOG2L( K, K+1, DESCT, NPROW, NPCOL, MYROW, MYCOL,
03389      $        ILOC2, JLOC2, TRSRC2, TCSRC2 )
03390          CALL INFOG2L( K+1, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
03391      $        ILOC3, JLOC3, TRSRC3, TCSRC3 )
03392          CALL INFOG2L( K+1, K+1, DESCT, NPROW, NPCOL, MYROW, MYCOL,
03393      $        ILOC4, JLOC4, TRSRC4, TCSRC4 )
03394          IF( MYROW.EQ.TRSRC2 .AND. MYCOL.EQ.TCSRC2 ) THEN
03395             ELEM2 = T((JLOC2-1)*LLDT+ILOC2)
03396             IF( TRSRC1.NE.TRSRC2 .OR. TCSRC1.NE.TCSRC2 )
03397      $         CALL SGESD2D( ICTXT, 1, 1, ELEM2, 1, TRSRC1, TCSRC1 )
03398          END IF
03399          IF( MYROW.EQ.TRSRC3 .AND. MYCOL.EQ.TCSRC3 ) THEN
03400             ELEM3 = T((JLOC3-1)*LLDT+ILOC3)
03401             IF( TRSRC1.NE.TRSRC3 .OR. TCSRC1.NE.TCSRC3 )
03402      $         CALL SGESD2D( ICTXT, 1, 1, ELEM3, 1, TRSRC1, TCSRC1 )
03403          END IF
03404          IF( MYROW.EQ.TRSRC4 .AND. MYCOL.EQ.TCSRC4 ) THEN
03405             WORK(1) = T((JLOC4-1)*LLDT+ILOC4)
03406             IF( K+1.LT.N ) THEN
03407                WORK(2) = T((JLOC4-1)*LLDT+ILOC4+1)
03408             ELSE
03409                WORK(2) = ZERO
03410             END IF
03411             IF( TRSRC1.NE.TRSRC4 .OR. TCSRC1.NE.TCSRC4 )
03412      $         CALL SGESD2D( ICTXT, 2, 1, WORK, 2, TRSRC1, TCSRC1 )
03413          END IF
03414          IF( MYROW.EQ.TRSRC1 .AND. MYCOL.EQ.TCSRC1 ) THEN
03415             ELEM1 = T((JLOC1-1)*LLDT+ILOC1)
03416             IF( TRSRC1.NE.TRSRC2 .OR. TCSRC1.NE.TCSRC2 )
03417      $         CALL SGERV2D( ICTXT, 1, 1, ELEM2, 1, TRSRC2, TCSRC2 )
03418             IF( TRSRC1.NE.TRSRC3 .OR. TCSRC1.NE.TCSRC3 )
03419      $         CALL SGERV2D( ICTXT, 1, 1, ELEM3, 1, TRSRC3, TCSRC3 )
03420             IF( TRSRC1.NE.TRSRC4 .OR. TCSRC1.NE.TCSRC4 )
03421      $         CALL SGERV2D( ICTXT, 2, 1, WORK, 2, TRSRC4, TCSRC4 )
03422             ELEM4 = WORK(1)
03423             ELEM5 = WORK(2)
03424             IF( ELEM5.EQ.ZERO ) THEN
03425                IF( WR( K ).EQ.ZERO .AND. WI( K ).EQ.ZERO ) THEN
03426                   CALL SLANV2( ELEM1, ELEM2, ELEM3, ELEM4, WR( K ),
03427      $                 WI( K ), WR( K+1 ), WI( K+1 ), SN, CS )
03428                ELSEIF( WR( K+1 ).EQ.ZERO .AND. WI( K+1 ).EQ.ZERO ) THEN
03429                   WR( K+1 ) = ELEM4
03430                END IF
03431             ELSEIF( WR( K ).EQ.ZERO .AND. WI( K ).EQ.ZERO ) THEN
03432                WR( K ) = ELEM1
03433             END IF
03434          END IF
03435  570  CONTINUE
03436 *
03437       IF( NPROCS.GT.1 ) THEN
03438          CALL SGSUM2D( ICTXT, 'All', TOP, N, 1, WR, N, -1, -1 )
03439          CALL SGSUM2D( ICTXT, 'All', TOP, N, 1, WI, N, -1, -1 )
03440       END IF
03441 *
03442 *     Store storage requirements in workspaces.
03443 *
03444       WORK( 1 ) = FLOAT(LWMIN)
03445       IWORK( 1 ) = LIWMIN
03446 *
03447 *     Return to calling program.
03448 *
03449       RETURN
03450 *
03451 *     End of PSTRORD
03452 *
03453       END
03454 *