ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pslaqr2.f
Go to the documentation of this file.
00001       SUBROUTINE PSLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
00002      $                    ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
00003      $                    V, LDV, WR, WI, WORK, LWORK )
00004 *
00005 *     Contribution from the Department of Computing Science and HPC2N,
00006 *     Umea University, Sweden
00007 *
00008 *  -- ScaLAPACK routine (version 2.0.2) --
00009 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
00010 *     May 1 2012
00011 *
00012       IMPLICIT NONE
00013 *
00014 *     .. Scalar Arguments ..
00015       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
00016      $                   NS, NW
00017       LOGICAL            WANTT, WANTZ
00018 *     ..
00019 *     .. Array Arguments ..
00020       INTEGER            DESCA( * ), DESCZ( * )
00021       REAL               A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
00022      $                   V( LDV, * ), WORK( * ), WI( * ), WR( * ),
00023      $                   Z( * )
00024 *     ..
00025 *
00026 *  Purpose
00027 *  =======
00028 *
00029 *  Aggressive early deflation:
00030 *
00031 *  PSLAQR2 accepts as input an upper Hessenberg matrix A and performs an
00032 *  orthogonal similarity transformation designed to detect and deflate
00033 *  fully converged eigenvalues from a trailing principal submatrix.  On
00034 *  output A has been overwritten by a new Hessenberg matrix that is a
00035 *  perturbation of an orthogonal similarity transformation of A.  It is
00036 *  to be hoped that the final version of H has many zero subdiagonal
00037 *  entries.
00038 *
00039 *  This routine handles small deflation windows which is affordable by
00040 *  one processor. Normally, it is called by PSLAQR1. All the inputs are
00041 *  assumed to be valid without checking.
00042 *
00043 *  Notes
00044 *  =====
00045 *
00046 *  Each global data object is described by an associated description
00047 *  vector.  This vector stores the information required to establish
00048 *  the mapping between an object element and its corresponding process
00049 *  and memory location.
00050 *
00051 *  Let A be a generic term for any 2D block cyclicly distributed array.
00052 *  Such a global array has an associated description vector DESCA.
00053 *  In the following comments, the character _ should be read as
00054 *  "of the global array".
00055 *
00056 *  NOTATION        STORED IN      EXPLANATION
00057 *  --------------- -------------- --------------------------------------
00058 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00059 *                                 DTYPE_A = 1.
00060 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00061 *                                 the BLACS process grid A is distribu-
00062 *                                 ted over. The context itself is glo-
00063 *                                 bal, but the handle (the integer
00064 *                                 value) may vary.
00065 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00066 *                                 array A.
00067 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00068 *                                 array A.
00069 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00070 *                                 the rows of the array.
00071 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00072 *                                 the columns of the array.
00073 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00074 *                                 row of the array A is distributed.
00075 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00076 *                                 first column of the array A is
00077 *                                 distributed.
00078 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00079 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00080 *
00081 *  Let K be the number of rows or columns of a distributed matrix,
00082 *  and assume that its process grid has dimension p x q.
00083 *  LOCr( K ) denotes the number of elements of K that a process
00084 *  would receive if K were distributed over the p processes of its
00085 *  process column.
00086 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00087 *  process would receive if K were distributed over the q processes of
00088 *  its process row.
00089 *  The values of LOCr() and LOCc() may be determined via a call to the
00090 *  ScaLAPACK tool function, NUMROC:
00091 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00092 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00093 *  An upper bound for these quantities may be computed by:
00094 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00095 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00096 *
00097 *  Arguments
00098 *  =========
00099 *
00100 *  WANTT   (global input) LOGICAL
00101 *          If .TRUE., then the Hessenberg matrix H is fully updated
00102 *          so that the quasi-triangular Schur factor may be
00103 *          computed (in cooperation with the calling subroutine).
00104 *          If .FALSE., then only enough of H is updated to preserve
00105 *          the eigenvalues.
00106 *
00107 *  WANTZ   (global input) LOGICAL
00108 *          If .TRUE., then the orthogonal matrix Z is updated so
00109 *          so that the orthogonal Schur factor may be computed
00110 *          (in cooperation with the calling subroutine).
00111 *          If .FALSE., then Z is not referenced.
00112 *
00113 *  N       (global input) INTEGER
00114 *          The order of the matrix H and (if WANTZ is .TRUE.) the
00115 *          order of the orthogonal matrix Z.
00116 *
00117 *  KTOP    (global input) INTEGER
00118 *  KBOT    (global input) INTEGER
00119 *          It is assumed without a check that either
00120 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
00121 *          determine an isolated block along the diagonal of the
00122 *          Hessenberg matrix. However, H(KTOP,KTOP-1)=0 is not
00123 *          essentially necessary if WANTT is .TRUE. .
00124 *
00125 *  NW      (global input) INTEGER
00126 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
00127 *          Normally NW .GE. 3 if PSLAQR2 is called by PSLAQR1.
00128 *
00129 *  A       (local input/output) REAL             array, dimension
00130 *          (DESCH(LLD_),*)
00131 *          On input the initial N-by-N section of A stores the
00132 *          Hessenberg matrix undergoing aggressive early deflation.
00133 *          On output A has been transformed by an orthogonal
00134 *          similarity transformation, perturbed, and the returned
00135 *          to Hessenberg form that (it is to be hoped) has some
00136 *          zero subdiagonal entries.
00137 *
00138 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00139 *          The array descriptor for the distributed matrix A.
00140 *
00141 *  ILOZ    (global input) INTEGER
00142 *  IHIZ    (global input) INTEGER
00143 *          Specify the rows of Z to which transformations must be
00144 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
00145 *
00146 *  Z       (input/output) REAL             array, dimension
00147 *          (DESCH(LLD_),*)
00148 *          IF WANTZ is .TRUE., then on output, the orthogonal
00149 *          similarity transformation mentioned above has been
00150 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
00151 *          If WANTZ is .FALSE., then Z is unreferenced.
00152 *
00153 *  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
00154 *          The array descriptor for the distributed matrix Z.
00155 *
00156 *  NS      (global output) INTEGER
00157 *          The number of unconverged (ie approximate) eigenvalues
00158 *          returned in SR and SI that may be used as shifts by the
00159 *          calling subroutine.
00160 *
00161 *  ND      (global output) INTEGER
00162 *          The number of converged eigenvalues uncovered by this
00163 *          subroutine.
00164 *
00165 *  SR      (global output) REAL             array, dimension KBOT
00166 *  SI      (global output) REAL             array, dimension KBOT
00167 *          On output, the real and imaginary parts of approximate
00168 *          eigenvalues that may be used for shifts are stored in
00169 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
00170 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
00171 *          On proc #0, the real and imaginary parts of converged
00172 *          eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
00173 *          SI(KBOT-ND+1) through SI(KBOT), respectively. On other
00174 *          processors, these entries are set to zero.
00175 *
00176 *  T       (local workspace) REAL             array, dimension LDT*NW.
00177 *
00178 *  LDT     (local input) INTEGER
00179 *          The leading dimension of the array T.
00180 *          LDT >= NW.
00181 *
00182 *  V       (local workspace) REAL             array, dimension LDV*NW.
00183 *
00184 *  LDV     (local input) INTEGER
00185 *          The leading dimension of the array V.
00186 *          LDV >= NW.
00187 *
00188 *  WR      (local workspace) REAL             array, dimension KBOT.
00189 *  WI      (local workspace) REAL             array, dimension KBOT.
00190 *
00191 *  WORK    (local workspace) REAL             array, dimension LWORK.
00192 *
00193 *  LWORK   (local input) INTEGER
00194 *          WORK(LWORK) is a local array and LWORK is assumed big enough
00195 *          so that LWORK >= NW*NW.
00196 *
00197 *  ================================================================
00198 *  Implemented by
00199 *        Meiyue Shao, Department of Computing Science and HPC2N,
00200 *        Umea University, Sweden
00201 *
00202 *  ================================================================
00203 *  References:
00204 *        B. Kagstrom, D. Kressner, and M. Shao,
00205 *        On Aggressive Early Deflation in Parallel Variants of the QR
00206 *        Algorithm.
00207 *        Para 2010, to appear.
00208 *
00209 *  ================================================================
00210 *     .. Parameters ..
00211       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00212      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00213       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00214      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00215      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00216       REAL               ZERO, ONE
00217       PARAMETER          ( ZERO = 0.0, ONE = 1.0 )
00218 *     ..
00219 *     .. Local Scalars ..
00220       INTEGER            CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
00221      $                   ICOL2, INFO, II, IROW, IROW1, IROW2, ITMP1,
00222      $                   ITMP2, J, JAFIRST, JJ, K, L, LDA, LDZ, LLDTMP,
00223      $                   MYCOL, MYROW, NODE, NPCOL, NPROW, DBLK,
00224      $                   HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP, LEFT,
00225      $                   RIGHT, UP, DOWN, D1, D2
00226 *     ..
00227 *     .. Local Arrays ..
00228       INTEGER            DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
00229      $                   DESCWV( 9 )
00230 *     ..
00231 *     .. External Functions ..
00232       INTEGER            NUMROC
00233       EXTERNAL           NUMROC
00234 *     ..
00235 *     .. External Subroutines ..
00236       EXTERNAL           BLACS_GRIDINFO, INFOG2L, SLASET,
00237      $                   SLAQR3, DESCINIT, PSGEMM, PSGEMR2D, SGEMM,
00238      $                   SLAMOV, SGESD2D, SGERV2D, SGEBS2D, SGEBR2D,
00239      $                   IGEBS2D, IGEBR2D
00240 *     ..
00241 *     .. Intrinsic Functions ..
00242       INTRINSIC          MAX, MIN, MOD
00243 *     ..
00244 *     .. Executable Statements ..
00245 *
00246       INFO = 0
00247 *
00248       IF( N.EQ.0 )
00249      $   RETURN
00250 *
00251 *     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
00252 *
00253       HBL = DESCA( MB_ )
00254       CONTXT = DESCA( CTXT_ )
00255       LDA = DESCA( LLD_ )
00256       IAFIRST = DESCA( RSRC_ )
00257       JAFIRST = DESCA( CSRC_ )
00258       LDZ = DESCZ( LLD_ )
00259       CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
00260       NODE = MYROW*NPCOL + MYCOL
00261       LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
00262       RIGHT = MOD( MYCOL+1, NPCOL )
00263       UP = MOD( MYROW+NPROW-1, NPROW )
00264       DOWN = MOD( MYROW+1, NPROW )
00265 *
00266 *     I1 and I2 are the indices of the first row and last column of A
00267 *     to which transformations must be applied.
00268 *
00269       I = KBOT
00270       L = KTOP
00271       IF( WANTT ) THEN
00272          I1 = 1
00273          I2 = N
00274          LTOP = 1
00275       ELSE
00276          I1 = L
00277          I2 = I
00278          LTOP = L
00279       END IF
00280 *
00281 *     Begin Aggressive Early Deflation.
00282 *
00283       DBLK = NW
00284       CALL INFOG2L( I-DBLK+1, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
00285      $     MYCOL, IROW, ICOL, II, JJ )
00286       IF ( MYROW .EQ. II ) THEN
00287          CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
00288      $        LDT, INFO )
00289          CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
00290      $        LDV, INFO )
00291       ELSE
00292          CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
00293      $        1, INFO )
00294          CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
00295      $        1, INFO )
00296       END IF
00297       CALL PSGEMR2D( DBLK, DBLK, A, I-DBLK+1, I-DBLK+1, DESCA, T, 1, 1,
00298      $     DESCT, CONTXT )
00299       IF ( MYROW .EQ. II .AND. MYCOL .EQ. JJ ) THEN
00300          CALL SLASET( 'All', DBLK, DBLK, ZERO, ONE, V, LDV )
00301          CALL SLAQR3( .TRUE., .TRUE., DBLK, 1, DBLK, DBLK-1, T, LDT, 1,
00302      $        DBLK, V, LDV, NS, ND, WR, WI, WORK, DBLK, DBLK,
00303      $        WORK( DBLK*DBLK+1 ), DBLK, DBLK, WORK( 2*DBLK*DBLK+1 ),
00304      $        DBLK, WORK( 3*DBLK*DBLK+1 ), LWORK-3*DBLK*DBLK )
00305          CALL SGEBS2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV )
00306          CALL IGEBS2D( CONTXT, 'All', ' ', 1, 1, ND, 1 )
00307       ELSE
00308          CALL SGEBR2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV, II, JJ )
00309          CALL IGEBR2D( CONTXT, 'All', ' ', 1, 1, ND, 1, II, JJ )
00310       END IF
00311 *
00312       IF( ND .GT. 0 ) THEN
00313 *
00314 *        Copy the local matrix back to the diagonal block.
00315 *
00316          CALL PSGEMR2D( DBLK, DBLK, T, 1, 1, DESCT, A, I-DBLK+1,
00317      $        I-DBLK+1, DESCA, CONTXT )
00318 *
00319 *        Update T and Z.
00320 *
00321          IF( MOD( I-DBLK, HBL )+DBLK .LE. HBL ) THEN
00322 *
00323 *           Simplest case: the deflation window is located on one
00324 *           processor.
00325 *           Call SGEMM directly to perform the update.
00326 *
00327             HSTEP = LWORK / DBLK
00328             VSTEP = HSTEP
00329 *
00330 *           Update horizontal slab in A.
00331 *
00332             IF( WANTT ) THEN
00333                CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
00334      $              MYCOL, IROW, ICOL, II, JJ )
00335                IF( MYROW .EQ. II ) THEN
00336                   ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
00337                   DO 10 KKCOL = ICOL, ICOL1, HSTEP
00338                      KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
00339                      CALL SGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
00340      $                    LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
00341      $                    DBLK )
00342                      CALL SLAMOV( 'A', DBLK, KLN, WORK, DBLK,
00343      $                    A( IROW+(KKCOL-1)*LDA ), LDA )
00344    10             CONTINUE
00345                END IF
00346             END IF
00347 *
00348 *           Update vertical slab in A.
00349 *
00350             CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
00351      $           MYCOL, IROW, ICOL, II, JJ )
00352             IF( MYCOL .EQ. JJ ) THEN
00353                CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
00354      $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00355                IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00356                DO 20 KKROW = IROW, IROW1, VSTEP
00357                   KLN = MIN( VSTEP, IROW1-KKROW+1 )
00358                   CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
00359      $                 A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO, WORK,
00360      $                 KLN )
00361                   CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
00362      $                 A( KKROW+(ICOL-1)*LDA ), LDA )
00363    20          CONTINUE
00364             END IF
00365 *
00366 *           Update vertical slab in Z.
00367 *
00368             IF( WANTZ ) THEN
00369                CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
00370      $              MYCOL, IROW, ICOL, II, JJ )
00371                IF( MYCOL .EQ. JJ ) THEN
00372                   CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
00373      $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00374                   IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00375                   DO 30 KKROW = IROW, IROW1, VSTEP
00376                      KLN = MIN( VSTEP, IROW1-KKROW+1 )
00377                      CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
00378      $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
00379      $                    WORK, KLN )
00380                      CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
00381      $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
00382    30             CONTINUE
00383                END IF
00384             END IF
00385 *
00386          ELSE IF( MOD( I-DBLK, HBL )+DBLK .LE. 2*HBL ) THEN
00387 *
00388 *           More complicated case: the deflation window lay on a 2x2
00389 *           processor mesh.
00390 *           Call SGEMM locally and communicate by pair.
00391 *
00392             D1 = HBL - MOD( I-DBLK, HBL )
00393             D2 = DBLK - D1
00394             HSTEP = LWORK / DBLK
00395             VSTEP = HSTEP
00396 *
00397 *           Update horizontal slab in A.
00398 *
00399             IF( WANTT ) THEN
00400                CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
00401      $              MYCOL, IROW, ICOL, II, JJ )
00402                IF( MYROW .EQ. UP ) THEN
00403                   IF( MYROW .EQ. II ) THEN
00404                      ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
00405                      DO 40 KKCOL = ICOL, ICOL1, HSTEP
00406                         KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
00407                         CALL SGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
00408      $                       DBLK, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
00409      $                       WORK, DBLK )
00410                         CALL SLAMOV( 'A', DBLK, KLN, WORK, DBLK,
00411      $                       A( IROW+(KKCOL-1)*LDA ), LDA )
00412    40                CONTINUE
00413                   END IF
00414                ELSE
00415                   IF( MYROW .EQ. II ) THEN
00416                      ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
00417                      DO 50 KKCOL = ICOL, ICOL1, HSTEP
00418                         KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
00419                         CALL SGEMM( 'T', 'N', D2, KLN, D1, ONE,
00420      $                       V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
00421      $                       LDA, ZERO, WORK( D1+1 ), DBLK )
00422                         CALL SGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
00423      $                       DBLK, DOWN, MYCOL )
00424                         CALL SGERV2D( CONTXT, D1, KLN, WORK, DBLK, DOWN,
00425      $                       MYCOL )
00426                         CALL SGEMM( 'T', 'N', D1, KLN, D1, ONE,
00427      $                       V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
00428      $                       WORK, DBLK )
00429                         CALL SLAMOV( 'A', D1, KLN, WORK, DBLK,
00430      $                       A( IROW+(KKCOL-1)*LDA ), LDA )
00431    50                CONTINUE
00432                   ELSE IF( UP .EQ. II ) THEN
00433                      ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
00434                      DO 60 KKCOL = ICOL, ICOL1, HSTEP
00435                         KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
00436                         CALL SGEMM( 'T', 'N', D1, KLN, D2, ONE,
00437      $                       V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
00438      $                       LDA, ZERO, WORK, DBLK )
00439                         CALL SGESD2D( CONTXT, D1, KLN, WORK, DBLK, UP,
00440      $                       MYCOL )
00441                         CALL SGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
00442      $                       DBLK, UP, MYCOL )
00443                         CALL SGEMM( 'T', 'N', D2, KLN, D2, ONE,
00444      $                       V( D1+1, D1+1 ), LDV,
00445      $                       A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
00446      $                       WORK( D1+1 ), DBLK )
00447                         CALL SLAMOV( 'A', D2, KLN, WORK( D1+1 ), DBLK,
00448      $                       A( IROW+(KKCOL-1)*LDA ), LDA )
00449    60                CONTINUE
00450                   END IF
00451                END IF
00452             END IF
00453 *
00454 *           Update vertical slab in A.
00455 *
00456             CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
00457      $           MYCOL, IROW, ICOL, II, JJ )
00458             IF( MYCOL .EQ. LEFT ) THEN
00459                IF( MYCOL .EQ. JJ ) THEN
00460                   CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
00461      $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00462                   IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00463                   DO 70 KKROW = IROW, IROW1, VSTEP
00464                      KLN = MIN( VSTEP, IROW1-KKROW+1 )
00465                      CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
00466      $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
00467      $                    WORK, KLN )
00468                      CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
00469      $                    A( KKROW+(ICOL-1)*LDA ), LDA )
00470    70             CONTINUE
00471                END IF
00472             ELSE
00473                IF( MYCOL .EQ. JJ ) THEN
00474                   CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
00475      $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00476                   IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00477                   DO 80 KKROW = IROW, IROW1, VSTEP
00478                      KLN = MIN( VSTEP, IROW1-KKROW+1 )
00479                      CALL SGEMM( 'N', 'N', KLN, D2, D1, ONE,
00480      $                    A( KKROW+(ICOL-1)*LDA ), LDA,
00481      $                    V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
00482      $                    KLN )
00483                      CALL SGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
00484      $                    KLN, MYROW, RIGHT )
00485                      CALL SGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
00486      $                    RIGHT )
00487                      CALL SGEMM( 'N', 'N', KLN, D1, D1, ONE,
00488      $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
00489      $                    WORK, KLN )
00490                      CALL SLAMOV( 'A', KLN, D1, WORK, KLN,
00491      $                    A( KKROW+(ICOL-1)*LDA ), LDA )
00492    80             CONTINUE
00493                ELSE IF ( LEFT .EQ. JJ ) THEN
00494                   CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
00495      $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00496                   IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00497                   DO 90 KKROW = IROW, IROW1, VSTEP
00498                      KLN = MIN( VSTEP, IROW1-KKROW+1 )
00499                      CALL SGEMM( 'N', 'N', KLN, D1, D2, ONE,
00500      $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
00501      $                    LDV, ZERO, WORK, KLN )
00502                      CALL SGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
00503      $                    LEFT )
00504                      CALL SGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
00505      $                    KLN, MYROW, LEFT )
00506                      CALL SGEMM( 'N', 'N', KLN, D2, D2, ONE,
00507      $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
00508      $                    LDV, ONE, WORK( 1+D1*KLN ), KLN )
00509                      CALL SLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ), KLN,
00510      $                    A( KKROW+(ICOL-1)*LDA ), LDA )
00511    90             CONTINUE
00512                END IF
00513             END IF
00514 *
00515 *           Update vertical slab in Z.
00516 *
00517             IF( WANTZ ) THEN
00518                CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
00519      $              MYCOL, IROW, ICOL, II, JJ )
00520                IF( MYCOL .EQ. LEFT ) THEN
00521                   IF( MYCOL .EQ. JJ ) THEN
00522                      CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
00523      $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00524                      IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00525                      DO 100 KKROW = IROW, IROW1, VSTEP
00526                         KLN = MIN( VSTEP, IROW1-KKROW+1 )
00527                         CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
00528      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
00529      $                       WORK, KLN )
00530                         CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
00531      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
00532   100                CONTINUE
00533                   END IF
00534                ELSE
00535                   IF( MYCOL .EQ. JJ ) THEN
00536                      CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
00537      $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00538                      IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00539                      DO 110 KKROW = IROW, IROW1, VSTEP
00540                         KLN = MIN( VSTEP, IROW1-KKROW+1 )
00541                         CALL SGEMM( 'N', 'N', KLN, D2, D1, ONE,
00542      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
00543      $                       V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
00544      $                       KLN )
00545                         CALL SGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
00546      $                       KLN, MYROW, RIGHT )
00547                         CALL SGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
00548      $                       RIGHT )
00549                         CALL SGEMM( 'N', 'N', KLN, D1, D1, ONE,
00550      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
00551      $                       WORK, KLN )
00552                         CALL SLAMOV( 'A', KLN, D1, WORK, KLN,
00553      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
00554   110                CONTINUE
00555                   ELSE IF( LEFT .EQ. JJ ) THEN
00556                      CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
00557      $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
00558                      IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
00559                      DO 120 KKROW = IROW, IROW1, VSTEP
00560                         KLN = MIN( VSTEP, IROW1-KKROW+1 )
00561                         CALL SGEMM( 'N', 'N', KLN, D1, D2, ONE,
00562      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
00563      $                       V( D1+1, 1 ), LDV, ZERO, WORK, KLN )
00564                         CALL SGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
00565      $                       LEFT )
00566                         CALL SGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
00567      $                       KLN, MYROW, LEFT )
00568                         CALL SGEMM( 'N', 'N', KLN, D2, D2, ONE,
00569      $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
00570      $                       V( D1+1, D1+1 ), LDV, ONE,
00571      $                       WORK( 1+D1*KLN ), KLN )
00572                         CALL SLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ),
00573      $                       KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
00574   120                CONTINUE
00575                   END IF
00576                END IF
00577             END IF
00578 *
00579          ELSE
00580 *
00581 *           Most complicated case: the deflation window lay across the
00582 *           border of the processor mesh.
00583 *           Treat V as a distributed matrix and call PSGEMM.
00584 *
00585             HSTEP = LWORK / DBLK * NPCOL
00586             VSTEP = LWORK / DBLK * NPROW
00587             LLDTMP = NUMROC( DBLK, DBLK, MYROW, 0, NPROW )
00588             LLDTMP = MAX( 1, LLDTMP )
00589             CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0, CONTXT,
00590      $           LLDTMP, INFO )
00591             CALL DESCINIT( DESCWH, DBLK, HSTEP, DBLK, LWORK / DBLK, 0,
00592      $           0, CONTXT, LLDTMP, INFO )
00593 *
00594 *           Update horizontal slab in A.
00595 *
00596             IF( WANTT ) THEN
00597                DO 130 KKCOL = I+1, N, HSTEP
00598                   KLN = MIN( HSTEP, N-KKCOL+1 )
00599                   CALL PSGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V, 1, 1,
00600      $                 DESCV, A, I-DBLK+1, KKCOL, DESCA, ZERO, WORK, 1,
00601      $                 1, DESCWH )
00602                   CALL PSGEMR2D( DBLK, KLN, WORK, 1, 1, DESCWH, A,
00603      $                 I-DBLK+1, KKCOL, DESCA, CONTXT )
00604   130          CONTINUE
00605             END IF
00606 *
00607 *           Update vertical slab in A.
00608 *
00609             DO 140 KKROW = LTOP, I-DBLK, VSTEP
00610                KLN = MIN( VSTEP, I-DBLK-KKROW+1 )
00611                LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
00612                LLDTMP = MAX( 1, LLDTMP )
00613                CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK, 0,
00614      $              0, CONTXT, LLDTMP, INFO )
00615                CALL PSGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, A, KKROW,
00616      $              I-DBLK+1, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
00617      $              DESCWV )
00618                CALL PSGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, A, KKROW,
00619      $              I-DBLK+1, DESCA, CONTXT )
00620   140       CONTINUE
00621 *
00622 *           Update vertical slab in Z.
00623 *
00624             IF( WANTZ ) THEN
00625                DO 150 KKROW = ILOZ, IHIZ, VSTEP
00626                   KLN = MIN( VSTEP, IHIZ-KKROW+1 )
00627                   LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
00628                   LLDTMP = MAX( 1, LLDTMP )
00629                   CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK,
00630      $                 0, 0, CONTXT, LLDTMP, INFO )
00631                   CALL PSGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, Z, KKROW,
00632      $                 I-DBLK+1, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1,
00633      $                 1, DESCWV )
00634                   CALL PSGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, Z,
00635      $                 KKROW, I-DBLK+1, DESCZ, CONTXT )
00636   150          CONTINUE
00637             END IF
00638          END IF
00639 *
00640 *        Extract converged eigenvalues.
00641 *
00642          II = 0
00643   160    CONTINUE
00644             IF( II .EQ. ND-1 .OR. WI( DBLK-II ) .EQ. ZERO ) THEN
00645                IF( NODE .EQ. 0 ) THEN
00646                   SR( I-II ) = WR( DBLK-II )
00647                ELSE
00648                   SR( I-II ) = ZERO
00649                END IF
00650                SI( I-II ) = ZERO
00651                II = II + 1
00652             ELSE
00653                IF( NODE .EQ. 0 ) THEN
00654                   SR( I-II-1 ) = WR( DBLK-II-1 )
00655                   SR( I-II ) = WR( DBLK-II )
00656                   SI( I-II-1 ) = WI( DBLK-II-1 )
00657                   SI( I-II ) = WI( DBLK-II )
00658                ELSE
00659                   SR( I-II-1 ) = ZERO
00660                   SR( I-II ) = ZERO
00661                   SI( I-II-1 ) = ZERO
00662                   SI( I-II ) = ZERO
00663                END IF
00664                II = II + 2
00665             END IF
00666          IF( II .LT. ND ) GOTO 160
00667       END IF
00668 *
00669 *     END OF PSLAQR2
00670 *
00671       END