ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaed2.f
Go to the documentation of this file.
00001       SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ,
00002      $                    RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM,
00003      $                    NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN,
00004      $                    NN1, NN2, IB1, IB2 )
00005 *
00006 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00007 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00008 *     and University of California, Berkeley.
00009 *     December 31, 1998
00010 *
00011 *     .. Scalar Arguments ..
00012       INTEGER            DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
00013      $                   N1, NB, NN, NN1, NN2, NPCOL
00014       DOUBLE PRECISION   RHO
00015 *     ..
00016 *     .. Array Arguments ..
00017       INTEGER            COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
00018      $                   INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
00019      $                   PSM( 0: NPCOL-1, 4 )
00020       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ),
00021      $                   Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
00022 *     ..
00023 *
00024 *  Purpose
00025 *  =======
00026 *
00027 *  PDLAED2 sorts the two sets of eigenvalues together into a single
00028 *  sorted set.  Then it tries to deflate the size of the problem.
00029 *  There are two ways in which deflation can occur:  when two or more
00030 *  eigenvalues are close together or if there is a tiny entry in the
00031 *  Z vector.  For each such occurrence the order of the related secular
00032 *  equation problem is reduced by one.
00033 *
00034 *  Arguments
00035 *  =========
00036 *
00037 *  ICTXT  (global input) INTEGER
00038 *         The BLACS context handle, indicating the global context of
00039 *         the operation on the matrix. The context itself is global.
00040 *
00041 *  K      (output) INTEGER
00042 *         The number of non-deflated eigenvalues, and the order of the
00043 *         related secular equation. 0 <= K <=N.
00044 *
00045 *  N      (input) INTEGER
00046 *         The dimension of the symmetric tridiagonal matrix.  N >= 0.
00047 *
00048 *  N1     (input) INTEGER
00049 *         The location of the last eigenvalue in the leading sub-matrix.
00050 *         min(1,N) < N1 < N.
00051 *
00052 *  NB      (global input) INTEGER
00053 *          The blocking factor used to distribute the columns of the
00054 *          matrix. NB >= 1.
00055 *
00056 *  D      (input/output) DOUBLE PRECISION array, dimension (N)
00057 *         On entry, D contains the eigenvalues of the two submatrices to
00058 *         be combined.
00059 *         On exit, D contains the trailing (N-K) updated eigenvalues
00060 *         (those which were deflated) sorted into increasing order.
00061 *
00062 *  DROW   (global input) INTEGER
00063 *          The process row over which the first row of the matrix D is
00064 *          distributed. 0 <= DROW < NPROW.
00065 *
00066 *  DCOL   (global input) INTEGER
00067 *          The process column over which the first column of the
00068 *          matrix D is distributed. 0 <= DCOL < NPCOL.
00069 *
00070 *  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
00071 *         On entry, Q contains the eigenvectors of two submatrices in
00072 *         the two square blocks with corners at (1,1), (N1,N1)
00073 *         and (N1+1, N1+1), (N,N).
00074 *         On exit, Q contains the trailing (N-K) updated eigenvectors
00075 *         (those which were deflated) in its last N-K columns.
00076 *
00077 *  LDQ    (input) INTEGER
00078 *         The leading dimension of the array Q.  LDQ >= max(1,NQ).
00079 *
00080 *  RHO    (global input/output) DOUBLE PRECISION
00081 *         On entry, the off-diagonal element associated with the rank-1
00082 *         cut which originally split the two submatrices which are now
00083 *         being recombined.
00084 *         On exit, RHO has been modified to the value required by
00085 *         PDLAED3.
00086 *
00087 *  Z      (global input) DOUBLE PRECISION array, dimension (N)
00088 *         On entry, Z contains the updating vector (the last
00089 *         row of the first sub-eigenvector matrix and the first row of
00090 *         the second sub-eigenvector matrix).
00091 *         On exit, the contents of Z have been destroyed by the updating
00092 *         process.
00093 *
00094 *  DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
00095 *         A copy of the first K eigenvalues which will be used by
00096 *         SLAED3 to form the secular equation.
00097 *
00098 *  W      (global output) DOUBLE PRECISION array, dimension (N)
00099 *         The first k values of the final deflation-altered z-vector
00100 *         which will be passed to SLAED3.
00101 *
00102 *  Q2     (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
00103 *         A copy of the first K eigenvectors which will be used by
00104 *
00105 *  LDQ2    (input) INTEGER
00106 *         The leading dimension of the array Q2.
00107 *
00108 *  QBUF   (workspace) DOUBLE PRECISION array, dimension 3*N
00109 *
00110 *  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
00111 *
00112 *  PSM    (workspace) INTEGER array, dimension( NPCOL, 4)
00113 *
00114 *  NPCOL   (global input) INTEGER
00115 *          The total number of columns over which the distributed
00116 *           submatrix is distributed.
00117 *
00118 *  INDX   (workspace) INTEGER array, dimension (N)
00119 *         The permutation used to sort the contents of DLAMDA into
00120 *         ascending order.
00121 *
00122 *  INDXC  (output) INTEGER array, dimension (N)
00123 *         The permutation used to arrange the columns of the deflated
00124 *         Q matrix into three groups:  the first group contains non-zero
00125 *         elements only at and above N1, the second contains
00126 *         non-zero elements only below N1, and the third is dense.
00127 *
00128 *  INDXP  (workspace) INTEGER array, dimension (N)
00129 *         The permutation used to place deflated values of D at the end
00130 *         of the array.  INDXP(1:K) points to the nondeflated D-values
00131 *         and INDXP(K+1:N) points to the deflated eigenvalues.
00132 *
00133 *  INDCOL (workspace) INTEGER array, dimension (N)
00134 *
00135 *  COLTYP (workspace/output) INTEGER array, dimension (N)
00136 *         During execution, a label which will indicate which of the
00137 *         following types a column in the Q2 matrix is:
00138 *         1 : non-zero in the upper half only;
00139 *         2 : dense;
00140 *         3 : non-zero in the lower half only;
00141 *         4 : deflated.
00142 *
00143 *  NN     (global output) INTEGER, the order of matrix U, (PDLAED1).
00144 *  NN1    (global output) INTEGER, the order of matrix Q1, (PDLAED1).
00145 *  NN2    (global output) INTEGER, the order of matrix Q2, (PDLAED1).
00146 *  IB1    (global output) INTEGER, pointeur on Q1, (PDLAED1).
00147 *  IB2    (global output) INTEGER, pointeur on Q2, (PDLAED1).
00148 *
00149 *  =====================================================================
00150 *
00151 *     .. Parameters ..
00152       DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
00153       PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
00154      $                   TWO = 2.0D0, EIGHT = 8.0D0 )
00155 *     ..
00156 *     .. Local Scalars ..
00157       INTEGER            COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
00158      $                   JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
00159      $                   NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
00160       DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
00161 *     ..
00162 *     .. External Functions ..
00163       INTEGER            IDAMAX, INDXG2L, INDXL2G, NUMROC
00164       DOUBLE PRECISION   DLAPY2, PDLAMCH
00165       EXTERNAL           IDAMAX, INDXG2L, INDXL2G, NUMROC, PDLAMCH,
00166      $                   DLAPY2
00167 *     ..
00168 *     .. External Subroutines ..
00169       EXTERNAL           BLACS_GRIDINFO, BLACS_PINFO, DCOPY, DGERV2D,
00170      $                   DGESD2D, DLAPST, DROT, DSCAL, INFOG1L
00171 *     ..
00172 *     .. Intrinsic Functions ..
00173       INTRINSIC          ABS, MAX, MIN, MOD, SQRT
00174 *     ..
00175 *     .. External Functions ..
00176 *     ..
00177 *     .. Local Arrays ..
00178       INTEGER            PTT( 4 )
00179 *     ..
00180 *     .. Executable Statements ..
00181 *
00182 *     Quick return if possible
00183 *
00184       IF( N.EQ.0 )
00185      $   RETURN
00186 *
00187       CALL BLACS_PINFO( IAM, NPROCS )
00188       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00189       NP = NUMROC( N, NB, MYROW, DROW, NPROW )
00190 *
00191       N2 = N - N1
00192       N1P1 = N1 + 1
00193 *
00194       IF( RHO.LT.ZERO ) THEN
00195          CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
00196       END IF
00197 *
00198 *     Normalize z so that norm(z) = 1.  Since z is the concatenation of
00199 *     two normalized vectors, norm2(z) = sqrt(2).
00200 *
00201       T = ONE / SQRT( TWO )
00202       CALL DSCAL( N, T, Z, 1 )
00203 *
00204 *     RHO = ABS( norm(z)**2 * RHO )
00205 *
00206       RHO = ABS( TWO*RHO )
00207 *
00208 *     Calculate the allowable deflation tolerance
00209 *
00210       IMAX = IDAMAX( N, Z, 1 )
00211       JMAX = IDAMAX( N, D, 1 )
00212       EPS = PDLAMCH( ICTXT, 'Epsilon' )
00213       TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
00214 *
00215 *     If the rank-1 modifier is small enough, no more needs to be done
00216 *     except to reorganize Q so that its columns correspond with the
00217 *     elements in D.
00218 *
00219       IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
00220          K = 0
00221          GO TO 220
00222       END IF
00223 *
00224 *     If there are multiple eigenvalues then the problem deflates.  Here
00225 *     the number of equal eigenvalues are found.  As each equal
00226 *     eigenvalue is found, an elementary reflector is computed to rotate
00227 *     the corresponding eigensubspace so that the corresponding
00228 *     components of Z are zero in this new basis.
00229 *
00230 *
00231       CALL DLAPST( 'I', N, D, INDX, INFO )
00232 *
00233       DO 10 I = 1, N1
00234          COLTYP( I ) = 1
00235    10 CONTINUE
00236       DO 20 I = N1P1, N
00237          COLTYP( I ) = 3
00238    20 CONTINUE
00239       COL = DCOL
00240       DO 40 I = 1, N, NB
00241          DO 30 J = 0, NB - 1
00242             IF( I+J.LE.N )
00243      $         INDCOL( I+J ) = COL
00244    30    CONTINUE
00245          COL = MOD( COL+1, NPCOL )
00246    40 CONTINUE
00247 *
00248       K = 0
00249       K2 = N + 1
00250       DO 50 J = 1, N
00251          NJ = INDX( J )
00252          IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
00253 *
00254 *           Deflate due to small z component.
00255 *
00256             K2 = K2 - 1
00257             COLTYP( NJ ) = 4
00258             INDXP( K2 ) = NJ
00259             IF( J.EQ.N )
00260      $         GO TO 80
00261          ELSE
00262             PJ = NJ
00263             GO TO 60
00264          END IF
00265    50 CONTINUE
00266    60 CONTINUE
00267       J = J + 1
00268       NJ = INDX( J )
00269       IF( J.GT.N )
00270      $   GO TO 80
00271       IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
00272 *
00273 *        Deflate due to small z component.
00274 *
00275          K2 = K2 - 1
00276          COLTYP( NJ ) = 4
00277          INDXP( K2 ) = NJ
00278       ELSE
00279 *
00280 *        Check if eigenvalues are close enough to allow deflation.
00281 *
00282          S = Z( PJ )
00283          C = Z( NJ )
00284 *
00285 *        Find sqrt(a**2+b**2) without overflow or
00286 *        destructive underflow.
00287 *
00288          TAU = DLAPY2( C, S )
00289          T = D( NJ ) - D( PJ )
00290          C = C / TAU
00291          S = -S / TAU
00292          IF( ABS( T*C*S ).LE.TOL ) THEN
00293 *
00294 *           Deflation is possible.
00295 *
00296             Z( NJ ) = TAU
00297             Z( PJ ) = ZERO
00298             IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
00299      $         COLTYP( NJ ) = 2
00300             COLTYP( PJ ) = 4
00301             CALL INFOG1L( NJ, NB, NPCOL, MYCOL, DCOL, NJJ, NJCOL )
00302             CALL INFOG1L( PJ, NB, NPCOL, MYCOL, DCOL, PJJ, PJCOL )
00303             IF( INDCOL( PJ ).EQ.INDCOL( NJ ) .AND. MYCOL.EQ.NJCOL ) THEN
00304                CALL DROT( NP, Q( 1, PJJ ), 1, Q( 1, NJJ ), 1, C, S )
00305             ELSE IF( MYCOL.EQ.PJCOL ) THEN
00306                CALL DGESD2D( ICTXT, NP, 1, Q( 1, PJJ ), NP, MYROW,
00307      $                       NJCOL )
00308                CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, NJCOL )
00309                CALL DROT( NP, Q( 1, PJJ ), 1, QBUF, 1, C, S )
00310             ELSE IF( MYCOL.EQ.NJCOL ) THEN
00311                CALL DGESD2D( ICTXT, NP, 1, Q( 1, NJJ ), NP, MYROW,
00312      $                       PJCOL )
00313                CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, PJCOL )
00314                CALL DROT( NP, QBUF, 1, Q( 1, NJJ ), 1, C, S )
00315             END IF
00316             T = D( PJ )*C**2 + D( NJ )*S**2
00317             D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
00318             D( PJ ) = T
00319             K2 = K2 - 1
00320             I = 1
00321    70       CONTINUE
00322             IF( K2+I.LE.N ) THEN
00323                IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
00324                   INDXP( K2+I-1 ) = INDXP( K2+I )
00325                   INDXP( K2+I ) = PJ
00326                   I = I + 1
00327                   GO TO 70
00328                ELSE
00329                   INDXP( K2+I-1 ) = PJ
00330                END IF
00331             ELSE
00332                INDXP( K2+I-1 ) = PJ
00333             END IF
00334             PJ = NJ
00335          ELSE
00336             K = K + 1
00337             DLAMDA( K ) = D( PJ )
00338             W( K ) = Z( PJ )
00339             INDXP( K ) = PJ
00340             PJ = NJ
00341          END IF
00342       END IF
00343       GO TO 60
00344    80 CONTINUE
00345 *
00346 *     Record the last eigenvalue.
00347 *
00348       K = K + 1
00349       DLAMDA( K ) = D( PJ )
00350       W( K ) = Z( PJ )
00351       INDXP( K ) = PJ
00352 *
00353 *     Count up the total number of the various types of columns, then
00354 *     form a permutation which positions the four column types into
00355 *     four uniform groups (although one or more of these groups may be
00356 *     empty).
00357 *
00358       DO 100 J = 1, 4
00359          DO 90 I = 0, NPCOL - 1
00360             CTOT( I, J ) = 0
00361    90    CONTINUE
00362          PTT( J ) = 0
00363   100 CONTINUE
00364       DO 110 J = 1, N
00365          CT = COLTYP( J )
00366          COL = INDCOL( J )
00367          CTOT( COL, CT ) = CTOT( COL, CT ) + 1
00368   110 CONTINUE
00369 *
00370 *     PSM(*) = Position in SubMatrix (of types 1 through 4)
00371 *
00372       DO 120 COL = 0, NPCOL - 1
00373          PSM( COL, 1 ) = 1
00374          PSM( COL, 2 ) = 1 + CTOT( COL, 1 )
00375          PSM( COL, 3 ) = PSM( COL, 2 ) + CTOT( COL, 2 )
00376          PSM( COL, 4 ) = PSM( COL, 3 ) + CTOT( COL, 3 )
00377   120 CONTINUE
00378       PTT( 1 ) = 1
00379       DO 140 I = 2, 4
00380          CT = 0
00381          DO 130 J = 0, NPCOL - 1
00382             CT = CT + CTOT( J, I-1 )
00383   130    CONTINUE
00384          PTT( I ) = PTT( I-1 ) + CT
00385   140 CONTINUE
00386 *
00387 *     Fill out the INDXC array so that the permutation which it induces
00388 *     will place all type-1 columns first, all type-2 columns next,
00389 *     then all type-3's, and finally all type-4's.
00390 *
00391       DO 150 J = 1, N
00392          JS = INDXP( J )
00393          COL = INDCOL( JS )
00394          CT = COLTYP( JS )
00395          I = INDXL2G( PSM( COL, CT ), NB, COL, DCOL, NPCOL )
00396          INDX( J ) = I
00397          INDXC( PTT( CT ) ) = I
00398          PSM( COL, CT ) = PSM( COL, CT ) + 1
00399          PTT( CT ) = PTT( CT ) + 1
00400   150 CONTINUE
00401 *
00402 *
00403       DO 160 J = 1, N
00404          JS = INDXP( J )
00405          JJS = INDXG2L( JS, NB, J, J, NPCOL )
00406          COL = INDCOL( JS )
00407          IF( COL.EQ.MYCOL ) THEN
00408             I = INDX( J )
00409             JJQ2 = INDXG2L( I, NB, J, J, NPCOL )
00410             CALL DCOPY( NP, Q( 1, JJS ), 1, Q2( 1, JJQ2 ), 1 )
00411          END IF
00412   160 CONTINUE
00413 *
00414 *
00415 *     The deflated eigenvalues and their corresponding vectors go back
00416 *     into the last N - K slots of D and Q respectively.
00417 *
00418       CALL DCOPY( N, D, 1, Z, 1 )
00419       DO 170 J = K + 1, N
00420          JS = INDXP( J )
00421          I = INDX( J )
00422          D( I ) = Z( JS )
00423   170 CONTINUE
00424 *
00425       PTT( 1 ) = 1
00426       DO 190 I = 2, 4
00427          CT = 0
00428          DO 180 J = 0, NPCOL - 1
00429             CT = CT + CTOT( J, I-1 )
00430   180    CONTINUE
00431          PTT( I ) = PTT( I-1 ) + CT
00432   190 CONTINUE
00433 *
00434 *
00435       IB1 = INDXC( 1 )
00436       IE1 = IB1
00437       IB2 = INDXC( PTT( 2 ) )
00438       IE2 = IB2
00439       DO 200 I = 2, PTT( 3 ) - 1
00440          IB1 = MIN( IB1, INDXC( I ) )
00441          IE1 = MAX( IE1, INDXC( I ) )
00442   200 CONTINUE
00443       DO 210 I = PTT( 2 ), PTT( 4 ) - 1
00444          IB2 = MIN( IB2, INDXC( I ) )
00445          IE2 = MAX( IE2, INDXC( I ) )
00446   210 CONTINUE
00447       NN1 = IE1 - IB1 + 1
00448       NN2 = IE2 - IB2 + 1
00449       NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1
00450   220 CONTINUE
00451       RETURN
00452 *
00453 *     End of PDLAED2
00454 *
00455       END