*  Extracted from: de/d62/pdlaed2_8f_source.html

      SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ,
     $                    RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM,
     $                    NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN,
     $                    NN1, NN2, IB1, IB2 )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     December 31, 1998
*
*     .. Scalar Arguments ..
      INTEGER            DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
     $                   N1, NB, NN, NN1, NN2, NPCOL
      DOUBLE PRECISION   RHO
*     ..
*     .. Array Arguments ..
      INTEGER            COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
     $                   INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
     $                   PSM( 0: NPCOL-1, 4 )
      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ),
     $                   Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLAED2 sorts the two sets of eigenvalues together into a single
*  sorted set.  Then it tries to deflate the size of the problem.
*  There are two ways in which deflation can occur:  when two or more
*  eigenvalues are close together or if there is a tiny entry in the
*  Z vector.  For each such occurrence the order of the related secular
*  equation problem is reduced by one.
*
*  Arguments
*  =========
*
*  ICTXT  (global input) INTEGER
*         The BLACS context handle, indicating the global context of
*         the operation on the matrix. The context itself is global.
*
*  K      (output) INTEGER
*         The number of non-deflated eigenvalues, and the order of the
*         related secular equation. 0 <= K <= N.
*         K = 0 is returned when N = 0, and also when the rank-one
*         modification is negligible (see the early exit below); in
*         the latter case no reordering is performed by this routine.
*
*  N      (input) INTEGER
*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
*
*  N1     (input) INTEGER
*         The location of the last eigenvalue in the leading sub-matrix.
*         min(1,N) < N1 < N.
*
*  NB     (global input) INTEGER
*         The blocking factor used to distribute the columns of the
*         matrix. NB >= 1.
*
*  D      (input/output) DOUBLE PRECISION array, dimension (N)
*         On entry, D contains the eigenvalues of the two submatrices to
*         be combined.
*         On exit, D contains the trailing (N-K) updated eigenvalues
*         (those which were deflated) sorted into increasing order.
*
*  DROW   (global input) INTEGER
*         The process row over which the first row of the matrix D is
*         distributed. 0 <= DROW < NPROW.
*
*  DCOL   (global input) INTEGER
*         The process column over which the first column of the
*         matrix D is distributed. 0 <= DCOL < NPCOL.
*
*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
*         On entry, Q contains the eigenvectors of two submatrices in
*         the two square blocks with corners at (1,1), (N1,N1)
*         and (N1+1, N1+1), (N,N).
*         On exit, Q contains the trailing (N-K) updated eigenvectors
*         (those which were deflated) in its last N-K columns.
*
*  LDQ    (input) INTEGER
*         The leading dimension of the array Q.  LDQ >= max(1,NQ).
*
*  RHO    (global input/output) DOUBLE PRECISION
*         On entry, the off-diagonal element associated with the rank-1
*         cut which originally split the two submatrices which are now
*         being recombined.
*         On exit, RHO has been modified to the value required by
*         PDLAED3.
*
*  Z      (global input) DOUBLE PRECISION array, dimension (N)
*         On entry, Z contains the updating vector (the last
*         row of the first sub-eigenvector matrix and the first row of
*         the second sub-eigenvector matrix).
*         On exit, the contents of Z have been destroyed by the updating
*         process.
*
*  DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
*         A copy of the first K eigenvalues which will be used by
*         PDLAED3 to form the secular equation.
*
*  W      (global output) DOUBLE PRECISION array, dimension (N)
*         The first K values of the final deflation-altered z-vector
*         which will be passed to PDLAED3.
*
*  Q2     (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
*         On exit (K > 0), the locally owned columns of Q copied into
*         the type-grouped column order computed by this routine.
*         Presumably consumed by PDLAED1/PDLAED3 -- confirm against
*         the caller.
*
*  LDQ2   (input) INTEGER
*         The leading dimension of the array Q2.
*
*  QBUF   (workspace) DOUBLE PRECISION array, dimension 3*N
*         Receive buffer for the remote column when a rotation spans
*         two process columns.
*
*  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
*         CTOT(c,t) is the number of columns of type t that belong to
*         process column c.
*
*  PSM    (workspace) INTEGER array, dimension( NPCOL, 4)
*         PSM(c,t) is the running local position, inside process
*         column c, of the next column of type t.
*
*  NPCOL  (global input) INTEGER
*         The total number of columns over which the distributed
*         submatrix is distributed.
*         NOTE(review): NPCOL is also handed to BLACS_GRIDINFO below,
*         which returns the grid width in it; callers are expected to
*         pass the value belonging to ICTXT.
*
*  INDX   (workspace) INTEGER array, dimension (N)
*         First holds the permutation that sorts D into ascending
*         order; it is later overwritten with the destination (global)
*         column index of every entry of INDXP.
*
*  INDXC  (output) INTEGER array, dimension (N)
*         The permutation used to arrange the columns of the deflated
*         Q matrix into four groups: type-1 columns first (non-zero in
*         the upper half only), then type-2 (dense), then type-3
*         (non-zero in the lower half only) and finally type-4
*         (deflated).
*
*  INDXP  (workspace) INTEGER array, dimension (N)
*         The permutation used to place deflated values of D at the end
*         of the array.  INDXP(1:K) points to the nondeflated D-values
*         and INDXP(K+1:N) points to the deflated eigenvalues.
*
*  INDCOL (workspace) INTEGER array, dimension (N)
*         INDCOL(j) is the process column that owns global column j.
*
*  COLTYP (workspace/output) INTEGER array, dimension (N)
*         During execution, a label which will indicate which of the
*         following types a column in the Q2 matrix is:
*         1 : non-zero in the upper half only;
*         2 : dense;
*         3 : non-zero in the lower half only;
*         4 : deflated.
*
*  NN     (global output) INTEGER, the order of matrix U, (PDLAED1).
*  NN1    (global output) INTEGER, the order of matrix Q1, (PDLAED1).
*  NN2    (global output) INTEGER, the order of matrix Q2, (PDLAED1).
*  IB1    (global output) INTEGER, pointer on Q1, (PDLAED1).
*  IB2    (global output) INTEGER, pointer on Q2, (PDLAED1).
*         NN, NN1, NN2, IB1 and IB2 are only assigned when K > 0.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
*     ..
*     .. Local Scalars ..
      INTEGER            COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
     $                   JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
     $                   NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
*     ..
*     .. External Functions ..
      INTEGER            IDAMAX, INDXG2L, INDXL2G, NUMROC
      DOUBLE PRECISION   DLAPY2, PDLAMCH
      EXTERNAL           IDAMAX, INDXG2L, INDXL2G, NUMROC, PDLAMCH,
     $                   DLAPY2
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, BLACS_PINFO, DCOPY, DGERV2D,
     $                   DGESD2D, DLAPST, DROT, DSCAL, INFOG1L
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, MOD, SQRT
*     ..
*     .. Local Arrays ..
      INTEGER            PTT( 4 )
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible.  K is an output argument, so give it a
*     defined value (no non-deflated eigenvalues) before leaving.
*
      IF( N.EQ.0 ) THEN
         K = 0
         RETURN
      END IF
*
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     NP = number of rows of Q held by this process.
*
      NP = NUMROC( N, NB, MYROW, DROW, NPROW )
*
      N2 = N - N1
      N1P1 = N1 + 1
*
*     A negative RHO is absorbed by flipping the sign of the second
*     half of Z; RHO itself is made non-negative below.
*
      IF( RHO.LT.ZERO ) THEN
         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
      END IF
*
*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
*     two normalized vectors, norm2(z) = sqrt(2).
*
      T = ONE / SQRT( TWO )
      CALL DSCAL( N, T, Z, 1 )
*
*     RHO = ABS( norm(z)**2 * RHO )
*
      RHO = ABS( TWO*RHO )
*
*     Calculate the allowable deflation tolerance
*
      IMAX = IDAMAX( N, Z, 1 )
      JMAX = IDAMAX( N, D, 1 )
      EPS = PDLAMCH( ICTXT, 'Epsilon' )
      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
*
*     If the rank-1 modifier is small enough, every eigenvalue
*     deflates.  This routine then only reports K = 0 and returns
*     immediately: no sorting, no reordering of Q and no assignment of
*     NN, NN1, NN2, IB1 or IB2 takes place here.
*
      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
         K = 0
         GO TO 220
      END IF
*
*     If there are multiple eigenvalues then the problem deflates.  Here
*     the number of equal eigenvalues are found.  As each equal
*     eigenvalue is found, a plane rotation is computed to rotate
*     the corresponding eigensubspace so that the corresponding
*     components of Z are zero in this new basis.
*
*     INDX := permutation that sorts D into increasing order.
*
      CALL DLAPST( 'I', N, D, INDX, INFO )
*
*     Initial column types: upper block -> 1, lower block -> 3.
*
      DO 10 I = 1, N1
         COLTYP( I ) = 1
   10 CONTINUE
      DO 20 I = N1P1, N
         COLTYP( I ) = 3
   20 CONTINUE
*
*     Record the owning process column of every global column
*     (blocks of NB columns dealt out cyclically starting at DCOL).
*
      COL = DCOL
      DO 40 I = 1, N, NB
         DO 30 J = 0, NB - 1
            IF( I+J.LE.N )
     $         INDCOL( I+J ) = COL
   30    CONTINUE
         COL = MOD( COL+1, NPCOL )
   40 CONTINUE
*
*     K counts non-deflated entries (filled from the front of INDXP),
*     K2 is the first occupied slot of the deflated tail of INDXP.
*
      K = 0
      K2 = N + 1
*
*     Skip over leading entries that deflate because of a tiny z
*     component; PJ becomes the first surviving candidate.
*
      DO 50 J = 1, N
         NJ = INDX( J )
         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
*           Deflate due to small z component.
*
            K2 = K2 - 1
            COLTYP( NJ ) = 4
            INDXP( K2 ) = NJ
            IF( J.EQ.N )
     $         GO TO 80
         ELSE
            PJ = NJ
            GO TO 60
         END IF
   50 CONTINUE
   60 CONTINUE
      J = J + 1
*
*     Test the bound before touching INDX: reading INDX( N+1 ) would
*     be outside the documented extent of the array.
*
      IF( J.GT.N )
     $   GO TO 80
      NJ = INDX( J )
      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
*
*        Deflate due to small z component.
*
         K2 = K2 - 1
         COLTYP( NJ ) = 4
         INDXP( K2 ) = NJ
      ELSE
*
*        Check if eigenvalues are close enough to allow deflation.
*
         S = Z( PJ )
         C = Z( NJ )
*
*        Find sqrt(a**2+b**2) without overflow or
*        destructive underflow.
*
         TAU = DLAPY2( C, S )
         T = D( NJ ) - D( PJ )
         C = C / TAU
         S = -S / TAU
         IF( ABS( T*C*S ).LE.TOL ) THEN
*
*           Deflation is possible.
*
            Z( NJ ) = TAU
            Z( PJ ) = ZERO
*
*           Mixing columns of different types makes NJ dense.
*
            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
     $         COLTYP( NJ ) = 2
            COLTYP( PJ ) = 4
*
*           Local column indices and owning process columns of the
*           two columns being rotated.
*
            CALL INFOG1L( NJ, NB, NPCOL, MYCOL, DCOL, NJJ, NJCOL )
            CALL INFOG1L( PJ, NB, NPCOL, MYCOL, DCOL, PJJ, PJCOL )
            IF( INDCOL( PJ ).EQ.INDCOL( NJ ) .AND. MYCOL.EQ.NJCOL ) THEN
*
*              Both columns are local: rotate in place.
*
               CALL DROT( NP, Q( 1, PJJ ), 1, Q( 1, NJJ ), 1, C, S )
            ELSE IF( MYCOL.EQ.PJCOL ) THEN
*
*              This process owns column PJ only: swap copies with the
*              owner of NJ and update the local column.
*
               CALL DGESD2D( ICTXT, NP, 1, Q( 1, PJJ ), NP, MYROW,
     $                       NJCOL )
               CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, NJCOL )
               CALL DROT( NP, Q( 1, PJJ ), 1, QBUF, 1, C, S )
            ELSE IF( MYCOL.EQ.NJCOL ) THEN
*
*              This process owns column NJ only: mirror image of the
*              branch above.
*
               CALL DGESD2D( ICTXT, NP, 1, Q( 1, NJJ ), NP, MYROW,
     $                       PJCOL )
               CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, PJCOL )
               CALL DROT( NP, QBUF, 1, Q( 1, NJJ ), 1, C, S )
            END IF
            T = D( PJ )*C**2 + D( NJ )*S**2
            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
            D( PJ ) = T
*
*           Insert PJ into the deflated tail INDXP( K2:N ): entries
*           whose D value exceeds D( PJ ) are shifted one slot towards
*           the front and PJ is stored behind them.
*
            K2 = K2 - 1
            I = 1
   70       CONTINUE
            IF( K2+I.LE.N ) THEN
               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
                  INDXP( K2+I-1 ) = INDXP( K2+I )
                  INDXP( K2+I ) = PJ
                  I = I + 1
                  GO TO 70
               ELSE
                  INDXP( K2+I-1 ) = PJ
               END IF
            ELSE
               INDXP( K2+I-1 ) = PJ
            END IF
            PJ = NJ
         ELSE
*
*           No deflation: PJ is kept as a secular-equation entry.
*
            K = K + 1
            DLAMDA( K ) = D( PJ )
            W( K ) = Z( PJ )
            INDXP( K ) = PJ
            PJ = NJ
         END IF
      END IF
      GO TO 60
   80 CONTINUE
*
*     Record the last eigenvalue.
*
      K = K + 1
      DLAMDA( K ) = D( PJ )
      W( K ) = Z( PJ )
      INDXP( K ) = PJ
*
*     Count up the total number of the various types of columns, then
*     form a permutation which positions the four column types into
*     four uniform groups (although one or more of these groups may be
*     empty).
*
      DO 100 J = 1, 4
         DO 90 I = 0, NPCOL - 1
            CTOT( I, J ) = 0
   90    CONTINUE
         PTT( J ) = 0
  100 CONTINUE
      DO 110 J = 1, N
         CT = COLTYP( J )
         COL = INDCOL( J )
         CTOT( COL, CT ) = CTOT( COL, CT ) + 1
  110 CONTINUE
*
*     PSM(*) = Position in SubMatrix (of types 1 through 4)
*
      DO 120 COL = 0, NPCOL - 1
         PSM( COL, 1 ) = 1
         PSM( COL, 2 ) = 1 + CTOT( COL, 1 )
         PSM( COL, 3 ) = PSM( COL, 2 ) + CTOT( COL, 2 )
         PSM( COL, 4 ) = PSM( COL, 3 ) + CTOT( COL, 3 )
  120 CONTINUE
*
*     PTT( t ) = global start position of the type-t group, summed
*     over all process columns.
*
      PTT( 1 ) = 1
      DO 140 I = 2, 4
         CT = 0
         DO 130 J = 0, NPCOL - 1
            CT = CT + CTOT( J, I-1 )
  130    CONTINUE
         PTT( I ) = PTT( I-1 ) + CT
  140 CONTINUE
*
*     Fill out the INDXC array so that the permutation which it induces
*     will place all type-1 columns first, all type-2 columns next,
*     then all type-3's, and finally all type-4's.
*     INDX( J ) receives the destination global column of INDXP( J ).
*
      DO 150 J = 1, N
         JS = INDXP( J )
         COL = INDCOL( JS )
         CT = COLTYP( JS )
         I = INDXL2G( PSM( COL, CT ), NB, COL, DCOL, NPCOL )
         INDX( J ) = I
         INDXC( PTT( CT ) ) = I
         PSM( COL, CT ) = PSM( COL, CT ) + 1
         PTT( CT ) = PTT( CT ) + 1
  150 CONTINUE
*
*     Copy every locally owned column of Q to its new position in Q2.
*     The third and fourth arguments of INDXG2L are dummies here.
*
      DO 160 J = 1, N
         JS = INDXP( J )
         JJS = INDXG2L( JS, NB, J, J, NPCOL )
         COL = INDCOL( JS )
         IF( COL.EQ.MYCOL ) THEN
            I = INDX( J )
            JJQ2 = INDXG2L( I, NB, J, J, NPCOL )
            CALL DCOPY( NP, Q( 1, JJS ), 1, Q2( 1, JJQ2 ), 1 )
         END IF
  160 CONTINUE
*
*     The deflated eigenvalues and their corresponding vectors go back
*     into the last N - K slots of D and Q respectively.
*     Z is reused as scratch space holding a copy of D.
*
      CALL DCOPY( N, D, 1, Z, 1 )
      DO 170 J = K + 1, N
         JS = INDXP( J )
         I = INDX( J )
         D( I ) = Z( JS )
  170 CONTINUE
*
*     Loop 150 advanced PTT, so rebuild the group start positions.
*
      PTT( 1 ) = 1
      DO 190 I = 2, 4
         CT = 0
         DO 180 J = 0, NPCOL - 1
            CT = CT + CTOT( J, I-1 )
  180    CONTINUE
         PTT( I ) = PTT( I-1 ) + CT
  190 CONTINUE
*
*     [IB1,IE1] spans the destination columns of types 1 and 2,
*     [IB2,IE2] spans those of types 2 and 3.
*
      IB1 = INDXC( 1 )
      IE1 = IB1
      IB2 = INDXC( PTT( 2 ) )
      IE2 = IB2
      DO 200 I = 2, PTT( 3 ) - 1
         IB1 = MIN( IB1, INDXC( I ) )
         IE1 = MAX( IE1, INDXC( I ) )
  200 CONTINUE
      DO 210 I = PTT( 2 ), PTT( 4 ) - 1
         IB2 = MIN( IB2, INDXC( I ) )
         IE2 = MAX( IE2, INDXC( I ) )
  210 CONTINUE
      NN1 = IE1 - IB1 + 1
      NN2 = IE2 - IB2 + 1
      NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1
  220 CONTINUE
      RETURN
*
*     End of PDLAED2
*
      END