◆ pdlaed2()

subroutine pdlaed2	(	integer	ictxt,
		integer	k,
		integer	n,
		integer	n1,
		integer	nb,
		double precision, dimension( * )	d,
		integer	drow,
		integer	dcol,
		double precision, dimension( ldq, * )	q,
		integer	ldq,
		double precision	rho,
		double precision, dimension( * )	z,
		double precision, dimension( * )	w,
		double precision, dimension( * )	dlamda,
		double precision, dimension( ldq2, * )	q2,
		integer	ldq2,
		double precision, dimension( * )	qbuf,
		integer, dimension( 0: npcol-1, 4 )	ctot,
		integer, dimension( 0: npcol-1, 4 )	psm,
		integer	npcol,
		integer, dimension( * )	indx,
		integer, dimension( * )	indxc,
		integer, dimension( * )	indxp,
		integer, dimension( n )	indcol,
		integer, dimension( * )	coltyp,
		integer	nn,
		integer	nn1,
		integer	nn2,
		integer	ib1,
		integer	ib2
	)
Definition at line 1 of file pdlaed2.f.
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     December 31, 1998
*
*     .. Scalar Arguments ..
      INTEGER            DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
     $                   N1, NB, NN, NN1, NN2, NPCOL
      DOUBLE PRECISION   RHO
*     ..
*     .. Array Arguments ..
      INTEGER            COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
     $                   INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
     $                   PSM( 0: NPCOL-1, 4 )
      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ),
     $                   Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLAED2 sorts the two sets of eigenvalues together into a single
*  sorted set.  Then it tries to deflate the size of the problem.
*  There are two ways in which deflation can occur:  when two or more
*  eigenvalues are close together or if there is a tiny entry in the
*  Z vector.  For each such occurrence the order of the related secular
*  equation problem is reduced by one.
*
*  Arguments
*  =========
*
*  ICTXT  (global input) INTEGER
*         The BLACS context handle, indicating the global context of
*         the operation on the matrix. The context itself is global.
*
*  K      (output) INTEGER
*         The number of non-deflated eigenvalues, and the order of the
*         related secular equation. 0 <= K <=N.
*
*  N      (input) INTEGER
*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
*
*  N1     (input) INTEGER
*         The location of the last eigenvalue in the leading sub-matrix.
*         min(1,N) < N1 < N.
*
*  NB      (global input) INTEGER
*          The blocking factor used to distribute the columns of the
*          matrix. NB >= 1.
*
*  D      (input/output) DOUBLE PRECISION array, dimension (N)
*         On entry, D contains the eigenvalues of the two submatrices to
*         be combined.
*         On exit, D contains the trailing (N-K) updated eigenvalues
*         (those which were deflated) sorted into increasing order.
*
*  DROW   (global input) INTEGER
*          The process row over which the first row of the matrix D is
*          distributed. 0 <= DROW < NPROW.
*
*  DCOL   (global input) INTEGER
*          The process column over which the first column of the
*          matrix D is distributed. 0 <= DCOL < NPCOL.
*
*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
*         On entry, Q contains the eigenvectors of two submatrices in
*         the two square blocks with corners at (1,1), (N1,N1)
*         and (N1+1, N1+1), (N,N).
*         On exit, Q contains the trailing (N-K) updated eigenvectors
*         (those which were deflated) in its last N-K columns.
*
*  LDQ    (input) INTEGER
*         The leading dimension of the array Q.  LDQ >= max(1,NQ).
*
*  RHO    (global input/output) DOUBLE PRECISION
*         On entry, the off-diagonal element associated with the rank-1
*         cut which originally split the two submatrices which are now
*         being recombined.
*         On exit, RHO has been modified to the value required by
*         PDLAED3.
*
*  Z      (global input) DOUBLE PRECISION array, dimension (N)
*         On entry, Z contains the updating vector (the last
*         row of the first sub-eigenvector matrix and the first row of
*         the second sub-eigenvector matrix).
*         On exit, the contents of Z have been destroyed by the updating
*         process.
*
*  DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
*         A copy of the first K eigenvalues which will be used by
*         PDLAED3 to form the secular equation.
*
*  W      (global output) DOUBLE PRECISION array, dimension (N)
*         The first K values of the final deflation-altered z-vector
*         which will be passed to PDLAED3.
*
*  Q2     (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
*         A copy of the first K eigenvectors which will be used by
*         the calling routine (see PDLAED1) to form the updated
*         eigenvectors.
*
*  LDQ2    (input) INTEGER
*         The leading dimension of the array Q2.
*
*  QBUF   (workspace) DOUBLE PRECISION array, dimension 3*N
*
*  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
*
*  PSM    (workspace) INTEGER array, dimension( NPCOL, 4)
*
*  NPCOL   (global input) INTEGER
*          The total number of columns over which the distributed
*           submatrix is distributed.
*
*  INDX   (workspace) INTEGER array, dimension (N)
*         The permutation used to sort the contents of DLAMDA into
*         ascending order.
*
*  INDXC  (output) INTEGER array, dimension (N)
*         The permutation used to arrange the columns of the deflated
*         Q matrix into three groups:  the first group contains non-zero
*         elements only at and above N1, the second contains
*         non-zero elements only below N1, and the third is dense.
*
*  INDXP  (workspace) INTEGER array, dimension (N)
*         The permutation used to place deflated values of D at the end
*         of the array.  INDXP(1:K) points to the nondeflated D-values
*         and INDXP(K+1:N) points to the deflated eigenvalues.
*
*  INDCOL (workspace) INTEGER array, dimension (N)
*
*  COLTYP (workspace/output) INTEGER array, dimension (N)
*         During execution, a label which will indicate which of the
*         following types a column in the Q2 matrix is:
*         1 : non-zero in the upper half only;
*         2 : dense;
*         3 : non-zero in the lower half only;
*         4 : deflated.
*
*  NN     (global output) INTEGER, the order of matrix U, (PDLAED1).
*  NN1    (global output) INTEGER, the order of matrix Q1, (PDLAED1).
*  NN2    (global output) INTEGER, the order of matrix Q2, (PDLAED1).
*  IB1    (global output) INTEGER, pointer on Q1, (PDLAED1).
*  IB2    (global output) INTEGER, pointer on Q2, (PDLAED1).
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
      parameter( mone = -1.0d0, zero = 0.0d0, one = 1.0d0,
     $                   two = 2.0d0, eight = 8.0d0 )
*     ..
*     .. Local Scalars ..
      INTEGER            COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
     $                   JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
     $                   NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
*     ..
*     .. External Functions ..
      INTEGER            IDAMAX, INDXG2L, INDXL2G, NUMROC
      DOUBLE PRECISION   DLAPY2, PDLAMCH
      EXTERNAL           idamax, indxg2l, indxl2g, numroc, pdlamch,
     $                   dlapy2
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, blacs_pinfo, dcopy, dgerv2d,
     $                   dgesd2d, dlapst, drot, dscal, infog1l
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max, min, mod, sqrt
*     ..
*     .. Local Arrays ..
*     PTT( t ) is the running position, inside INDXC, of the next
*     column of type t (t = 1..4, see COLTYP).
      INTEGER            PTT( 4 )
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( n.EQ.0 )
     $   RETURN
*
*     Query the process grid.  NP is the row count returned by NUMROC
*     for this process row; it is the vector length used for every
*     column operation on Q and Q2 below.
*
      CALL blacs_pinfo( iam, nprocs )
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
      np = numroc( n, nb, myrow, drow, nprow )
*
      n2 = n - n1
      n1p1 = n1 + 1
*
*     A negative RHO is handled by flipping the sign of the second
*     half of z; RHO itself is made non-negative further down.
*
      IF( rho.LT.zero ) THEN
         CALL dscal( n2, mone, z( n1p1 ), 1 )
      END IF
*
*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
*     two normalized vectors, norm2(z) = sqrt(2).
*
      t = one / sqrt( two )
      CALL dscal( n, t, z, 1 )
*
*     RHO = ABS( norm(z)**2 * RHO )
*
      rho = abs( two*rho )
*
*     Calculate the allowable deflation tolerance
*
      imax = idamax( n, z, 1 )
      jmax = idamax( n, d, 1 )
      eps = pdlamch( ictxt, 'Epsilon' )
      tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
*
*     If the rank-1 modifier is small enough, no more needs to be done.
*     Note that this path returns immediately with K = 0: Q, Q2 and
*     the index arrays are left untouched and NN, NN1, NN2, IB1 and
*     IB2 are not assigned.
*
      IF( rho*abs( z( imax ) ).LE.tol ) THEN
         k = 0
         GO TO 220
      END IF
*
*     If there are multiple eigenvalues then the problem deflates.  Here
*     the number of equal eigenvalues are found.  As each equal
*     eigenvalue is found, a plane rotation (DROT) is applied to the
*     two corresponding columns of Q so that one of the corresponding
*     components of Z becomes zero in the new basis.
*
*     INDX receives the permutation produced by DLAPST for D.
*
      CALL dlapst( 'I', n, d, indx, info )
*
*     Initial column types: 1 for the first N1 columns, 3 for the rest.
*
      DO 10 i = 1, n1
         coltyp( i ) = 1
   10 CONTINUE
      DO 20 i = n1p1, n
         coltyp( i ) = 3
   20 CONTINUE
*
*     INDCOL( g ) = process column assigned to global column g: blocks
*     of NB consecutive columns are dealt out cyclically from DCOL.
*
      col = dcol
      DO 40 i = 1, n, nb
         DO 30 j = 0, nb - 1
            IF( i+j.LE.n )
     $         indcol( i+j ) = col
   30    CONTINUE
         col = mod( col+1, npcol )
   40 CONTINUE
*
*     K counts non-deflated values (stored from the front of INDXP),
*     K2 is the lowest used slot of the deflated values (stored from
*     the back of INDXP).
*
      k = 0
      k2 = n + 1
*
*     Skip over leading entries that deflate because of a tiny z
*     component; PJ becomes the first candidate that does not.
*
      DO 50 j = 1, n
         nj = indx( j )
         IF( rho*abs( z( nj ) ).LE.tol ) THEN
*
*           Deflate due to small z component.
*
            k2 = k2 - 1
            coltyp( nj ) = 4
            indxp( k2 ) = nj
            IF( j.EQ.n )
     $         GO TO 80
         ELSE
            pj = nj
            GO TO 60
         END IF
   50 CONTINUE
*
*     Main deflation loop: compare the pending candidate PJ with the
*     next entry NJ in sorted order.
*
   60 CONTINUE
      j = j + 1
*
*     Test the loop bound BEFORE loading INDX( J ): when J = N+1 the
*     element INDX( N+1 ) lies outside the N documented entries of
*     INDX and must not be referenced.
*
      IF( j.GT.n )
     $   GO TO 80
      nj = indx( j )
      IF( rho*abs( z( nj ) ).LE.tol ) THEN
*
*        Deflate due to small z component.
*
         k2 = k2 - 1
         coltyp( nj ) = 4
         indxp( k2 ) = nj
      ELSE
*
*        Check if eigenvalues are close enough to allow deflation.
*
         s = z( pj )
         c = z( nj )
*
*        Find sqrt(a**2+b**2) without overflow or
*        destructive underflow.
*
         tau = dlapy2( c, s )
         t = d( nj ) - d( pj )
         c = c / tau
         s = -s / tau
         IF( abs( t*c*s ).LE.tol ) THEN
*
*           Deflation is possible.
*
            z( nj ) = tau
            z( pj ) = zero
*
*           A column that mixes both halves becomes dense (type 2);
*           the column being deflated becomes type 4.
*
            IF( coltyp( nj ).NE.coltyp( pj ) )
     $         coltyp( nj ) = 2
            coltyp( pj ) = 4
*
*           Local column indices and owning process columns of the
*           two global columns NJ and PJ.
*
            CALL infog1l( nj, nb, npcol, mycol, dcol, njj, njcol )
            CALL infog1l( pj, nb, npcol, mycol, dcol, pjj, pjcol )
            IF( indcol( pj ).EQ.indcol( nj ) .AND. mycol.EQ.njcol ) THEN
*
*              Both columns are local: rotate them in place.
*
               CALL drot( np, q( 1, pjj ), 1, q( 1, njj ), 1, c, s )
            ELSE IF( mycol.EQ.pjcol ) THEN
*
*              This process holds column PJ only: exchange columns
*              with the owner of NJ through QBUF, then rotate.  QBUF
*              is scratch; only the local column of Q is kept.
*
               CALL dgesd2d( ictxt, np, 1, q( 1, pjj ), np, myrow,
     $                       njcol )
               CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, njcol )
               CALL drot( np, q( 1, pjj ), 1, qbuf, 1, c, s )
            ELSE IF( mycol.EQ.njcol ) THEN
*
*              Mirror case: this process holds column NJ only.
*
               CALL dgesd2d( ictxt, np, 1, q( 1, njj ), np, myrow,
     $                       pjcol )
               CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, pjcol )
               CALL drot( np, qbuf, 1, q( 1, njj ), 1, c, s )
            END IF
*
*           Apply the same rotation to the two diagonal entries.
*
            t = d( pj )*c**2 + d( nj )*s**2
            d( nj ) = d( pj )*s**2 + d( nj )*c**2
            d( pj ) = t
*
*           Insert PJ into the deflated tail INDXP( K2: N ), shifting
*           entries down while D( PJ ) is smaller than the entry that
*           follows, so that the tail stays ordered by D.
*
            k2 = k2 - 1
            i = 1
   70       CONTINUE
            IF( k2+i.LE.n ) THEN
               IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
                  indxp( k2+i-1 ) = indxp( k2+i )
                  indxp( k2+i ) = pj
                  i = i + 1
                  GO TO 70
               ELSE
                  indxp( k2+i-1 ) = pj
               END IF
            ELSE
               indxp( k2+i-1 ) = pj
            END IF
            pj = nj
         ELSE
*
*           No deflation: record PJ as non-deflated and make NJ the
*           new pending candidate.
*
            k = k + 1
            dlamda( k ) = d( pj )
            w( k ) = z( pj )
            indxp( k ) = pj
            pj = nj
         END IF
      END IF
      GO TO 60
   80 CONTINUE
*
*     Record the last eigenvalue.
*
      k = k + 1
      dlamda( k ) = d( pj )
      w( k ) = z( pj )
      indxp( k ) = pj
*
*     Count up the total number of the various types of columns, then
*     form a permutation which positions the four column types into
*     four uniform groups (although one or more of these groups may be
*     empty).
*
      DO 100 j = 1, 4
         DO 90 i = 0, npcol - 1
            ctot( i, j ) = 0
   90    CONTINUE
         ptt( j ) = 0
  100 CONTINUE
      DO 110 j = 1, n
         ct = coltyp( j )
         col = indcol( j )
         ctot( col, ct ) = ctot( col, ct ) + 1
  110 CONTINUE
*
*     PSM(*) = Position in SubMatrix (of types 1 through 4)
*
      DO 120 col = 0, npcol - 1
         psm( col, 1 ) = 1
         psm( col, 2 ) = 1 + ctot( col, 1 )
         psm( col, 3 ) = psm( col, 2 ) + ctot( col, 2 )
         psm( col, 4 ) = psm( col, 3 ) + ctot( col, 3 )
  120 CONTINUE
*
*     PTT( t ) = 1 + total number of columns of type lower than t,
*     summed over all process columns.
*
      ptt( 1 ) = 1
      DO 140 i = 2, 4
         ct = 0
         DO 130 j = 0, npcol - 1
            ct = ct + ctot( j, i-1 )
  130    CONTINUE
         ptt( i ) = ptt( i-1 ) + ct
  140 CONTINUE
*
*     Fill out the INDXC array so that the permutation which it induces
*     will place all type-1 columns first, all type-2 columns next,
*     then all type-3's, and finally all type-4's.
*     INDX is reused here: INDX( J ) now holds the new global column
*     index assigned to the column INDXP( J ).
*
      DO 150 j = 1, n
         js = indxp( j )
         col = indcol( js )
         ct = coltyp( js )
         i = indxl2g( psm( col, ct ), nb, col, dcol, npcol )
         indx( j ) = i
         indxc( ptt( ct ) ) = i
         psm( col, ct ) = psm( col, ct ) + 1
         ptt( ct ) = ptt( ct ) + 1
  150 CONTINUE
*
*     Copy every locally owned column of Q to its new position in Q2.
*
      DO 160 j = 1, n
         js = indxp( j )
         jjs = indxg2l( js, nb, j, j, npcol )
         col = indcol( js )
         IF( col.EQ.mycol ) THEN
            i = indx( j )
            jjq2 = indxg2l( i, nb, j, j, npcol )
            CALL dcopy( np, q( 1, jjs ), 1, q2( 1, jjq2 ), 1 )
         END IF
  160 CONTINUE
*
*
*     The deflated eigenvalues go back into D at their new positions.
*     Z is overwritten with a copy of D and used as the source so that
*     the scatter into D does not read values it has already replaced.
*
      CALL dcopy( n, d, 1, z, 1 )
      DO 170 j = k + 1, n
         js = indxp( j )
         i = indx( j )
         d( i ) = z( js )
  170 CONTINUE
*
*     Rebuild PTT: loop 150 advanced it past each group.
*
      ptt( 1 ) = 1
      DO 190 i = 2, 4
         ct = 0
         DO 180 j = 0, npcol - 1
            ct = ct + ctot( j, i-1 )
  180    CONTINUE
         ptt( i ) = ptt( i-1 ) + ct
  190 CONTINUE
*
*     IB1:IE1 is the range of new column indices spanned by the type-1
*     and type-2 entries of INDXC, IB2:IE2 the range spanned by the
*     type-2 and type-3 entries.
*
      ib1 = indxc( 1 )
      ie1 = ib1
      ib2 = indxc( ptt( 2 ) )
      ie2 = ib2
      DO 200 i = 2, ptt( 3 ) - 1
         ib1 = min( ib1, indxc( i ) )
         ie1 = max( ie1, indxc( i ) )
  200 CONTINUE
      DO 210 i = ptt( 2 ), ptt( 4 ) - 1
         ib2 = min( ib2, indxc( i ) )
         ie2 = max( ie2, indxc( i ) )
  210 CONTINUE
      nn1 = ie1 - ib1 + 1
      nn2 = ie2 - ib2 + 1
      nn = max( ie1, ie2 ) - min( ib1, ib2 ) + 1
  220 CONTINUE
      RETURN
*
*     End of PDLAED2
*
Here is the call graph for this function:
Here is the caller graph for this function: