*  dc/dc0/pzsepqtq_8f_source.html

*

*

      SUBROUTINE PZSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC,
     $                     DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK,
     $                     QTQNRM, INFO, RES )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES
      DOUBLE PRECISION   QTQNRM, THRESH
*     ..
*     .. Array Arguments ..
*
      INTEGER            DESCC( * ), DESCQ( * ), ICLUSTR( * ),
     $                   PROCDIST( * )
      DOUBLE PRECISION   GAP( * ), WORK( * )
      COMPLEX*16         C( * ), Q( * )
*     ..
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Purpose
*  =======
*
*  PZSEPQTQ checks the orthonormality of the columns of Q.  It forms
*  C = I - Q**H * Q (Q**H is the conjugate transpose of Q), takes the
*  one-norm of C with PZLANGE and scales it:
*
*     | I - Q**H * Q | / ( ulp * max( MS, 1 ) )
*
*  where ulp = PDLAMCH( ctxt, 'P' ).  This first value is stored in
*  QTQNRM.  Then, for every cluster listed in ICLUSTR, blocks of C are
*  rescaled by GAP( cluster ) / 0.01 through PZMATADD, the scaled norm
*  is recomputed, and that second value is compared with THRESH.
*
*  Arguments
*  =========
*
*     NP = number of local rows in C
*     NQ = number of local columns in C and Q
*
*  MS      (global input) INTEGER
*          Matrix size.
*          The number of global rows in Q
*
*  NV      (global input) INTEGER
*          Number of eigenvectors
*          The number of global columns in C and Q
*
*  THRESH  (global input) DOUBLE PRECISION
*          A test will count as "failed" if the "error", computed as
*          described below, exceeds THRESH.  Note that the error
*          is scaled to be O(1), so THRESH should be a reasonably
*          small multiple of 1, e.g., 10 or 100.  In particular,
*          it should not depend on the precision (single vs. double)
*          or the size of the matrix.  It must be at least zero.
*
*  Q       (local input) COMPLEX*16 array,
*          global dimension (MS, NV), local dimension (LDQ, NQ)
*
*          Contains the eigenvectors as computed by PZSTEIN
*
*  IQ      (global input) INTEGER
*          Q's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*          Must be 1, otherwise INFO = -5.
*
*  JQ      (global input) INTEGER
*          Q's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*          Must be 1, otherwise INFO = -6.
*
*  DESCQ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Q.
*
*  C       (local workspace)  COMPLEX*16 array,
*          global dimension (NV, NV), local dimension (DESCC(LLD_), NQ)
*
*          Accumulator for computing I - Q**H * Q
*
*  IC      (global input) INTEGER
*          C's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*          Must be 1, otherwise INFO = -9.
*
*  JC      (global input) INTEGER
*          C's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*          Must be 1, otherwise INFO = -10.
*
*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix C.
*          DESCC( CTXT_ ) is the context used for the grid query,
*          for PDLAMCH and for error reporting.
*
*  PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1)
*          Identifies which eigenvectors are the last to be computed
*          by a given process
*
*  ICLUSTR (global input) INTEGER array dimension (2*P)
*          This input array contains indices of eigenvectors
*          corresponding to a cluster of eigenvalues that could not be
*          orthogonalized due to insufficient workspace.
*          This should be the output of PZSTEIN.
*          Cluster k is read as the pair ICLUSTR( 2*k-1 ),
*          ICLUSTR( 2*k ); the scan stops at the first k for which
*          ICLUSTR( 2*k-1 ) = 0, so the list must be zero terminated.
*
*  GAP     (global input) DOUBLE PRECISION array, dimension (P)
*          This input array contains the gap between eigenvalues whose
*          eigenvectors could not be orthogonalized.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*          Passed to PZLANGE as its work array.
*
*  LWORK   (local input) INTEGER
*          The length of the array WORK.
*          LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
*          Where:
*          NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
*          MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
*          A smaller value gives INFO = -16.
*
*  QTQNRM  (global output) DOUBLE PRECISION
*          | I - Q**H * Q | / ( ulp * max( MS, 1 ) ).
*          If the test passes this is the value computed before the
*          cluster rescaling; if it fails (RES = 1) it is the value
*          computed after the rescaling.
*
*  INFO    (output) INTEGER
*          = 0:  the arguments passed all checks.
*          < 0:  an argument was rejected, either by CHK1MAT or by
*                the local checks: -5 (IQ), -6 (JQ), -9 (IC),
*                -10 (JC), -16 (LWORK).  PXERBLA is called and the
*                routine returns with RES = 0 and QTQNRM not set.
*
*  RES     (global output) INTEGER
*          0 if the test passes, i.e. the scaled norm computed after
*            the cluster rescaling is <= THRESH
*          1 if the test fails,  i.e. that scaled norm is > THRESH
*
*
*     .. Parameters ..
*
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX*16         ZERO, ONE, NEGONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0,
     $                   NEGONE = -1.0D+0 )
*     ..
*     .. Intrinsic Functions ..
*
      INTRINSIC          DBLE, DCMPLX, MAX
*     ..
*     .. Local Scalars ..
      INTEGER            CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN,
     $                   MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW
      DOUBLE PRECISION   NORM, QTQNRM2, ULP
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      DOUBLE PRECISION   PDLAMCH, PZLANGE
      EXTERNAL           NUMROC, PDLAMCH, PZLANGE
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZGEMM,
     $                   PZLASET, PZMATADD
*     ..
*     .. Executable Statements ..
*       This is just to keep ftnchek happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
*
*     Default to "passed"; RES keeps this value on an argument error.
*
      RES = 0
      ULP = PDLAMCH( DESCC( CTXT_ ), 'P' )
*
      CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
*
*     Argument checks.
*
*     NOTE(review): both CHK1MAT calls pass MS as the column count and
*     use argument positions 1 and 2, although the documented shapes
*     are Q( MS, NV ) and C( NV, NV ).  Kept unchanged - confirm
*     against the callers before tightening.
*
      INFO = 0
      CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO )
      CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO )
*
      IF( INFO.EQ.0 ) THEN
         NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
         MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
*
         LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
*
*        Only whole matrices are supported: every offset must be 1.
*        The calls below hard-code 1, 1 as the offsets for that
*        reason.
*
         IF( IQ.NE.1 ) THEN
            INFO = -5
         ELSE IF( JQ.NE.1 ) THEN
            INFO = -6
         ELSE IF( IC.NE.1 ) THEN
            INFO = -9
         ELSE IF( JC.NE.1 ) THEN
            INFO = -10
         ELSE IF( LWORK.LT.LWMIN ) THEN
            INFO = -16
         END IF
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL PXERBLA( DESCC( CTXT_ ), 'PZSEPQTQ', -INFO )
         RETURN
      END IF
*
*     C = Identity matrix
*
      CALL PZLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC )
*
*     C = C - Q**H * Q
*
*     The two dimensions are tested separately: the product NV*MS can
*     overflow a default INTEGER for large problems, which would skip
*     the update and leave C equal to the identity.
*
      IF( NV.GT.0 .AND. MS.GT.0 ) THEN
         CALL PZGEMM( 'Conjugate transpose', 'N', NV, NV, MS, NEGONE, Q,
     $                1, 1, DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC )
      END IF
*
*     Scaled norm before any allowance is made for clusters.  This is
*     the value returned in QTQNRM when the test passes.
*
      NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK )
      QTQNRM = NORM / ( DBLE( MAX( MS, 1 ) )*ULP )
*
*     Allow for poorly orthogonalized eigenvectors for large clusters
*
*     Walk the zero-terminated list of clusters in ICLUSTR.
*
      CLUSTER = 1
   10 CONTINUE
*
*     FIRSTP = first process whose PROCDIST entry reaches the start
*     of this cluster (NPROW*NPCOL+1 if there is none, in which case
*     loop 40 below does not execute).
*
      DO 20 FIRSTP = 1, NPROW*NPCOL
         IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) )
     $      GO TO 30
   20 CONTINUE
   30 CONTINUE
*
      IMIN = ICLUSTR( 2*CLUSTER-1 )
      JMAX = ICLUSTR( 2*CLUSTER )
*
*     A zero start index marks the end of the cluster list.
*
      IF( IMIN.EQ.0 )
     $   GO TO 60
*
      DO 40 NEXTP = FIRSTP, NPROW*NPCOL
         IMAX = PROCDIST( NEXTP )
         JMIN = IMAX + 1
*
*        Rescale the block C( IMIN:IMAX, JMIN:JMAX ) and its mirror
*        C( JMIN:JMAX, IMIN:IMAX ) by GAP( CLUSTER ) / 0.01.  PZMATADD
*        is called with ALPHA = ZERO and the same array as source and
*        target, so only the BETA scaling has an effect.
*
         CALL PZMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN,
     $                  DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C,
     $                  IMIN, JMIN, DESCC )
         CALL PZMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN,
     $                  DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C,
     $                  JMIN, IMIN, DESCC )
         IMIN = IMAX
*
*        Stop once the cluster ends before the next PROCDIST entry.
*        PROCDIST( NEXTP+1 ) is why PROCDIST needs NPROW*NPCOL+1
*        entries.
*
         IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) )
     $      GO TO 50
   40 CONTINUE
   50 CONTINUE
*
      CLUSTER = CLUSTER + 1
      GO TO 10
   60 CONTINUE
*
*     Compute the norm of C
*
      NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK )
*
      QTQNRM2 = NORM / ( DBLE( MAX( MS, 1 ) )*ULP )
*
*     Only a failure overwrites QTQNRM with the post-rescaling value.
*
      IF( QTQNRM2.GT.THRESH ) THEN
         RES = 1
         QTQNRM = QTQNRM2
      END IF
      RETURN
*
*     End of PZSEPQTQ
*
      END