ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzsepqtq.f
Go to the documentation of this file.
00001 *
00002 *
00003       SUBROUTINE PZSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC,
00004      $                     DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK,
00005      $                     QTQNRM, INFO, RES )
00006 *
00007 *  -- ScaLAPACK routine (version 1.7) --
00008 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00009 *     and University of California, Berkeley.
00010 *     May 1, 1997
00011 *
00012 *     .. Scalar Arguments ..
00013       INTEGER            IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES
00014       DOUBLE PRECISION   QTQNRM, THRESH
00015 *     ..
00016 *     .. Array Arguments ..
00017 *
00018       INTEGER            DESCC( * ), DESCQ( * ), ICLUSTR( * ),
00019      $                   PROCDIST( * )
00020       DOUBLE PRECISION   GAP( * ), WORK( * )
00021       COMPLEX*16         C( * ), Q( * )
00022 *     ..
00023 *
00024 *  Notes
00025 *  =====
00026 *
00027 *  Each global data object is described by an associated description
00028 *  vector.  This vector stores the information required to establish
00029 *  the mapping between an object element and its corresponding process
00030 *  and memory location.
00031 *
00032 *  Let A be a generic term for any 2D block cyclicly distributed array.
00033 *  Such a global array has an associated description vector DESCA.
00034 *  In the following comments, the character _ should be read as
00035 *  "of the global array".
00036 *
00037 *  NOTATION        STORED IN      EXPLANATION
00038 *  --------------- -------------- --------------------------------------
00039 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00040 *                                 DTYPE_A = 1.
00041 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00042 *                                 the BLACS process grid A is distribu-
00043 *                                 ted over. The context itself is glo-
00044 *                                 bal, but the handle (the integer
00045 *                                 value) may vary.
00046 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00047 *                                 array A.
00048 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00049 *                                 array A.
00050 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00051 *                                 the rows of the array.
00052 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00053 *                                 the columns of the array.
00054 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00055 *                                 row of the array A is distributed.
00056 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00057 *                                 first column of the array A is
00058 *                                 distributed.
00059 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00060 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00061 *
00062 *  Let K be the number of rows or columns of a distributed matrix,
00063 *  and assume that its process grid has dimension p x q.
00064 *  LOCr( K ) denotes the number of elements of K that a process
00065 *  would receive if K were distributed over the p processes of its
00066 *  process column.
00067 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00068 *  process would receive if K were distributed over the q processes of
00069 *  its process row.
00070 *  The values of LOCr() and LOCc() may be determined via a call to the
00071 *  ScaLAPACK tool function, NUMROC:
00072 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00073 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00074 *  An upper bound for these quantities may be computed by:
00075 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00076 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00077 *
00078 *  Purpose
00079 *  =======
00080 *
00081 *  Compute |I - QT * Q| / (ulp * n)
00082 *
00083 *  Arguments
00084 *  =========
00085 *
00086 *     NP = number of local rows in C
00087 *     NQ = number of local columns in C and Q
00088 *
00089 *  MS      (global input) INTEGER
00090 *          Matrix size.
00091 *          The number of global rows in Q
00092 *
00093 *  NV      (global input) INTEGER
00094 *          Number of eigenvectors
00095 *          The number of global columns in C and Q
00096 *
00097 *  THRESH  (global input) DOUBLE PRECISION
00098 *          A test will count as "failed" if the "error", computed as
00099 *          described below, exceeds THRESH.  Note that the error
00100 *          is scaled to be O(1), so THRESH should be a reasonably
00101 *          small multiple of 1, e.g., 10 or 100.  In particular,
00102 *          it should not depend on the precision (single vs. double)
00103 *          or the size of the matrix.  It must be at least zero.
00104 *
00105 *  Q       (local input) COMPLEX*16 array,
00106 *          global dimension (MS, NV), local dimension (LDQ, NQ)
00107 *
00108 *          Contains the eigenvectors as computed by PZSTEIN
00109 *
00110 *  IQ      (global input) INTEGER
00111 *          Q's global row index, which points to the beginning of the
00112 *          submatrix which is to be operated on.
00113 *
00114 *  JQ      (global input) INTEGER
00115 *          Q's global column index, which points to the beginning of
00116 *          the submatrix which is to be operated on.
00117 *
00118 *  DESCQ   (global and local input) INTEGER array of dimension DLEN_.
00119 *          The array descriptor for the distributed matrix Q.
00120 *
00121 *  C       (local workspace)  COMPLEX*16 array,
00122 *          global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ)
00123 *
00124 *          Accumulator for computing I - QT * Q
00125 *
00126 *  IC      (global input) INTEGER
00127 *          C's global row index, which points to the beginning of the
00128 *          submatrix which is to be operated on.
00129 *
00130 *  JC      (global input) INTEGER
00131 *          C's global column index, which points to the beginning of
00132 *          the submatrix which is to be operated on.
00133 *
00134 *  DESCC   (global and local input) INTEGER array of dimension DLEN_.
00135 *          The array descriptor for the distributed matrix C.
00136 *
00137 *  W       (input) DOUBLE PRECISION array, dimension (NV)
00138 *          All procesors have an identical copy of W()
00139 *
00140 *          Contains the computed eigenvalues
00141 *
00142 *  PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1)
00143 *          Identifies which eigenvectors are the last to be computed
00144 *          by a given process
00145 *
00146 *  ICLUSTR (global input) INTEGER array dimension (2*P)
00147 *          This input array contains indices of eigenvectors
00148 *          corresponding to a cluster of eigenvalues that could not be
00149 *          orthogonalized due to insufficient workspace.
00150 *          This should be the output of PZSTEIN.
00151 *
00152 *  GAP     (global input) DOUBLE PRECISION array, dimension (P)
00153 *          This input array contains the gap between eigenvalues whose
00154 *          eigenvectors could not be orthogonalized.
00155 *
00156 *  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
00157 *
00158 *  LWORK   (local input) INTEGER
00159 *          The length of the array WORK.
00160 *          LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
00161 *          Where:
00162 *          NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
00163 *          MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
00164 *
00165 *  QTQNRM  (global output) DOUBLE PRECISION
00166 *          |QTQ -I| / EPS
00167 *
00168 *  RES     (global output) INTEGER
00169 *          0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH
00170 *          1 if the test fails  i.e. |I - QT * Q| / (ulp * n) > THRESH
00171 *
00172 *
00173 *     .. Parameters ..
00174 *
00175       INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
00176      $                   MB_, NB_, RSRC_, CSRC_, LLD_
00177       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00178      $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00179      $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00180       COMPLEX*16         ZERO, ONE, NEGONE
00181       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0,
00182      $                   NEGONE = -1.0D+0 )
00183 *     ..
00184 *     .. Intrinsic Functions ..
00185 *
00186       INTRINSIC          DBLE, DCMPLX, MAX
00187 *     ..
00188 *     .. Local Scalars ..
00189       INTEGER            CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN,
00190      $                   MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW
00191       DOUBLE PRECISION   NORM, QTQNRM2, ULP
00192 *     ..
00193 *     .. External Functions ..
00194       INTEGER            NUMROC
00195       DOUBLE PRECISION   PDLAMCH, PZLANGE
00196       EXTERNAL           NUMROC, PDLAMCH, PZLANGE
00197 *     ..
00198 *     .. External Subroutines ..
00199       EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZGEMM,
00200      $                   PZLASET, PZMATADD
00201 *     ..
00202 *     .. Executable Statements ..
00203 *       This is just to keep ftnchek happy
00204       IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
00205      $    RSRC_.LT.0 )RETURN
00206 *
00207 *
00208       RES = 0
00209       ULP = PDLAMCH( DESCC( CTXT_ ), 'P' )
00210 *
00211       CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
00212       INFO = 0
00213       CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO )
00214       CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO )
00215 *
00216       IF( INFO.EQ.0 ) THEN
00217          NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
00218          MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
00219 *
00220          LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
00221 *
00222          IF( IQ.NE.1 ) THEN
00223             INFO = -5
00224          ELSE IF( JQ.NE.1 ) THEN
00225             INFO = -6
00226          ELSE IF( IC.NE.1 ) THEN
00227             INFO = -9
00228          ELSE IF( JC.NE.1 ) THEN
00229             INFO = -10
00230          ELSE IF( LWORK.LT.LWMIN ) THEN
00231             INFO = -16
00232          END IF
00233       END IF
00234 *
00235       IF( INFO.NE.0 ) THEN
00236          CALL PXERBLA( DESCC( CTXT_ ), 'PZSEPQTQ', -INFO )
00237          RETURN
00238       END IF
00239 *
00240 *     C = Identity matrix
00241 *
00242       CALL PZLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC )
00243 *
00244 *     C = C - QT * Q
00245 *
00246       IF( NV*MS.GT.0 ) THEN
00247          CALL PZGEMM( 'Conjugate transpose', 'N', NV, NV, MS, NEGONE, Q,
00248      $                1, 1, DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC )
00249       END IF
00250 *
00251 *     Allow for poorly orthogonalized eigenvectors for large clusters
00252 *
00253       NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK )
00254       QTQNRM = NORM / ( DBLE( MAX( MS, 1 ) )*ULP )
00255 *
00256       CLUSTER = 1
00257    10 CONTINUE
00258       DO 20 FIRSTP = 1, NPROW*NPCOL
00259          IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) )
00260      $      GO TO 30
00261    20 CONTINUE
00262    30 CONTINUE
00263 *
00264       IMIN = ICLUSTR( 2*CLUSTER-1 )
00265       JMAX = ICLUSTR( 2*CLUSTER )
00266 *
00267 *
00268       IF( IMIN.EQ.0 )
00269      $   GO TO 60
00270 *
00271       DO 40 NEXTP = FIRSTP, NPROW*NPCOL
00272          IMAX = PROCDIST( NEXTP )
00273          JMIN = IMAX + 1
00274 *
00275 *
00276          CALL PZMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN,
00277      $                  DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C,
00278      $                  IMIN, JMIN, DESCC )
00279          CALL PZMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN,
00280      $                  DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C,
00281      $                  JMIN, IMIN, DESCC )
00282          IMIN = IMAX
00283 *
00284          IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) )
00285      $      GO TO 50
00286    40 CONTINUE
00287    50 CONTINUE
00288 *
00289       CLUSTER = CLUSTER + 1
00290       GO TO 10
00291    60 CONTINUE
00292 *
00293 *     Compute the norm of C
00294 *
00295       NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK )
00296 *
00297       QTQNRM2 = NORM / ( DBLE( MAX( MS, 1 ) )*ULP )
00298 *
00299       IF( QTQNRM2.GT.THRESH ) THEN
00300          RES = 1
00301          QTQNRM = QTQNRM2
00302       END IF
00303       RETURN
00304 *
00305 *     End of PZSEPQTQ
00306 *
00307       END