ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pcdblaschk.f
Go to the documentation of this file.
00001       SUBROUTINE PCDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X,
00002      $                       IX, JX, DESCX, IASEED, A, IA, JA, DESCA,
00003      $                       IBSEED, ANORM, RESID, WORK, WORKSIZ )
00004 *
00005 *
00006 *  -- ScaLAPACK routine (version 1.7) --
00007 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00008 *     and University of California, Berkeley.
00009 *     November 15, 1997
00010 *
00011 *     .. Scalar Arguments ..
00012       CHARACTER          SYMM, TRANS, UPLO
00013       INTEGER            BWL, BWU, IA, IASEED, IBSEED,
00014      $                   IX, JA, JX, N, NRHS, WORKSIZ
00015       REAL               ANORM, RESID
00016 *     ..
00017 *     .. Array Arguments ..
00018       INTEGER            DESCA( * ), DESCX( * )
00019       COMPLEX            A( * ), WORK( * ), X( * )
00020 *     .. External Functions ..
00021       LOGICAL            LSAME
00022 *     ..
00023 *
00024 *  Purpose
00025 *  =======
00026 *
00027 *  PCDBLASCHK computes the residual
00028 *  || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
00029 *  to check the accuracy of the factorization and solve steps in the
00030 *  LU and Cholesky decompositions, where sub( A ) denotes
00031 *  A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1).
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Arguments
00088 *  =========
00089 *
00090 *  SYMM      (global input) CHARACTER
00091 *          if SYMM = 'H', sub( A ) is a hermitian distributed band
00092 *          matrix, otherwise sub( A ) is a general distributed matrix.
00093 *
00094 *  UPLO    (global input) CHARACTER
00095 *          if SYMM = 'H', then
00096 *            if UPLO = 'L', the lower half of the matrix is stored
00097 *            if UPLO = 'U', the upper half of the matrix is stored
00098 *          if SYMM != 'S' or 'H', then
00099 *            if UPLO = 'D', the matrix is stable during factorization
00100 *                           without interchanges
00101 *            if UPLO != 'D', the matrix is general
00102 *
00103 *  TRANS   if TRANS= 'C', A 'Conjugate transpose' is used as the
00104 *            coefficient matrix in the solve.
00105 *
00106 *  N       (global input) INTEGER
00107 *          The number of columns to be operated on, i.e. the number of
00108 *          columns of the distributed submatrix sub( A ). N >= 0.
00109 *
00110 *  NRHS    (global input) INTEGER
00111 *          The number of right-hand-sides, i.e the number of columns
00112 *          of the distributed matrix sub( X ). NRHS >= 1.
00113 *
00114 *  X       (local input) COMPLEX pointer into the local memory
00115 *          to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array
00116 *          contains the local pieces of the answer vector(s) sub( X ) of
00117 *          sub( A ) sub( X ) - B, split up over a column of processes.
00118 *
00119 *  IX      (global input) INTEGER
00120 *          The row index in the global array X indicating the first
00121 *          row of sub( X ).
00122 *
00123 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00124 *          The array descriptor for the distributed matrix X.
00125 *
00126 *  IASEED  (global input) INTEGER
00127 *          The seed number to generate the original matrix Ao.
00128 *
00129 *  JA      (global input) INTEGER
00130 *          The column index in the global array A indicating the
00131 *          first column of sub( A ).
00132 *
00133 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00134 *          The array descriptor for the distributed matrix A.
00135 *
00136 *  IBSEED  (global input) INTEGER
00137 *          The seed number to generate the original matrix B.
00138 *
00139 *  ANORM   (global input) REAL
00140 *          The 1-norm or infinity norm of the distributed matrix
00141 *          sub( A ).
00142 *
00143 *  RESID   (global output) REAL
00144 *          The residual error:
00145 *          ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
00146 *
00147 *  WORK    (local workspace) COMPLEX array, dimension (LWORK)
00148 *          IF SYMM='S'
00149 *          LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB
00150 *          IF SYMM!='S' or 'H'
00151 *          LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB
00152 *
00153 *  WORKSIZ (local input) size of WORK.
00154 *
00155 *  =====================================================================
00156 *
00157 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00158 *    Current address: Lawrence Livermore National Labs.
00159 *  This version released: August, 2001.
00160 *
00161 *  =====================================================================
00162 *
00163 *     .. Parameters ..
00164       COMPLEX            ZERO, ONE
00165       PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
00166      $                     ZERO = ( 0.0E+0, 0.0E+0 ) )
00167       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00168      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00169       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00170      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00171      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00172       INTEGER            INT_ONE
00173       PARAMETER          ( INT_ONE = 1 )
00174 *     ..
00175 *     .. Local Scalars ..
00176       INTEGER            IACOL, IAROW, ICTXT,
00177      $                   IIA, IIX, IPB, IPW,
00178      $                   IXCOL, IXROW, J, JJA, JJX, LDA,
00179      $                   MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ
00180       INTEGER            BW, INFO, IPPRODUCT, WORK_MIN
00181       REAL               DIVISOR, EPS, RESID1, NORMX
00182 *     ..
00183 *     .. Local Arrays ..
00184 *     ..
00185 *     .. External Subroutines ..
00186       EXTERNAL           BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D,
00187      $                   CLASET, PBCTRAN, PCMATGEN, SGEBR2D,
00188      $                   SGEBS2D, SGERV2D, SGESD2D
00189 *     ..
00190 *     .. External Functions ..
00191       INTEGER            ICAMAX, NUMROC
00192       REAL               PSLAMCH
00193       EXTERNAL           ICAMAX, NUMROC, PSLAMCH
00194 *     ..
00195 *     .. Intrinsic Functions ..
00196       INTRINSIC          ABS, MAX, MIN, MOD, REAL
00197 *     ..
00198 *     .. Executable Statements ..
00199 *
00200 *     Get needed initial parameters
00201 *
00202       ICTXT = DESCA( CTXT_ )
00203       NB = DESCA( NB_ )
00204 *
00205       IF( LSAME( SYMM, 'H' ) ) THEN
00206          BW = BWL
00207          WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB
00208       ELSE
00209          BW = MAX(BWL, BWU)
00210          WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB
00211       ENDIF
00212 *
00213       IF ( WORKSIZ .LT. WORK_MIN ) THEN
00214        CALL PXERBLA( ICTXT, 'PCBLASCHK', -18 )
00215        RETURN
00216       END IF
00217 *
00218       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00219 *
00220       EPS = PSLAMCH( ICTXT, 'eps' )
00221       RESID = 0.0E+0
00222       DIVISOR = ANORM * EPS * REAL( N )
00223 *
00224       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
00225      $              IAROW, IACOL )
00226       CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
00227      $              IXROW, IXCOL )
00228       NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW )
00229       NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL )
00230 *
00231       IPB = 1
00232       IPPRODUCT = 1 + DESCA( NB_ )
00233       IPW = 1 + 2*DESCA( NB_ )
00234 *
00235       LDA = DESCA( LLD_ )
00236 *
00237 *     Regenerate A
00238 *
00239                IF( LSAME( SYMM, 'H' )) THEN
00240                  CALL PCBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1,
00241      $                           DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0,
00242      $                           IASEED, MYROW, MYCOL, NPROW, NPCOL )
00243                ELSE
00244 *
00245                  CALL PCBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N,
00246      $                           DESCA( MB_ ), DESCA( NB_ ), A,
00247      $                           DESCA( LLD_ ), 0, 0, IASEED, MYROW,
00248      $                           MYCOL, NPROW, NPCOL )
00249                ENDIF
00250 *
00251 *     Loop over the rhs
00252 *
00253       RESID = 0.0
00254 *
00255       DO 40 J = 1, NRHS
00256 *
00257 *           Multiply A * current column of X
00258 *
00259 *
00260             CALL PCGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA,
00261      $                     1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX,
00262      $                     WORK( IPPRODUCT ), WORK( IPW ),
00263      $                     (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO )
00264 *
00265 *
00266 *           Regenerate column of B
00267 *
00268             CALL PCMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ),
00269      $                     DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ),
00270      $                     WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ),
00271      $                     DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL,
00272      $                     MYROW, NPCOL, NPROW )
00273 *
00274 *           Figure || A * X - B || & || X ||
00275 *
00276             CALL PCAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1,
00277      $                   WORK( IPB ), 1, 1, DESCX, 1 )
00278 *
00279             CALL PSCNRM2( N, NORMX,
00280      $           X, 1, J, DESCX, 1 )
00281 *
00282             CALL PSCNRM2( N, RESID1,
00283      $           WORK( IPB ), 1, 1, DESCX, 1 )
00284 *
00285 *
00286 *           Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
00287 *
00288             RESID1 = RESID1 / ( NORMX*DIVISOR )
00289 *
00290             RESID = MAX( RESID, RESID1 )
00291 *
00292    40 CONTINUE
00293 *
00294       RETURN
00295 *
00296 *     End of PCBLASCHK
00297 *
00298       END