ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pddblaschk.f
Go to the documentation of this file.
00001       SUBROUTINE PDDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X,
00002      $                       IX, JX, DESCX, IASEED, A, IA, JA, DESCA,
00003      $                       IBSEED, ANORM, RESID, WORK, WORKSIZ )
00004 *
00005 *
00006 *  -- ScaLAPACK routine (version 1.7) --
00007 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00008 *     and University of California, Berkeley.
00009 *     November 15, 1997
00010 *
00011 *     .. Scalar Arguments ..
00012       CHARACTER          SYMM, TRANS, UPLO
00013       INTEGER            BWL, BWU, IA, IASEED, IBSEED,
00014      $                   IX, JA, JX, N, NRHS, WORKSIZ
00015       DOUBLE PRECISION   ANORM, RESID
00016 *     ..
00017 *     .. Array Arguments ..
00018       INTEGER            DESCA( * ), DESCX( * )
00019       DOUBLE PRECISION   A( * ), WORK( * ), X( * )
00020 *     .. External Functions ..
00021       LOGICAL            LSAME
00022 *     ..
00023 *
00024 *  Purpose
00025 *  =======
00026 *
00027 *  PDDBLASCHK computes the residual
00028 *  || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
00029 *  to check the accuracy of the factorization and solve steps in the
00030 *  LU and Cholesky decompositions, where sub( A ) denotes
00031 *  A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1).
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Arguments
00088 *  =========
00089 *
00090 *  SYMM      (global input) CHARACTER
00091 *          if SYMM = 'S', sub( A ) is a symmetric distributed band
00092 *          matrix, otherwise sub( A ) is a general distributed matrix.
00093 *
00094 *  UPLO    (global input) CHARACTER
00095 *          if SYMM = 'S', then
00096 *            if UPLO = 'L', the lower half of the matrix is stored
00097 *            if UPLO = 'U', the upper half of the matrix is stored
00098 *          if SYMM != 'S' or 'H', then
00099 *            if UPLO = 'D', the matrix is stable during factorization
00100 *                           without interchanges
00101 *            if UPLO != 'D', the matrix is general
00102 *
00103 *  TRANS   if TRANS= 'T', A 'Transpose' is used as the
00104 *            coefficient matrix in the solve.
00105 *
00106 *  N       (global input) INTEGER
00107 *          The number of columns to be operated on, i.e. the number of
00108 *          columns of the distributed submatrix sub( A ). N >= 0.
00109 *
00110 *  NRHS    (global input) INTEGER
00111 *          The number of right-hand-sides, i.e the number of columns
00112 *          of the distributed matrix sub( X ). NRHS >= 1.
00113 *
00114 *  X       (local input) DOUBLE PRECISION pointer into the local memory
00115 *          to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array
00116 *          contains the local pieces of the answer vector(s) sub( X ) of
00117 *          sub( A ) sub( X ) - B, split up over a column of processes.
00118 *
00119 *  IX      (global input) INTEGER
00120 *          The row index in the global array X indicating the first
00121 *          row of sub( X ).
00122 *
00123 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00124 *          The array descriptor for the distributed matrix X.
00125 *
00126 *  IASEED  (global input) INTEGER
00127 *          The seed number to generate the original matrix Ao.
00128 *
00129 *  JA      (global input) INTEGER
00130 *          The column index in the global array A indicating the
00131 *          first column of sub( A ).
00132 *
00133 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00134 *          The array descriptor for the distributed matrix A.
00135 *
00136 *  IBSEED  (global input) INTEGER
00137 *          The seed number to generate the original matrix B.
00138 *
00139 *  ANORM   (global input) DOUBLE PRECISION
00140 *          The 1-norm or infinity norm of the distributed matrix
00141 *          sub( A ).
00142 *
00143 *  RESID   (global output) DOUBLE PRECISION
00144 *          The residual error:
00145 *          ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
00146 *
00147 *  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
00148 *          IF SYMM='S'
00149 *          LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB
00150 *          IF SYMM!='S' or 'H'
00151 *          LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB
00152 *
00153 *  WORKSIZ (local input) size of WORK.
00154 *
00155 *  =====================================================================
00156 *
00157 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00158 *    Current address: Lawrence Livermore National Labs.
00159 *  This version released: August, 2001.
00160 *
00161 *  =====================================================================
00162 *
00163 *     .. Parameters ..
00164       DOUBLE PRECISION   ZERO, ONE
00165       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
00166       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00167      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00168       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00169      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00170      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00171       INTEGER            INT_ONE
00172       PARAMETER          ( INT_ONE = 1 )
00173 *     ..
00174 *     .. Local Scalars ..
00175       INTEGER            IACOL, IAROW, ICTXT,
00176      $                   IIA, IIX, IPB, IPW,
00177      $                   IXCOL, IXROW, J, JJA, JJX, LDA,
00178      $                   MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ
00179       INTEGER            BW, INFO, IPPRODUCT, WORK_MIN
00180       DOUBLE PRECISION   DIVISOR, EPS, RESID1, NORMX
00181 *     ..
00182 *     .. Local Arrays ..
00183 *     ..
00184 *     .. External Subroutines ..
00185       EXTERNAL           BLACS_GRIDINFO, DGAMX2D, DGEBR2D,
00186      $                   DGEBS2D, DGEMM, DGERV2D, DGESD2D,
00187      $                   DGSUM2D, DLASET, PBDTRAN, PDMATGEN
00188 *     ..
00189 *     .. External Functions ..
00190       INTEGER            IDAMAX, NUMROC
00191       DOUBLE PRECISION   PDLAMCH
00192       EXTERNAL           IDAMAX, NUMROC, PDLAMCH
00193 *     ..
00194 *     .. Intrinsic Functions ..
00195       INTRINSIC          ABS, DBLE, MAX, MIN, MOD
00196 *     ..
00197 *     .. Executable Statements ..
00198 *
00199 *     Get needed initial parameters
00200 *
00201       ICTXT = DESCA( CTXT_ )
00202       NB = DESCA( NB_ )
00203 *
00204       IF( LSAME( SYMM, 'S' ) ) THEN
00205          BW = BWL
00206          WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB
00207       ELSE
00208          BW = MAX(BWL, BWU)
00209          WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB
00210       ENDIF
00211 *
00212       IF ( WORKSIZ .LT. WORK_MIN ) THEN
00213        CALL PXERBLA( ICTXT, 'PDBLASCHK', -18 )
00214        RETURN
00215       END IF
00216 *
00217       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00218 *
00219       EPS = PDLAMCH( ICTXT, 'eps' )
00220       RESID = 0.0D+0
00221       DIVISOR = ANORM * EPS * DBLE( N )
00222 *
00223       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
00224      $              IAROW, IACOL )
00225       CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
00226      $              IXROW, IXCOL )
00227       NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW )
00228       NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL )
00229 *
00230       IPB = 1
00231       IPPRODUCT = 1 + DESCA( NB_ )
00232       IPW = 1 + 2*DESCA( NB_ )
00233 *
00234       LDA = DESCA( LLD_ )
00235 *
00236 *     Regenerate A
00237 *
00238                IF( LSAME( SYMM, 'S' )) THEN
00239                  CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1,
00240      $                           DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0,
00241      $                           IASEED, MYROW, MYCOL, NPROW, NPCOL )
00242                ELSE
00243 *
00244                  CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N,
00245      $                           DESCA( MB_ ), DESCA( NB_ ), A,
00246      $                           DESCA( LLD_ ), 0, 0, IASEED, MYROW,
00247      $                           MYCOL, NPROW, NPCOL )
00248                ENDIF
00249 *
00250 *     Loop over the rhs
00251 *
00252       RESID = 0.0
00253 *
00254       DO 40 J = 1, NRHS
00255 *
00256 *           Multiply A * current column of X
00257 *
00258 *
00259             CALL PDGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA,
00260      $                     1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX,
00261      $                     WORK( IPPRODUCT ), WORK( IPW ),
00262      $                     (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO )
00263 *
00264 *
00265 *           Regenerate column of B
00266 *
00267             CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ),
00268      $                     DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ),
00269      $                     WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ),
00270      $                     DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL,
00271      $                     MYROW, NPCOL, NPROW )
00272 *
00273 *           Figure || A * X - B || & || X ||
00274 *
00275             CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1,
00276      $                   WORK( IPB ), 1, 1, DESCX, 1 )
00277 *
00278             CALL PDNRM2( N, NORMX,
00279      $           X, 1, J, DESCX, 1 )
00280 *
00281             CALL PDNRM2( N, RESID1,
00282      $           WORK( IPB ), 1, 1, DESCX, 1 )
00283 *
00284 *
00285 *           Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
00286 *
00287             RESID1 = RESID1 / ( NORMX*DIVISOR )
00288 *
00289             RESID = MAX( RESID, RESID1 )
00290 *
00291    40 CONTINUE
00292 *
00293       RETURN
00294 *
00295 *     End of PDBLASCHK
00296 *
00297       END