ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdptlaschk.f
      SUBROUTINE PDPTLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX,
     $                       DESCX, IASEED, A, IA, JA, DESCA, IBSEED,
     $                       ANORM, RESID, WORK, WORKSIZ )
*
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     November 15, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          SYMM, UPLO
      INTEGER            BWL, BWU, IA, IASEED, IBSEED,
     $                   IX, JA, JX, N, NRHS, WORKSIZ
      DOUBLE PRECISION   ANORM, RESID
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * )
      DOUBLE PRECISION   A( * ), WORK( * ), X( * )
*     .. External Functions ..
      LOGICAL            LSAME
*     ..
*
*  Purpose
*  =======
*
*  PDPTLASCHK computes the residual
*  || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
*  to check the accuracy of the factorization and solve steps in the
*  LU and Cholesky decompositions, where sub( A ) denotes
*  A(IA:IA+N-1,JA:JA+N-1) and sub( X ) denotes
*  X(IX:IX+N-1, JX:JX+NRHS-1).  The right hand side B is not passed
*  in; it is regenerated inside this routine from the seed IBSEED.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
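*  As a purely illustrative example (the numbers below are chosen for
*  exposition only and are not tied to any particular test), take
*  M = 100, MB_A = 8 and NPROW = 4:
*       LOCr( 100 ) <= ceil( ceil(100/8)/4 )*8 = ceil(13/4)*8 = 32,
*  so no process row ever owns more than 32 of the 100 rows.
*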
*  Arguments
*  =========
*
*  SYMM    (global input) CHARACTER
*          If SYMM = 'S', sub( A ) is a symmetric distributed band
*          matrix, otherwise sub( A ) is a general distributed matrix.
*
*  UPLO    (global input) CHARACTER
*          If SYMM = 'S', then
*            if UPLO = 'L', the lower half of the matrix is stored
*            if UPLO = 'U', the upper half of the matrix is stored
*          If SYMM != 'S' or 'H', then
*            if UPLO = 'D', the matrix is stable during factorization
*                           without interchanges
*            if UPLO != 'D', the matrix is general
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( A ). N >= 0.
*
*  BWL     (global input) INTEGER
*          The number of subdiagonals of sub( A ). BWL >= 0.
*
*  BWU     (global input) INTEGER
*          The number of superdiagonals of sub( A ). BWU >= 0.
*
*  NRHS    (global input) INTEGER
*          The number of right hand sides, i.e. the number of columns
*          of the distributed matrix sub( X ). NRHS >= 1.
*
*  X       (local input) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_X,LOCq(JX+NRHS-1)). This array
*          contains the local pieces of the answer vector(s) sub( X ) of
*          sub( A ) sub( X ) - B, split up over a column of processes.
*
*  IX      (global input) INTEGER
*          The row index in the global array X indicating the first
*          row of sub( X ).
*
*  JX      (global input) INTEGER
*          The column index in the global array X indicating the
*          first column of sub( X ).
*
*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix X.
*
*  IASEED  (global input) INTEGER
*          The seed number to generate the original matrix Ao.
*
*  A       (local input/output) DOUBLE PRECISION pointer into the local
*          memory to an array containing the local pieces of sub( A ).
*          On exit, its contents are overwritten by a regenerated copy
*          of the original matrix sub( A ), which is used to form the
*          residual.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  IBSEED  (global input) INTEGER
*          The seed number to generate the original matrix B.
*
*  ANORM   (global input) DOUBLE PRECISION
*          The 1-norm or infinity norm of the distributed matrix
*          sub( A ).
*
*  RESID   (global output) DOUBLE PRECISION
*          The residual error:
*          ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*          If SYMM = 'S',
*          LWORK >= max(5,NB)+2*NB;
*          if SYMM != 'S' or 'H',
*          LWORK >= max(5,NB)+2*NB.
*
*  WORKSIZ (local input) INTEGER
*          The size of the array WORK.
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*  This version released: August, 2001.
*
*  =====================================================================
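*
*  The following call is only a sketch of how this checker is usually
*  driven from a tridiagonal test program, e.g. after a PDPTTRF
*  factorization and a PDPTTRS solve; THRESH is a hypothetical
*  acceptance threshold belonging to the caller, not a parameter of
*  this routine:
*
*     CALL PDPTLASCHK( 'S', 'L', N, 1, 1, NRHS, X, 1, 1, DESCX,
*    $                 IASEED, A, 1, 1, DESCA, IBSEED, ANORM, RESID,
*    $                 WORK, WORKSIZ )
*     IF( RESID.GT.THRESH )
*    $   WRITE( *, * ) 'residual check failed, RESID = ', RESID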
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      INTEGER            INT_ONE
      PARAMETER          ( INT_ONE = 1 )
*     ..
*     .. Local Scalars ..
      INTEGER            IACOL, IAROW, ICTXT,
     $                   IIA, IIX, IPB, IPW,
     $                   IXCOL, IXROW, J, JJA, JJX, LDA,
     $                   MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ
      INTEGER            I, START
      INTEGER            BW, INFO, IPPRODUCT, WORK_MIN
      DOUBLE PRECISION   DIVISOR, EPS, RESID1, NORMX
*     ..
*     .. Local Arrays ..
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGAMX2D, DGEBR2D,
     $                   DGEBS2D, DGEMM, DGERV2D, DGESD2D,
     $                   DGSUM2D, DLASET, PBDTRAN, PDMATGEN
*     ..
*     .. External Functions ..
      INTEGER            IDAMAX, NUMROC
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           IDAMAX, NUMROC, PDLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Get needed initial parameters
*
      ICTXT = DESCA( CTXT_ )
      NB = DESCA( NB_ )
*
      IF( LSAME( SYMM, 'S' ) ) THEN
         BW = BWL
         START = 1
         WORK_MIN = MAX(5,NB)+2*NB
      ELSE
         BW = MAX(BWL, BWU)
         IF( LSAME( UPLO, 'D' )) THEN
            START = 1
         ELSE
            START = 2
         ENDIF
         WORK_MIN = MAX(5,NB)+2*NB
      ENDIF
*
      IF ( WORKSIZ .LT. WORK_MIN ) THEN
         CALL PXERBLA( ICTXT, 'PDPTLASCHK', -18 )
         RETURN
      END IF
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      EPS = PDLAMCH( ICTXT, 'eps' )
      RESID = 0.0D+0
      DIVISOR = ANORM * EPS * DBLE( N )
*
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
     $              IAROW, IACOL )
      CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
     $              IXROW, IXCOL )
      NP = NUMROC( (2), DESCA( MB_ ), MYROW, 0, NPROW )
      NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL )
*
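*     WORK is used in three pieces: WORK( IPB ) receives the
*     regenerated right hand side column, WORK( IPPRODUCT ) receives
*     the product A * x(:,j) computed by PDPBDCMV, and WORK( IPW ) is
*     scratch space for that routine.
*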
      IPB = 1
      IPPRODUCT = 1 + DESCA( NB_ )
      IPW = 1 + 2*DESCA( NB_ )
*
      LDA = DESCA( LLD_ )
*
*     Regenerate A
*
      IF( LSAME( SYMM, 'S' )) THEN
         CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1,
     $                   DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0,
     $                   IASEED, MYROW, MYCOL, NPROW, NPCOL )
      ELSE
*
         CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N,
     $                   DESCA( MB_ ), DESCA( NB_ ), A,
     $                   DESCA( LLD_ ), 0, 0, IASEED, MYROW,
     $                   MYCOL, NPROW, NPCOL )
      ENDIF
*
      IF( LSAME( UPLO, 'U' ) ) THEN
*
*        Matrix formed above has the diagonals shifted from what was
*        input to the tridiagonal routine. Shift them back.
*
*        Send elements to neighboring processors
*
         IF( MYCOL.LT.NPCOL-1 ) THEN
            CALL DGESD2D( ICTXT, 1, 1,
     $                    A( START+( DESCA( NB_ )-1 )*LDA ),
     $                    LDA, MYROW, MYCOL+1 )
         ENDIF
*
*        Shift local elements
*
         DO 230 I=DESCA( NB_ )-1,0,-1
            A( START+(I+1)*LDA ) = A( START+(I)*LDA )
  230    CONTINUE
*
*        Receive elements from neighboring processors
*
         IF( MYCOL.GT.0 ) THEN
            CALL DGERV2D( ICTXT, 1, 1, A( START ), LDA,
     $                    MYROW, MYCOL-1 )
         ENDIF
*
      ENDIF
*
*     Loop over the rhs
*
      RESID = 0.0D+0
*
      DO 40 J = 1, NRHS
*
*        Multiply A * current column of X
*
         CALL PDPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA,
     $                  1, X( 1 + (J-1)*DESCX( LLD_ ) ), 1, DESCX,
     $                  WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW,
     $                  INFO )
*
*        Regenerate column of B
*
         CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ),
     $                  DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ),
     $                  WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ),
     $                  DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL,
     $                  MYROW, NPCOL, NPROW )
*
*        Compute || A * X - B || and || X ||
*
         CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1,
     $                WORK( IPB ), 1, 1, DESCX, 1 )
*
         CALL PDNRM2( N, NORMX, X, 1, J, DESCX, 1 )
*
         CALL PDNRM2( N, RESID1, WORK( IPB ), 1, 1, DESCX, 1 )
*
*        Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
*
         RESID1 = RESID1 / ( NORMX*DIVISOR )
*
         RESID = MAX( RESID, RESID1 )
*
   40 CONTINUE
*
      RETURN
*
*     End of PDPTLASCHK
*
      END