SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ psmmch2()

subroutine psmmch2 ( integer  ictxt,
character*1  uplo,
character*1  trans,
integer  n,
integer  k,
real  alpha,
real, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
real, dimension( * )  b,
integer  ib,
integer  jb,
integer, dimension( * )  descb,
real  beta,
real, dimension( * )  c,
real, dimension( * )  pc,
integer  ic,
integer  jc,
integer, dimension( * )  descc,
real, dimension( * )  ct,
real, dimension( * )  g,
real  err,
integer  info 
)

Definition at line 5993 of file psblastst.f.

5996*
5997* -- PBLAS test routine (version 2.0) --
5998* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999* and University of California, Berkeley.
6000* April 1, 1998
6001*
6002* .. Scalar Arguments ..
6003 CHARACTER*1 TRANS, UPLO
6004 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005 REAL ALPHA, BETA, ERR
6006* ..
6007* .. Array Arguments ..
6008 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009 REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
6010 $ PC( * )
6011* ..
6012*
6013* Purpose
6014* =======
6015*
6016* PSMMCH2 checks the results of the computational tests.
6017*
6018* Notes
6019* =====
6020*
6021* A description vector is associated with each 2D block-cyclicly dis-
6022* tributed matrix. This vector stores the information required to
6023* establish the mapping between a matrix entry and its corresponding
6024* process and memory location.
6025*
6026* In the following comments, the character _ should be read as
6027* "of the distributed matrix". Let A be a generic term for any 2D
6028* block cyclicly distributed matrix. Its description vector is DESCA:
6029*
6030* NOTATION STORED IN EXPLANATION
6031* ---------------- --------------- ------------------------------------
6032* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034* the NPROW x NPCOL BLACS process grid
6035* A is distributed over. The context
6036* itself is global, but the handle
6037* (the integer value) may vary.
6038* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039* ted matrix A, M_A >= 0.
6040* N_A (global) DESCA( N_ ) The number of columns in the distri-
6041* buted matrix A, N_A >= 0.
6042* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043* block of the matrix A, IMB_A > 0.
6044* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045* left block of the matrix A,
6046* INB_A > 0.
6047* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048* bute the last M_A-IMB_A rows of A,
6049* MB_A > 0.
6050* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051* bute the last N_A-INB_A columns of
6052* A, NB_A > 0.
6053* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054* row of the matrix A is distributed,
6055* NPROW > RSRC_A >= 0.
6056* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057* first column of A is distributed.
6058* NPCOL > CSRC_A >= 0.
6059* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060* array storing the local blocks of
6061* the distributed matrix A,
6062* IF( Lc( 1, N_A ) > 0 )
6063* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064* ELSE
6065* LLD_A >= 1.
6066*
6067* Let K be the number of rows of a matrix A starting at the global in-
6068* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070* receive if these K rows were distributed over NPROW processes. If K
6071* is the number of columns of a matrix A starting at the global index
6072* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074* these K columns were distributed over NPCOL processes.
6075*
6076* The values of Lr() and Lc() may be determined via a call to the func-
6077* tion PB_NUMROC:
6078* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080*
6081* Arguments
6082* =========
6083*
6084* ICTXT (local input) INTEGER
6085* On entry, ICTXT specifies the BLACS context handle, indica-
6086* ting the global context of the operation. The context itself
6087* is global, but the value of ICTXT is local.
6088*
6089* UPLO (global input) CHARACTER*1
6090* On entry, UPLO specifies which part of C should contain the
6091* result.
6092*
6093* TRANS (global input) CHARACTER*1
6094* On entry, TRANS specifies whether the matrices A and B have
6095* to be transposed or not before computing the matrix-matrix
6096* product.
6097*
6098* N (global input) INTEGER
6099* On entry, N specifies the order the submatrix operand C. N
6100* must be at least zero.
6101*
6102* K (global input) INTEGER
6103* On entry, K specifies the number of columns (resp. rows) of A
6104* and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105* least zero.
6106*
6107* ALPHA (global input) REAL
6108* On entry, ALPHA specifies the scalar alpha.
6109*
6110* A (local input) REAL array
6111* On entry, A is an array of dimension (DESCA( M_ ),*). This
6112* array contains a local copy of the initial entire matrix PA.
6113*
6114* IA (global input) INTEGER
6115* On entry, IA specifies A's global row index, which points to
6116* the beginning of the submatrix sub( A ).
6117*
6118* JA (global input) INTEGER
6119* On entry, JA specifies A's global column index, which points
6120* to the beginning of the submatrix sub( A ).
6121*
6122* DESCA (global and local input) INTEGER array
6123* On entry, DESCA is an integer array of dimension DLEN_. This
6124* is the array descriptor for the matrix A.
6125*
6126* B (local input) REAL array
6127* On entry, B is an array of dimension (DESCB( M_ ),*). This
6128* array contains a local copy of the initial entire matrix PB.
6129*
6130* IB (global input) INTEGER
6131* On entry, IB specifies B's global row index, which points to
6132* the beginning of the submatrix sub( B ).
6133*
6134* JB (global input) INTEGER
6135* On entry, JB specifies B's global column index, which points
6136* to the beginning of the submatrix sub( B ).
6137*
6138* DESCB (global and local input) INTEGER array
6139* On entry, DESCB is an integer array of dimension DLEN_. This
6140* is the array descriptor for the matrix B.
6141*
6142* BETA (global input) REAL
6143* On entry, BETA specifies the scalar beta.
6144*
6145* C (local input/local output) REAL array
6146* On entry, C is an array of dimension (DESCC( M_ ),*). This
6147* array contains a local copy of the initial entire matrix PC.
6148*
6149* PC (local input) REAL array
6150* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151* array contains the local pieces of the matrix PC.
6152*
6153* IC (global input) INTEGER
6154* On entry, IC specifies C's global row index, which points to
6155* the beginning of the submatrix sub( C ).
6156*
6157* JC (global input) INTEGER
6158* On entry, JC specifies C's global column index, which points
6159* to the beginning of the submatrix sub( C ).
6160*
6161* DESCC (global and local input) INTEGER array
6162* On entry, DESCC is an integer array of dimension DLEN_. This
6163* is the array descriptor for the matrix C.
6164*
6165* CT (workspace) REAL array
6166* On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167* holds a copy of the current column of C.
6168*
6169* G (workspace) REAL array
6170* On entry, G is an array of dimension at least MAX(M,N,K). G
6171* is used to compute the gauges.
6172*
6173* ERR (global output) REAL
6174* On exit, ERR specifies the largest error in absolute value.
6175*
6176* INFO (global output) INTEGER
6177* On exit, if INFO <> 0, the result is less than half accurate.
6178*
6179* -- Written on April 1, 1998 by
6180* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181*
6182* =====================================================================
6183*
6184* .. Parameters ..
6185 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187 $ RSRC_
6188 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
6189 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192 REAL ZERO, ONE
6193 parameter( zero = 0.0e+0, one = 1.0e+0 )
6194* ..
6195* .. Local Scalars ..
6196 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198 $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199 $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200 $ NPCOL, NPROW
6201 REAL EPS, ERRI
6202* ..
6203* .. External Subroutines ..
6204 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
6205* ..
6206* .. External Functions ..
6207 LOGICAL LSAME
6208 REAL PSLAMCH
6209 EXTERNAL lsame, pslamch
6210* ..
6211* .. Intrinsic Functions ..
6212 INTRINSIC abs, max, min, mod, sqrt
6213* ..
6214* .. Executable Statements ..
6215*
6216 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217*
6218 eps = pslamch( ictxt, 'eps' )
6219*
6220 upper = lsame( uplo, 'U' )
6221 notran = lsame( trans, 'N' )
6222 tran = lsame( trans, 'T' )
6223*
6224 lda = max( 1, desca( m_ ) )
6225 ldb = max( 1, descb( m_ ) )
6226 ldc = max( 1, descc( m_ ) )
6227*
6228* Compute expected result in C using data in A, B and C.
6229* Compute gauges in G. This part of the computation is performed
6230* by every process in the grid.
6231*
6232 DO 140 j = 1, n
6233*
6234 IF( upper ) THEN
6235 ibeg = 1
6236 iend = j
6237 ELSE
6238 ibeg = j
6239 iend = n
6240 END IF
6241*
6242 DO 10 i = 1, n
6243 ct( i ) = zero
6244 g( i ) = zero
6245 10 CONTINUE
6246*
6247 IF( notran ) THEN
6248 DO 30 kk = 1, k
6249 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250 ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251 DO 20 i = ibeg, iend
6252 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253 ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254 ct( i ) = ct( i ) + alpha * (
6255 $ a( ioffan ) * b( ioffbk ) +
6256 $ b( ioffbn ) * a( ioffak ) )
6257 g( i ) = g( i ) + abs( alpha ) * (
6258 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260 20 CONTINUE
6261 30 CONTINUE
6262 ELSE IF( tran ) THEN
6263 DO 50 kk = 1, k
6264 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265 ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266 DO 40 i = ibeg, iend
6267 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268 ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269 ct( i ) = ct( i ) + alpha * (
6270 $ a( ioffan ) * b( ioffbk ) +
6271 $ b( ioffbn ) * a( ioffak ) )
6272 g( i ) = g( i ) + abs( alpha ) * (
6273 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275 40 CONTINUE
6276 50 CONTINUE
6277 END IF
6278*
6279 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280*
6281 DO 100 i = ibeg, iend
6282 ct( i ) = ct( i ) + beta * c( ioffc )
6283 g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284 c( ioffc ) = ct( i )
6285 ioffc = ioffc + 1
6286 100 CONTINUE
6287*
6288* Compute the error ratio for this result.
6289*
6290 err = zero
6291 info = 0
6292 ldpc = descc( lld_ )
6293 ioffc = ic + ( jc + j - 2 ) * ldc
6294 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295 $ iic, jjc, icrow, iccol )
6296 icurrow = icrow
6297 rowrep = ( icrow.EQ.-1 )
6298 colrep = ( iccol.EQ.-1 )
6299*
6300 IF( mycol.EQ.iccol .OR. colrep ) THEN
6301*
6302 ibb = descc( imb_ ) - ic + 1
6303 IF( ibb.LE.0 )
6304 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305 ibb = min( ibb, n )
6306 in = ic + ibb - 1
6307*
6308 DO 110 i = ic, in
6309*
6310 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312 $ c( ioffc ) ) / eps
6313 IF( g( i-ic+1 ).NE.zero )
6314 $ erri = erri / g( i-ic+1 )
6315 err = max( err, erri )
6316 IF( err*sqrt( eps ).GE.one )
6317 $ info = 1
6318 iic = iic + 1
6319 END IF
6320*
6321 ioffc = ioffc + 1
6322*
6323 110 CONTINUE
6324*
6325 icurrow = mod( icurrow+1, nprow )
6326*
6327 DO 130 i = in+1, ic+n-1, descc( mb_ )
6328 ibb = min( ic+n-i, descc( mb_ ) )
6329*
6330 DO 120 kk = 0, ibb-1
6331*
6332 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334 $ c( ioffc ) )/eps
6335 IF( g( i+kk-ic+1 ).NE.zero )
6336 $ erri = erri / g( i+kk-ic+1 )
6337 err = max( err, erri )
6338 IF( err*sqrt( eps ).GE.one )
6339 $ info = 1
6340 iic = iic + 1
6341 END IF
6342*
6343 ioffc = ioffc + 1
6344*
6345 120 CONTINUE
6346*
6347 icurrow = mod( icurrow+1, nprow )
6348*
6349 130 CONTINUE
6350*
6351 END IF
6352*
6353* If INFO = 0, all results are at least half accurate.
6354*
6355 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357 $ mycol )
6358 IF( info.NE.0 )
6359 $ GO TO 150
6360*
6361 140 CONTINUE
6362*
6363 150 CONTINUE
6364*
6365 RETURN
6366*
6367* End of PSMMCH2
6368*
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: