SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ psmmch1()

subroutine psmmch1 ( integer  ictxt,
character*1  uplo,
character*1  trans,
integer  n,
integer  k,
real  alpha,
real, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
real  beta,
real, dimension( * )  c,
real, dimension( * )  pc,
integer  ic,
integer  jc,
integer, dimension( * )  descc,
real, dimension( * )  ct,
real, dimension( * )  g,
real  err,
integer  info 
)

Definition at line 5646 of file psblastst.f.

5649*
5650* -- PBLAS test routine (version 2.0) --
5651* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652* and University of California, Berkeley.
5653* April 1, 1998
5654*
5655* .. Scalar Arguments ..
5656 CHARACTER*1 TRANS, UPLO
5657 INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658 REAL ALPHA, BETA, ERR
5659* ..
5660* .. Array Arguments ..
5661 INTEGER DESCA( * ), DESCC( * )
5662 REAL A( * ), C( * ), CT( * ), G( * ), PC( * )
5663* ..
5664*
5665* Purpose
5666* =======
5667*
5668* PSMMCH1 checks the results of the computational tests.
5669*
5670* Notes
5671* =====
5672*
5673* A description vector is associated with each 2D block-cyclicly dis-
5674* tributed matrix. This vector stores the information required to
5675* establish the mapping between a matrix entry and its corresponding
5676* process and memory location.
5677*
5678* In the following comments, the character _ should be read as
5679* "of the distributed matrix". Let A be a generic term for any 2D
5680* block cyclicly distributed matrix. Its description vector is DESCA:
5681*
5682* NOTATION STORED IN EXPLANATION
5683* ---------------- --------------- ------------------------------------
5684* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686* the NPROW x NPCOL BLACS process grid
5687* A is distributed over. The context
5688* itself is global, but the handle
5689* (the integer value) may vary.
5690* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691* ted matrix A, M_A >= 0.
5692* N_A (global) DESCA( N_ ) The number of columns in the distri-
5693* buted matrix A, N_A >= 0.
5694* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695* block of the matrix A, IMB_A > 0.
5696* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697* left block of the matrix A,
5698* INB_A > 0.
5699* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700* bute the last M_A-IMB_A rows of A,
5701* MB_A > 0.
5702* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703* bute the last N_A-INB_A columns of
5704* A, NB_A > 0.
5705* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706* row of the matrix A is distributed,
5707* NPROW > RSRC_A >= 0.
5708* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709* first column of A is distributed.
5710* NPCOL > CSRC_A >= 0.
5711* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712* array storing the local blocks of
5713* the distributed matrix A,
5714* IF( Lc( 1, N_A ) > 0 )
5715* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716* ELSE
5717* LLD_A >= 1.
5718*
5719* Let K be the number of rows of a matrix A starting at the global in-
5720* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722* receive if these K rows were distributed over NPROW processes. If K
5723* is the number of columns of a matrix A starting at the global index
5724* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726* these K columns were distributed over NPCOL processes.
5727*
5728* The values of Lr() and Lc() may be determined via a call to the func-
5729* tion PB_NUMROC:
5730* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732*
5733* Arguments
5734* =========
5735*
5736* ICTXT (local input) INTEGER
5737* On entry, ICTXT specifies the BLACS context handle, indica-
5738* ting the global context of the operation. The context itself
5739* is global, but the value of ICTXT is local.
5740*
5741* UPLO (global input) CHARACTER*1
5742* On entry, UPLO specifies which part of C should contain the
5743* result.
5744*
5745* TRANS (global input) CHARACTER*1
5746* On entry, TRANS specifies whether the matrix A has to be
5747* transposed or not before computing the matrix-matrix product.
5748*
5749* N (global input) INTEGER
5750* On entry, N specifies the order the submatrix operand C. N
5751* must be at least zero.
5752*
5753* K (global input) INTEGER
5754* On entry, K specifies the number of columns (resp. rows) of A
5755* when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756* zero.
5757*
5758* ALPHA (global input) REAL
5759* On entry, ALPHA specifies the scalar alpha.
5760*
5761* A (local input) REAL array
5762* On entry, A is an array of dimension (DESCA( M_ ),*). This
5763* array contains a local copy of the initial entire matrix PA.
5764*
5765* IA (global input) INTEGER
5766* On entry, IA specifies A's global row index, which points to
5767* the beginning of the submatrix sub( A ).
5768*
5769* JA (global input) INTEGER
5770* On entry, JA specifies A's global column index, which points
5771* to the beginning of the submatrix sub( A ).
5772*
5773* DESCA (global and local input) INTEGER array
5774* On entry, DESCA is an integer array of dimension DLEN_. This
5775* is the array descriptor for the matrix A.
5776*
5777* BETA (global input) REAL
5778* On entry, BETA specifies the scalar beta.
5779*
5780* C (local input/local output) REAL array
5781* On entry, C is an array of dimension (DESCC( M_ ),*). This
5782* array contains a local copy of the initial entire matrix PC.
5783*
5784* PC (local input) REAL array
5785* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786* array contains the local pieces of the matrix PC.
5787*
5788* IC (global input) INTEGER
5789* On entry, IC specifies C's global row index, which points to
5790* the beginning of the submatrix sub( C ).
5791*
5792* JC (global input) INTEGER
5793* On entry, JC specifies C's global column index, which points
5794* to the beginning of the submatrix sub( C ).
5795*
5796* DESCC (global and local input) INTEGER array
5797* On entry, DESCC is an integer array of dimension DLEN_. This
5798* is the array descriptor for the matrix C.
5799*
5800* CT (workspace) REAL array
5801* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802* holds a copy of the current column of C.
5803*
5804* G (workspace) REAL array
5805* On entry, G is an array of dimension at least MAX(M,N,K). G
5806* is used to compute the gauges.
5807*
5808* ERR (global output) REAL
5809* On exit, ERR specifies the largest error in absolute value.
5810*
5811* INFO (global output) INTEGER
5812* On exit, if INFO <> 0, the result is less than half accurate.
5813*
5814* -- Written on April 1, 1998 by
5815* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816*
5817* =====================================================================
5818*
5819* .. Parameters ..
5820 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822 $ RSRC_
5823 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
5824 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827 REAL ZERO, ONE
5828 parameter( zero = 0.0e+0, one = 1.0e+0 )
5829* ..
5830* .. Local Scalars ..
5831 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833 $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834 $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835 REAL EPS, ERRI
5836* ..
5837* .. External Subroutines ..
5838 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5839* ..
5840* .. External Functions ..
5841 LOGICAL LSAME
5842 REAL PSLAMCH
5843 EXTERNAL lsame, pslamch
5844* ..
5845* .. Intrinsic Functions ..
5846 INTRINSIC abs, max, min, mod, sqrt
5847* ..
5848* .. Executable Statements ..
5849*
5850 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851*
5852 eps = pslamch( ictxt, 'eps' )
5853*
5854 upper = lsame( uplo, 'U' )
5855 notran = lsame( trans, 'N' )
5856 tran = lsame( trans, 'T' )
5857*
5858 lda = max( 1, desca( m_ ) )
5859 ldc = max( 1, descc( m_ ) )
5860*
5861* Compute expected result in C using data in A, B and C.
5862* Compute gauges in G. This part of the computation is performed
5863* by every process in the grid.
5864*
5865 DO 140 j = 1, n
5866*
5867 IF( upper ) THEN
5868 ibeg = 1
5869 iend = j
5870 ELSE
5871 ibeg = j
5872 iend = n
5873 END IF
5874*
5875 DO 10 i = 1, n
5876 ct( i ) = zero
5877 g( i ) = zero
5878 10 CONTINUE
5879*
5880 IF( notran ) THEN
5881 DO 30 kk = 1, k
5882 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883 DO 20 i = ibeg, iend
5884 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886 g( i ) = g( i ) + abs( a( ioffak ) ) *
5887 $ abs( a( ioffan ) )
5888 20 CONTINUE
5889 30 CONTINUE
5890 ELSE IF( tran ) THEN
5891 DO 50 kk = 1, k
5892 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893 DO 40 i = ibeg, iend
5894 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896 g( i ) = g( i ) + abs( a( ioffak ) ) *
5897 $ abs( a( ioffan ) )
5898 40 CONTINUE
5899 50 CONTINUE
5900 END IF
5901*
5902 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903*
5904 DO 100 i = ibeg, iend
5905 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906 g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907 c( ioffc ) = ct( i )
5908 ioffc = ioffc + 1
5909 100 CONTINUE
5910*
5911* Compute the error ratio for this result.
5912*
5913 err = zero
5914 info = 0
5915 ldpc = descc( lld_ )
5916 ioffc = ic + ( jc + j - 2 ) * ldc
5917 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918 $ iic, jjc, icrow, iccol )
5919 icurrow = icrow
5920 rowrep = ( icrow.EQ.-1 )
5921 colrep = ( iccol.EQ.-1 )
5922*
5923 IF( mycol.EQ.iccol .OR. colrep ) THEN
5924*
5925 ibb = descc( imb_ ) - ic + 1
5926 IF( ibb.LE.0 )
5927 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928 ibb = min( ibb, n )
5929 in = ic + ibb - 1
5930*
5931 DO 110 i = ic, in
5932*
5933 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935 $ c( ioffc ) ) / eps
5936 IF( g( i-ic+1 ).NE.zero )
5937 $ erri = erri / g( i-ic+1 )
5938 err = max( err, erri )
5939 IF( err*sqrt( eps ).GE.one )
5940 $ info = 1
5941 iic = iic + 1
5942 END IF
5943*
5944 ioffc = ioffc + 1
5945*
5946 110 CONTINUE
5947*
5948 icurrow = mod( icurrow+1, nprow )
5949*
5950 DO 130 i = in+1, ic+n-1, descc( mb_ )
5951 ibb = min( ic+n-i, descc( mb_ ) )
5952*
5953 DO 120 kk = 0, ibb-1
5954*
5955 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957 $ c( ioffc ) )/eps
5958 IF( g( i+kk-ic+1 ).NE.zero )
5959 $ erri = erri / g( i+kk-ic+1 )
5960 err = max( err, erri )
5961 IF( err*sqrt( eps ).GE.one )
5962 $ info = 1
5963 iic = iic + 1
5964 END IF
5965*
5966 ioffc = ioffc + 1
5967*
5968 120 CONTINUE
5969*
5970 icurrow = mod( icurrow+1, nprow )
5971*
5972 130 CONTINUE
5973*
5974 END IF
5975*
5976* If INFO = 0, all results are at least half accurate.
5977*
5978 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980 $ mycol )
5981 IF( info.NE.0 )
5982 $ GO TO 150
5983*
5984 140 CONTINUE
5985*
5986 150 CONTINUE
5987*
5988 RETURN
5989*
5990* End of PSMMCH1
5991*
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: