◆ psmmch2()

subroutine psmmch2	(	integer	ictxt,
		character*1	uplo,
		character*1	trans,
		integer	n,
		integer	k,
		real	alpha,
		real, dimension( * )	a,
		integer	ia,
		integer	ja,
		integer, dimension( * )	desca,
		real, dimension( * )	b,
		integer	ib,
		integer	jb,
		integer, dimension( * )	descb,
		real	beta,
		real, dimension( * )	c,
		real, dimension( * )	pc,
		integer	ic,
		integer	jc,
		integer, dimension( * )	descc,
		real, dimension( * )	ct,
		real, dimension( * )	g,
		real	err,
		integer	info
	)
Definition at line 5993 of file psblastst.f.
*
*  -- PBLAS test routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 1, 1998
*
*     .. Scalar Arguments ..
      CHARACTER*1        TRANS, UPLO
      INTEGER            IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
      REAL               ALPHA, BETA, ERR
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * ), DESCC( * )
      REAL               A( * ), B( * ), C( * ), CT( * ), G( * ),
     $                   PC( * )
*     ..
*
*  Purpose
*  =======
*
*  PSMMCH2 checks the results of the computational tests.
*
*  For every column J = 1, ..., N of sub( C ) the routine
*
*  1) recomputes, on every process, the entries of the triangle selec-
*     ted by UPLO from the replicated copies A, B and C:
*
*        TRANS = 'N':  alpha*( A*B' + B*A' ) + beta*C,
*        TRANS = 'T':  alpha*( A'*B + B'*A ) + beta*C,
*
*     and stores them back into C.  The same sums taken over absolute
*     values are accumulated in G (the gauges).  If TRANS is neither
*     'N' nor 'T', only the beta*C term is formed;
*
*  2) compares the locally owned entries of PC in that column against
*     C.  Each difference is divided by EPS and, when non-zero, by the
*     matching gauge;  the largest ratio is kept in ERR;
*
*  3) sets INFO as soon as ERR*SQRT( EPS ) >= 1, combines INFO and ERR
*     over the whole grid,  and  stops at the first column for which
*     INFO is non-zero.
*
*  Notes
*  =====
*
*  A description  vector  is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This  vector  stores  the  information  required to
*  establish the  mapping  between a  matrix entry and its corresponding
*  process and memory location.
*
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESCA:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
*  CTXT_A  (global) DESCA( CTXT_  ) The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is distributed over.  The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA( M_     ) The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA( N_     ) The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA( IMB_   ) The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA( INB_   ) The  number  of columns of the upper
*                                   left   block   of   the   matrix  A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA( MB_    ) The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A rows of  A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA( NB_    ) The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA( RSRC_  ) The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA( CSRC_  ) The  process  column  over which the
*                                   first  column of  A  is distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA( LLD_   ) The  leading  dimension of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*
*  Let K be the number of  rows of a matrix A starting at the global in-
*  dex IA, i.e., A( IA:IA+K-1, : ).  Lr( IA, K )  denotes  the number of
*  rows that the process of row coordinate MYROW ( 0 <= MYROW < NPROW )
*  would receive if these K rows were distributed over NPROW processes.
*  If K is the number of columns of a matrix  A  starting at the global
*  index JA, i.e., A( :, JA:JA+K-1 ), Lc( JA, K ) denotes the number of
*  columns that the process  MYCOL ( 0 <= MYCOL < NPCOL )  would receive
*  if these K columns were distributed over NPCOL processes.
*
*  The values of Lr() and Lc() may be determined via a call to the func-
*  tion PB_NUMROC:
*  Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
*  Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
*
*  Arguments
*  =========
*
*  ICTXT   (local input) INTEGER
*          On entry,  ICTXT  specifies the BLACS context handle, indica-
*          ting the global  context of the operation. The context itself
*          is global, but the value of ICTXT is local.
*
*  UPLO    (global input) CHARACTER*1
*          On entry,  UPLO  specifies which part of C should contain the
*          result.  UPLO = 'U' selects rows 1:J of column J;  any other
*          value selects rows J:N.
*
*  TRANS   (global input) CHARACTER*1
*          On entry,  TRANS  specifies whether the matrices A and B have
*          to  be  transposed  or not before computing the matrix-matrix
*          product.  Only 'N' and 'T' produce a product term here.
*
*  N       (global input) INTEGER
*          On entry, N specifies the order of the submatrix operand C. N
*          must be at least zero.
*
*  K       (global input) INTEGER
*          On entry, K specifies the number of columns (resp. rows) of A
*          and B when  TRANS = 'N' (resp. TRANS <> 'N').  K  must  be at
*          least zero.
*
*  ALPHA   (global input) REAL
*          On entry, ALPHA specifies the scalar alpha.
*
*  A       (local input) REAL array
*          On entry, A is an array of  dimension  (DESCA( M_ ),*).  This
*          array contains a local copy of the initial entire matrix PA.
*
*  IA      (global input) INTEGER
*          On entry, IA  specifies A's global row index, which points to
*          the beginning of the submatrix sub( A ).
*
*  JA      (global input) INTEGER
*          On entry, JA  specifies A's global column index, which points
*          to the beginning of the submatrix sub( A ).
*
*  DESCA   (global and local input) INTEGER array
*          On entry, DESCA  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix A.
*
*  B       (local input) REAL array
*          On entry, B is an array of  dimension  (DESCB( M_ ),*).  This
*          array contains a local copy of the initial entire matrix PB.
*
*  IB      (global input) INTEGER
*          On entry, IB  specifies B's global row index, which points to
*          the beginning of the submatrix sub( B ).
*
*  JB      (global input) INTEGER
*          On entry, JB  specifies B's global column index, which points
*          to the beginning of the submatrix sub( B ).
*
*  DESCB   (global and local input) INTEGER array
*          On entry, DESCB  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix B.
*
*  BETA    (global input) REAL
*          On entry, BETA specifies the scalar beta.
*
*  C       (local input/local output) REAL array
*          On entry, C is an array of  dimension  (DESCC( M_ ),*).  This
*          array contains a local copy of the initial entire matrix PC.
*          On exit, the entries of the UPLO triangle of sub( C ) in the
*          columns that were processed hold the expected result.
*
*  PC      (local input) REAL array
*          On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
*          array contains the local pieces of the matrix PC.
*
*  IC      (global input) INTEGER
*          On entry, IC  specifies C's global row index, which points to
*          the beginning of the submatrix sub( C ).
*
*  JC      (global input) INTEGER
*          On entry, JC  specifies C's global column index, which points
*          to the beginning of the submatrix sub( C ).
*
*  DESCC   (global and local input) INTEGER array
*          On entry, DESCC  is an integer array of dimension DLEN_. This
*          is the array descriptor for the matrix C.
*
*  CT      (workspace) REAL array
*          On entry, CT is an array of dimension at least N.  CT  holds
*          the expected values of the current column of sub( C ).
*
*  G       (workspace) REAL array
*          On entry, G  is  an array of dimension at least N.  G is used
*          to compute the gauges.
*
*  ERR     (global output) REAL
*          On exit, ERR specifies the largest error ratio found over the
*          columns that were checked.  ERR is zero when N <= 0.
*
*  INFO    (global output) INTEGER
*          On exit, if INFO <> 0, the result is less than half accurate.
*          INFO is zero when N <= 0.
*
*  -- Written on April 1, 1998 by
*     Antoine Petitet, University  of  Tennessee, Knoxville 37996, USA.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
     $                   DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
     $                   RSRC_
      parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
     $                   dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
     $                   imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
     $                   rsrc_ = 9, csrc_ = 10, lld_ = 11 )
      REAL               ZERO, ONE
      parameter( zero = 0.0e+0, one = 1.0e+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLREP, NOTRAN, ROWREP, TRAN, UPPER
      INTEGER            I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
     $                   IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
     $                   JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
     $                   NPCOL, NPROW
      REAL               EPS, ERRI
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      REAL               PSLAMCH
      EXTERNAL           lsame, pslamch
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max, min, mod, sqrt
*     ..
*     .. Executable Statements ..
*
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
      eps = pslamch( ictxt, 'eps' )
*
      upper = lsame( uplo, 'U' )
      notran = lsame( trans, 'N' )
      tran = lsame( trans, 'T' )
*
*     A, B and C are addressed below as column-major arrays with these
*     leading dimensions (global row counts taken from the descriptors).
*
      lda = max( 1, desca( m_ ) )
      ldb = max( 1, descb( m_ ) )
      ldc = max( 1, descc( m_ ) )
*
*     Initialize the outputs before the column sweep so that they are
*     defined even when N <= 0,  and  so that ERR keeps the largest
*     ratio over all columns checked rather than the last column only.
*
      err  = zero
      info = 0
*
*     Compute expected result in C using data in A, B and C.
*     Compute gauges in G. This part of the computation is performed
*     by every process in the grid.
*
      DO 140 j = 1, n
*
*        Row range of column J that lies in the UPLO triangle.
*
         IF( upper ) THEN
            ibeg = 1
            iend = j
         ELSE
            ibeg = j
            iend = n
         END IF
*
         DO 10 i = 1, n
            ct( i ) = zero
            g( i )  = zero
   10    CONTINUE
*
         IF( notran ) THEN
*
*           Entry (I,J) of A*B' + B*A':  IOFFAK / IOFFBK address row J,
*           IOFFAN / IOFFBN address row I, all in column KK.
*
            DO 30 kk = 1, k
               ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
               ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
               DO 20 i = ibeg, iend
                  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
                  ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
                  ct( i ) = ct( i ) + alpha * (
     $                      a( ioffan ) * b( ioffbk ) +
     $                      b( ioffbn ) * a( ioffak ) )
                  g( i ) = g( i ) + abs( alpha ) * (
     $                     abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
     $                     abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
   20          CONTINUE
   30       CONTINUE
         ELSE IF( tran ) THEN
*
*           Entry (I,J) of A'*B + B'*A:  IOFFAK / IOFFBK address column
*           J, IOFFAN / IOFFBN address column I, all in row KK.
*
            DO 50 kk = 1, k
               ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
               ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
               DO 40 i = ibeg, iend
                  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
                  ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
                  ct( i ) = ct( i ) + alpha * (
     $                      a( ioffan ) * b( ioffbk ) +
     $                      b( ioffbn ) * a( ioffak ) )
                  g( i ) = g( i ) + abs( alpha ) * (
     $                     abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
     $                     abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
   40          CONTINUE
   50       CONTINUE
         END IF
*
*        Add the beta*C contribution and overwrite C with the expected
*        values for rows IBEG:IEND of this column.
*
         ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
*
         DO 100 i = ibeg, iend
            ct( i ) = ct( i ) + beta * c( ioffc )
            g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
            c( ioffc ) = ct( i )
            ioffc = ioffc + 1
  100    CONTINUE
*
*        Compute the error ratio for this result.  All N rows of the
*        column are compared;  IOFFC walks the replicated copy C while
*        IIC walks the rows of PC owned by this process.
*
         ldpc = descc( lld_ )
         ioffc = ic + ( jc + j - 2 ) * ldc
         CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
     $                    iic, jjc, icrow, iccol )
         icurrow = icrow
         rowrep  = ( icrow.EQ.-1 )
         colrep  = ( iccol.EQ.-1 )
*
         IF( mycol.EQ.iccol .OR. colrep ) THEN
*
*           IBB is the number of rows of sub( C ) lying in the first
*           (possibly partial) row block, capped at N.
*
            ibb = descc( imb_ ) - ic + 1
            IF( ibb.LE.0 )
     $         ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
            ibb = min( ibb, n )
            in = ic + ibb - 1
*
            DO 110 i = ic, in
*
               IF( myrow.EQ.icurrow .OR. rowrep ) THEN
                  erri = abs( pc( iic+(jjc-1)*ldpc ) -
     $                        c( ioffc ) ) / eps
                  IF( g( i-ic+1 ).NE.zero )
     $               erri = erri / g( i-ic+1 )
                  err = max( err, erri )
                  IF( err*sqrt( eps ).GE.one )
     $               info = 1
                  iic = iic + 1
               END IF
*
               ioffc = ioffc + 1
*
  110       CONTINUE
*
            icurrow = mod( icurrow+1, nprow )
*
*           Remaining rows, one block of at most MB_ rows at a time;
*           the owning process row advances cyclically after each one.
*
            DO 130 i = in+1, ic+n-1, descc( mb_ )
               ibb = min( ic+n-i, descc( mb_ ) )
*
               DO 120 kk = 0, ibb-1
*
                  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
                     erri = abs( pc( iic+(jjc-1)*ldpc ) -
     $                           c( ioffc ) )/eps
                     IF( g( i+kk-ic+1 ).NE.zero )
     $                  erri = erri / g( i+kk-ic+1 )
                     err = max( err, erri )
                     IF( err*sqrt( eps ).GE.one )
     $                  info = 1
                     iic = iic + 1
                  END IF
*
                  ioffc = ioffc + 1
*
  120          CONTINUE
*
               icurrow = mod( icurrow+1, nprow )
*
  130       CONTINUE
*
         END IF
*
*        If INFO = 0, all results are at least half accurate.
*        INFO is summed and ERR is maximized over the whole grid, so
*        every process takes the same branch below.
*        NOTE(review): I and J are passed as the RA / CA arguments of
*        SGAMX2D;  with RCFLAG = -1 they are expected not to be refe-
*        renced - confirm against the BLACS specification.
*
         CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
         CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
     $                 mycol )
         IF( info.NE.0 )
     $      GO TO 150
*
  140 CONTINUE
*
  150 CONTINUE
*
      RETURN
*
*     End of PSMMCH2
*
Here is the call graph for this function:
Here is the caller graph for this function: