ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
psqrt16.f
Go to the documentation of this file.
00001       SUBROUTINE PSQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX,
00002      $                    JX, DESCX, B, IB, JB, DESCB, RWORK, RESID )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          TRANS
00011       INTEGER            IA, IB, IX, JA, JB, JX, M, N, NRHS
00012       REAL               RESID
00013 *     ..
00014 *     .. Array Arguments ..
00015       INTEGER            DESCA( * ), DESCB( * ), DESCX( * )
00016       REAL               A( * ), B( * ), RWORK( * ), X( * )
00017 *     ..
00018 *
00019 *  Purpose
00020 *  =======
00021 *
00022 *  PSQRT16 computes the residual for a solution of a system of linear
00023 *  equations  sub( A )*sub( X ) = B  or  sub( A' )*sub( X ) = B:
00024 *     RESID = norm(B - sub( A )*sub( X ) ) /
00025 *             ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ),
00026 *  where EPS is the machine epsilon, sub( A ) denotes
00027 *  A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes
00028 *  X(IX:IX+N-1, JX:JX+NRHS-1).
00029 *
00030 *  Notes
00031 *  =====
00032 *
00033 *  Each global data object is described by an associated description
00034 *  vector.  This vector stores the information required to establish
00035 *  the mapping between an object element and its corresponding process
00036 *  and memory location.
00037 *
00038 *  Let A be a generic term for any 2D block cyclicly distributed array.
00039 *  Such a global array has an associated description vector DESCA.
00040 *  In the following comments, the character _ should be read as
00041 *  "of the global array".
00042 *
00043 *  NOTATION        STORED IN      EXPLANATION
00044 *  --------------- -------------- --------------------------------------
00045 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00046 *                                 DTYPE_A = 1.
00047 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00048 *                                 the BLACS process grid A is distribu-
00049 *                                 ted over. The context itself is glo-
00050 *                                 bal, but the handle (the integer
00051 *                                 value) may vary.
00052 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00053 *                                 array A.
00054 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00055 *                                 array A.
00056 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00057 *                                 the rows of the array.
00058 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00059 *                                 the columns of the array.
00060 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00061 *                                 row of the array A is distributed.
00062 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00063 *                                 first column of the array A is
00064 *                                 distributed.
00065 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00066 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00067 *
00068 *  Let K be the number of rows or columns of a distributed matrix,
00069 *  and assume that its process grid has dimension p x q.
00070 *  LOCr( K ) denotes the number of elements of K that a process
00071 *  would receive if K were distributed over the p processes of its
00072 *  process column.
00073 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00074 *  process would receive if K were distributed over the q processes of
00075 *  its process row.
00076 *  The values of LOCr() and LOCc() may be determined via a call to the
00077 *  ScaLAPACK tool function, NUMROC:
00078 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00079 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00080 *  An upper bound for these quantities may be computed by:
00081 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00082 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00083 *
00084 *  Arguments
00085 *  =========
00086 *
00087 *  TRANS   (global input) CHARACTER*1
00088 *          Specifies the form of the system of equations:
00089 *          = 'N':  sub( A )*sub( X ) = sub( B )
00090 *          = 'T':  sub( A' )*sub( X )= sub( B ), where A' is the
00091 *                  transpose of sub( A ).
00092 *          = 'C':  sub( A' )*sub( X )= B, where A' is the transpose
00093 *                  of sub( A ).
00094 *
00095 *  M       (global input) INTEGER
00096 *          The number of rows to be operated on, i.e. the number of rows
00097 *          of the distributed submatrix sub( A ). M >= 0.
00098 *
00099 *  N       (global input) INTEGER
00100 *          The number of columns to be operated on, i.e. the number of
00101 *          columns of the distributed submatrix sub( A ). N >= 0.
00102 *
00103 *  NRHS    (global input) INTEGER
00104 *          The number of right hand sides, i.e., the number of columns
00105 *          of the distributed submatrix sub( B ). NRHS >= 0.
00106 *
00107 *  A       (local input) REAL pointer into the local
00108 *          memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
00109 *          The original M x N matrix A.
00110 *
00111 *  IA      (global input) INTEGER
00112 *          The row index in the global array A indicating the first
00113 *          row of sub( A ).
00114 *
00115 *  JA      (global input) INTEGER
00116 *          The column index in the global array A indicating the
00117 *          first column of sub( A ).
00118 *
00119 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00120 *          The array descriptor for the distributed matrix A.
00121 *
00122 *  X       (local input) REAL pointer into the local
00123 *          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This
00124 *          array contains the local pieces of the computed solution
00125 *          distributed vectors for the system of linear equations.
00126 *
00127 *  IX      (global input) INTEGER
00128 *          The row index in the global array X indicating the first
00129 *          row of sub( X ).
00130 *
00131 *  JX      (global input) INTEGER
00132 *          The column index in the global array X indicating the
00133 *          first column of sub( X ).
00134 *
00135 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00136 *          The array descriptor for the distributed matrix X.
00137 *
00138 *  B       (local input/local output) REAL pointer into
00139 *          the local memory to an array of dimension
00140 *          (LLD_B,LOCc(JB+NRHS-1)).  On entry, this array contains the
00141 *          local pieces of the distributes right hand side vectors for
00142 *          the system of linear equations. On exit, sub( B ) is over-
00143 *          written with the difference sub( B ) - sub( A )*sub( X ) or
00144 *          sub( B ) - sub( A )'*sub( X ).
00145 *
00146 *  IB      (global input) INTEGER
00147 *          The row index in the global array B indicating the first
00148 *          row of sub( B ).
00149 *
00150 *  JB      (global input) INTEGER
00151 *          The column index in the global array B indicating the
00152 *          first column of sub( B ).
00153 *
00154 *  DESCB   (global and local input) INTEGER array of dimension DLEN_.
00155 *          The array descriptor for the distributed matrix B.
00156 *
00157 *  RWORK   (local workspace) REAL array, dimension (LRWORK)
00158 *          LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise.
00159 *
00160 *          where
00161 *
00162 *          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
00163 *          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
00164 *          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
00165 *          Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
00166 *          Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
00167 *
00168 *          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
00169 *          MYCOL, NPROW and NPCOL can be determined by calling the
00170 *          subroutine BLACS_GRIDINFO.
00171 *
00172 *  RESID   (global output) REAL
00173 *          The maximum over the number of right hand sides of
00174 *          norm( sub( B )- sub( A )*sub( X ) ) /
00175 *          ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ).
00176 *
00177 *  =====================================================================
00178 *
00179 *     .. Parameters ..
00180       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00181      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00182       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00183      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00184      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00185       REAL               ZERO, ONE
00186       PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
00187 *     ..
00188 *     .. Local Scalars ..
00189       INTEGER            ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL,
00190      $                   NPROW
00191       REAL               ANORM, BNORM, EPS, XNORM
00192 *     ..
00193 *     .. Local Arrays ..
00194       REAL               TEMP( 2 )
00195 *     ..
00196 *     .. External Functions ..
00197       LOGICAL            LSAME
00198       REAL               PSLAMCH, PSLANGE
00199       EXTERNAL           LSAME, PSLAMCH, PSLANGE
00200 *     ..
00201 *     .. External Subroutines ..
00202       EXTERNAL           BLACS_GRIDINFO, PSASUM, PSGEMM, SGAMX2D
00203 *     ..
00204 *     .. Intrinsic Functions ..
00205       INTRINSIC          MAX
00206 *     ..
00207 *     .. Executable Statements ..
00208 *
00209 *     Get grid parameters
00210 *
00211       ICTXT = DESCA( CTXT_ )
00212       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00213 *
00214 *     Quick exit if M = 0 or N = 0 or NRHS = 0
00215 *
00216       IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN
00217          RESID = ZERO
00218          RETURN
00219       END IF
00220 *
00221       IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN
00222          ANORM = PSLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK )
00223          N1 = N
00224          N2 = M
00225       ELSE
00226          ANORM = PSLANGE( '1', M, N, A, IA, JA, DESCA, RWORK )
00227          N1 = M
00228          N2 = N
00229       END IF
00230 *
00231       EPS = PSLAMCH( ICTXT, 'Epsilon' )
00232 *
00233 *     Compute  B - sub( A )*sub( X )  (or  B - sub( A' )*sub( X ) ) and
00234 *     store in B.
00235 *
00236       CALL PSGEMM( TRANS, 'No transpose', N1, NRHS, N2, -ONE, A, IA,
00237      $            JA, DESCA, X, IX, JX, DESCX, ONE, B, IB, JB, DESCB )
00238 *
00239 *     Compute the maximum over the number of right hand sides of
00240 *        norm( sub( B ) - sub( A )*sub( X ) ) /
00241 *        ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ).
00242 *
00243       RESID = ZERO
00244       DO 10 J = 1, NRHS
00245 *
00246          CALL PSASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 )
00247          CALL PSASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 )
00248 *
00249 *        Only the process columns owning the vector operands will have
00250 *        the correct result, the other will have zero.
00251 *
00252          TEMP( 1 ) = BNORM
00253          TEMP( 2 ) = XNORM
00254          IDUMM = 0
00255          CALL SGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM,
00256      $                 -1, -1, IDUMM )
00257          BNORM = TEMP( 1 )
00258          XNORM = TEMP( 2 )
00259 *
00260 *        Every processes have ANORM, BNORM and XNORM now.
00261 *
00262          IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN
00263             RESID = ZERO
00264          ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN
00265             RESID = ONE / EPS
00266          ELSE
00267             RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) /
00268      $              ( MAX( M, N )*EPS ) )
00269          END IF
00270 *
00271    10 CONTINUE
00272 *
00273       RETURN
00274 *
00275 *     End of PSQRT16
00276 *
00277       END