ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdqrt17.f
Go to the documentation of this file.
00001       DOUBLE PRECISION FUNCTION PDQRT17( TRANS, IRESID, M, N, NRHS, A,
00002      $                                   IA, JA, DESCA, X, IX, JX,
00003      $                                   DESCX, B, IB, JB, DESCB, WORK,
00004      $                                   RWORK )
00005 *
00006 *  -- ScaLAPACK routine (version 1.7) --
00007 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00008 *     and University of California, Berkeley.
00009 *     May 1, 1997
00010 *
00011 *     .. Scalar Arguments ..
00012       CHARACTER          TRANS
00013       INTEGER            IA, IB, IRESID, IX, JA, JB, JX, M, N, NRHS
00014 *     ..
00015 *     .. Array Arguments ..
00016       INTEGER            DESCA( * ), DESCB( * ), DESCX( * )
00017       DOUBLE PRECISION   A( * ), B( * ), WORK( * ), X( * )
00018       DOUBLE PRECISION   RWORK( * )
00019 *     ..
00020 *
00021 *  Purpose
00022 *  =======
00023 *
00024 *  PDQRT17 computes the ratio
00025 *
00026 *     || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps)
00027 *
00028 *  where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and
00029 *
00030 *     alpha = ||B|| if IRESID = 1 (zero-residual problem)
00031 *     alpha = ||R|| if IRESID = 2 (otherwise).
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Arguments
00088 *  =========
00089 *
00090 *  TRANS   (global input) CHARACTER*1
00091 *          Specifies whether or not the transpose of sub( A ) is used.
00092 *          = 'N':  No transpose, op( sub( A ) ) = sub( A ).
00093 *          = 'T':  Transpose, op( sub( A ) ) = sub( A' ).
00094 *
00095 *  IRESID  (global input) INTEGER
00096 *          IRESID = 1 indicates zero-residual problem.
00097 *          IRESID = 2 indicates non-zero residual.
00098 *
00099 *  M       (global input) INTEGER
00100 *          The number of rows to be operated on, i.e. the number of rows
00101 *          of the distributed submatrix sub( A ). M >= 0.
00102 *          If TRANS = 'N', the number of rows of the distributed
00103 *          submatrix sub( B ).
00104 *          If TRANS = 'T', the number of rows of the distributed
00105 *          submatrix sub( X ).
00106 *
00107 *  N       (global input) INTEGER
00108 *          The number of columns to be operated on, i.e. the number of
00109 *          columns of the distributed submatrix sub( A ). N >= 0.
00110 *          If TRANS = 'N', the number of rows of the distributed
00111 *          submatrix sub( X ). Otherwise N is the number of rows of
00112 *          the distributed submatrix sub( B ).
00113 *
00114 *  NRHS    (global input) INTEGER
00115 *          The number of right hand sides, i.e., the number of columns
00116 *          of the distributed submatrices sub( X ) and sub( B ).
00117 *          NRHS >= 0.
00118 *
00119 *  A       (local input) DOUBLE PRECISION pointer into the local memory
00120 *          to an array of dimension (LLD_A,LOCc(JA+N-1)). This array
00121 *          contains the local pieces of the distributed M-by-N
00122 *          submatrix sub( A ).
00123 *
00124 *  IA      (global input) INTEGER
00125 *          The row index in the global array A indicating the first
00126 *          row of sub( A ).
00127 *
00128 *  JA      (global input) INTEGER
00129 *          The column index in the global array A indicating the
00130 *          first column of sub( A ).
00131 *
00132 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00133 *          The array descriptor for the distributed matrix A.
00134 *
00135 *  X       (local input) DOUBLE PRECISION pointer into the local
00136 *          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
00137 *          If TRANS = 'N', this array contains the local pieces of the
00138 *          N-by-NRHS distributed submatrix sub( X ). Otherwise, this
00139 *          array contains the local pieces of the M-by-NRHS distributed
00140 *          submatrix sub( X ).
00141 *
00142 *  IX      (global input) INTEGER
00143 *          The row index in the global array X indicating the first
00144 *          row of sub( X ).
00145 *
00146 *  JX      (global input) INTEGER
00147 *          The column index in the global array X indicating the
00148 *          first column of sub( X ).
00149 *
00150 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00151 *          The array descriptor for the distributed matrix X.
00152 *
00153 *  B       (local input) DOUBLE PRECISION pointer into the local memory
00154 *          to an array of dimension (LLD_B,LOCc(JB+NRHS-1)).
00155 *          If TRANS='N', this array contains the local pieces of the
00156 *          distributed M-by-NRHS submatrix operand sub( B ). Otherwise,
00157 *          this array contains the local pieces of the distributed
00158 *          N-by-NRHS submatrix operand sub( B ).
00159 *
00160 *  IB      (global input) INTEGER
00161 *          The row index in the global array B indicating the first
00162 *          row of sub( B ).
00163 *
00164 *  JB      (global input) INTEGER
00165 *          The column index in the global array B indicating the
00166 *          first column of sub( B ).
00167 *
00168 *  DESCB   (global and local input) INTEGER array of dimension DLEN_.
00169 *          The array descriptor for the distributed matrix B.
00170 *
00171 *  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
00172 *          If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0
00173 *          otherwise       LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0
00174 *
00175 *  RWORK   (local workspace) DOUBLE PRECISION array, dimension (LRWORK)
00176 *          LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise.
00177 *
00178 *          where
00179 *
00180 *          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
00181 *          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
00182 *          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
00183 *          Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
00184 *          Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ),
00185 *          Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ),
00186 *          Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
00187 *
00188 *          IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
00189 *          IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
00190 *          IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
00191 *          NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ),
00192 *          NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ).
00193 *
00194 *          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
00195 *          MYCOL, NPROW and NPCOL can be determined by calling the
00196 *          subroutine BLACS_GRIDINFO.
00197 *
00198 *  =====================================================================
00199 *
00200 *     .. Parameters ..
00201       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00202      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00203       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00204      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00205      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00206       DOUBLE PRECISION   ZERO, ONE
00207       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
00208 *     ..
00209 *     .. Local Scalars ..
00210       INTEGER            IACOL, IBCOL, IBROW, ICOFFB, ICTXT, INFO,
00211      $                   IOFFA, IROFFB, ISCL, IW, IW2, JW, JW2, MYCOL,
00212      $                   NRHSQ, NRHSP, MYROW, NCOLS, NPCOL, NPROW,
00213      $                   NROWS, NROWSP
00214       DOUBLE PRECISION   ERR, NORMA, NORMB, NORMRS, NORMX, SMLNUM
00215 *     ..
00216 *     .. Local Arrays ..
00217       INTEGER            DESCW( DLEN_ ), DESCW2( DLEN_ )
00218 *     ..
00219 *     .. External Functions ..
00220       LOGICAL            LSAME
00221       INTEGER            INDXG2P, NUMROC
00222       DOUBLE PRECISION   PDLAMCH, PDLANGE
00223       EXTERNAL           INDXG2P, LSAME, NUMROC, PDLAMCH, PDLANGE
00224 *     ..
00225 *     .. External Subroutines ..
00226       EXTERNAL           BLACS_GRIDINFO, DESCSET, PDGEMM, PDLACPY,
00227      $                   PDLASCL, PXERBLA
00228 *     ..
00229 *     .. Intrinsic Functions ..
00230       INTRINSIC          DBLE, MAX
00231 *     ..
00232 *     .. Executable Statements ..
00233 *
00234       PDQRT17 = ZERO
00235 *
00236 *     Get grid parameters
00237 *
00238       ICTXT = DESCA( CTXT_ )
00239       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00240 *
00241       INFO = 0
00242       IF( LSAME( TRANS, 'N' ) ) THEN
00243          NROWS = M
00244          NCOLS = N
00245          IOFFA = MOD( JA-1, DESCA( NB_ ) )
00246       ELSE IF( LSAME( TRANS, 'T' ) ) THEN
00247          NROWS = N
00248          NCOLS = M
00249          IOFFA = MOD( IA-1, DESCA( MB_ ) )
00250       ELSE
00251          CALL PXERBLA( ICTXT, 'PDQRT17', -1 )
00252          RETURN
00253       END IF
00254 *
00255       IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.LE.0 )
00256      $   RETURN
00257 *
00258       IROFFB = MOD( IA-1, DESCA( MB_ ) )
00259       ICOFFB = MOD( JA-1, DESCA( NB_ ) )
00260       IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
00261      $                 NPROW )
00262       IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
00263      $                 NPCOL )
00264       IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
00265      $                 NPCOL )
00266 *
00267       NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL )
00268       NRHSP = NUMROC( NRHS+IROFFB, DESCB( NB_ ), MYROW, IBROW, NPROW )
00269       NROWSP = NUMROC( NROWS+IROFFB, DESCA( MB_ ), MYROW, IBROW, NPROW )
00270 *
00271 *     Assign array descriptor DESCW for workspace WORK, where DESCW
00272 *     holds a copy of the distributed submatrix sub( B )
00273 *
00274       CALL DESCSET( DESCW, NROWS+IROFFB, NRHS+ICOFFB, DESCB( MB_ ),
00275      $              DESCB( NB_ ), IBROW, IBCOL, ICTXT, MAX( 1,
00276      $              NROWSP ) )
00277 *
00278 *     Assign array descriptor DESCW2 for workspace WORK, where DESCW2
00279 *     holds a copy of the distributed submatrix sub( X**T )
00280 *
00281       CALL DESCSET( DESCW2, NRHS+ICOFFB, NCOLS+IOFFA, DESCX( NB_ ),
00282      $              DESCX( MB_ ), IBROW, IACOL, ICTXT, MAX( 1,
00283      $              NRHSP ) )
00284 *
00285       NORMA = PDLANGE( 'One-norm', M, N, A, IA, JA, DESCA, RWORK )
00286       SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' )
00287       SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'Precision' )
00288       ISCL = 0
00289 *
00290 *     compute residual and scale it
00291 *
00292       IW = 1 + IROFFB
00293       JW = 1 + ICOFFB
00294       CALL PDLACPY( 'All', NROWS, NRHS, B, IB, JB, DESCB, WORK, IW, JW,
00295      $              DESCW )
00296       CALL PDGEMM( TRANS, 'No transpose', NROWS, NRHS, NCOLS, -ONE, A,
00297      $             IA, JA, DESCA, X, IX, JX, DESCX, ONE, WORK, IW, JW,
00298      $             DESCW )
00299       NORMRS = PDLANGE( 'Max', NROWS, NRHS, WORK, IW, JW, DESCW,
00300      $                  RWORK )
00301       IF( NORMRS.GT.SMLNUM ) THEN
00302          ISCL = 1
00303          CALL PDLASCL( 'General', NORMRS, ONE, NROWS, NRHS, WORK,
00304      $                 IW, JW, DESCW, INFO )
00305       END IF
00306 *
00307 *     compute R'*sub( A )
00308 *
00309       IW2 = 1 + ICOFFB
00310       JW2 = 1 + IOFFA
00311       CALL PDGEMM( 'Transpose', TRANS, NRHS, NCOLS, NROWS, ONE, WORK,
00312      $             IW, JW, DESCW, A, IA, JA, DESCA, ZERO,
00313      $             WORK( NROWSP*NRHSQ+1 ), IW2, JW2, DESCW2 )
00314 *
00315 *     compute and properly scale error
00316 *
00317       ERR = PDLANGE( 'One-norm', NRHS, NCOLS, WORK( NROWSP*NRHSQ+1 ),
00318      $               IW2, JW2, DESCW2, RWORK )
00319       IF( NORMA.NE.ZERO )
00320      $   ERR = ERR / NORMA
00321 *
00322       IF( ISCL.EQ.1 )
00323      $   ERR = ERR*NORMRS
00324 *
00325       IF( IRESID.EQ.1 ) THEN
00326          NORMB = PDLANGE( 'One-norm', NROWS, NRHS, B, IB, JB, DESCB,
00327      $                    RWORK )
00328          IF( NORMB.NE.ZERO )
00329      $      ERR = ERR / NORMB
00330       ELSE
00331          NORMX = PDLANGE( 'One-norm', NCOLS, NRHS, X, IX, JX, DESCX,
00332      $                    RWORK )
00333          IF( NORMX.NE.ZERO )
00334      $      ERR = ERR / NORMX
00335       END IF
00336 *
00337       PDQRT17 = ERR / ( PDLAMCH( ICTXT, 'Epsilon' ) *
00338      $                  DBLE( MAX( M, N, NRHS ) ) )
00339 *
00340       RETURN
00341 *
00342 *     End of PDQRT17
00343 *
00344       END