ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pcporfs.f
Go to the documentation of this file.
00001       SUBROUTINE PCPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF,
00002      $                    DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX,
00003      $                    FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO )
00004 *
00005 *  -- ScaLAPACK routine (version 1.7) --
00006 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00007 *     and University of California, Berkeley.
00008 *     November 15, 1997
00009 *
00010 *     .. Scalar Arguments ..
00011       CHARACTER          UPLO
00012       INTEGER            IA, IAF, IB, INFO, IX, JA, JAF, JB, JX,
00013      $                   LRWORK, LWORK, N, NRHS
00014 *     ..
00015 *     .. Array Arguments ..
00016       INTEGER            DESCA( * ), DESCAF( * ), DESCB( * ),
00017      $                   DESCX( * )
00018       COMPLEX            A( * ), AF( * ), B( * ), WORK( * ), X( * )
00019       REAL               BERR( * ), FERR( * ), RWORK( * )
00020 *     ..
00021 *
00022 *  Purpose
00023 *  =======
00024 *
00025 *  PCPORFS improves the computed solution to a system of linear
00026 *  equations when the coefficient matrix is Hermitian positive definite
00027 *  and provides error bounds and backward error estimates for the
00028 *  solutions.
00029 *
00030 *  Notes
00031 *  =====
00032 *
00033 *  Each global data object is described by an associated description
00034 *  vector.  This vector stores the information required to establish
00035 *  the mapping between an object element and its corresponding process
00036 *  and memory location.
00037 *
00038 *  Let A be a generic term for any 2D block cyclicly distributed array.
00039 *  Such a global array has an associated description vector DESCA.
00040 *  In the following comments, the character _ should be read as
00041 *  "of the global array".
00042 *
00043 *  NOTATION        STORED IN      EXPLANATION
00044 *  --------------- -------------- --------------------------------------
00045 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00046 *                                 DTYPE_A = 1.
00047 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00048 *                                 the BLACS process grid A is distribu-
00049 *                                 ted over. The context itself is glo-
00050 *                                 bal, but the handle (the integer
00051 *                                 value) may vary.
00052 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00053 *                                 array A.
00054 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00055 *                                 array A.
00056 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00057 *                                 the rows of the array.
00058 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00059 *                                 the columns of the array.
00060 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00061 *                                 row of the array A is distributed.
00062 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00063 *                                 first column of the array A is
00064 *                                 distributed.
00065 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00066 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00067 *
00068 *  Let K be the number of rows or columns of a distributed matrix,
00069 *  and assume that its process grid has dimension p x q.
00070 *  LOCr( K ) denotes the number of elements of K that a process
00071 *  would receive if K were distributed over the p processes of its
00072 *  process column.
00073 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00074 *  process would receive if K were distributed over the q processes of
00075 *  its process row.
00076 *  The values of LOCr() and LOCc() may be determined via a call to the
00077 *  ScaLAPACK tool function, NUMROC:
00078 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00079 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00080 *  An upper bound for these quantities may be computed by:
00081 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00082 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00083 *
00084 *  In the following comments, sub( A ), sub( X ) and sub( B ) denote
00085 *  respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
00086 *  B(IB:IB+N-1,JB:JB+NRHS-1).
00087 *
00088 *  Arguments
00089 *  =========
00090 *
00091 *  UPLO    (global input) CHARACTER*1
00092 *          Specifies whether the upper or lower triangular part of the
00093 *          Hermitian matrix sub( A ) is stored.
00094 *          = 'U':  Upper triangular
00095 *          = 'L':  Lower triangular
00096 *
00097 *  N       (global input) INTEGER
00098 *          The order of the matrix sub( A ).  N >= 0.
00099 *
00100 *  NRHS    (global input) INTEGER
00101 *          The number of right hand sides, i.e., the number of columns
00102 *          of the matrices sub( B ) and sub( X ).  NRHS >= 0.
00103 *
00104 *  A       (local input) COMPLEX pointer into the local
00105 *          memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ).
00106 *          This array contains the local pieces of the N-by-N Hermitian
00107 *          distributed matrix sub( A ) to be factored.
00108 *          If UPLO = 'U', the leading N-by-N upper triangular part of
00109 *          sub( A ) contains the upper triangular part of the matrix,
00110 *          and its strictly lower triangular part is not referenced.
00111 *          If UPLO = 'L', the leading N-by-N lower triangular part of
00112 *          sub( A ) contains the lower triangular part of the distribu-
00113 *          ted matrix, and its strictly upper triangular part is not
00114 *          referenced.
00115 *
00116 *  IA      (global input) INTEGER
00117 *          The row index in the global array A indicating the first
00118 *          row of sub( A ).
00119 *
00120 *  JA      (global input) INTEGER
00121 *          The column index in the global array A indicating the
00122 *          first column of sub( A ).
00123 *
00124 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00125 *          The array descriptor for the distributed matrix A.
00126 *
00127 *  AF      (local input) COMPLEX pointer into the local memory
00128 *          to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
00129 *          On entry, this array contains the factors L or U from the
00130 *          Cholesky factorization sub( A ) = L*L**H or U**H*U, as
00131 *          computed by PCPOTRF.
00132 *
00133 *  IAF     (global input) INTEGER
00134 *          The row index in the global array AF indicating the first
00135 *          row of sub( AF ).
00136 *
00137 *  JAF     (global input) INTEGER
00138 *          The column index in the global array AF indicating the
00139 *          first column of sub( AF ).
00140 *
00141 *  DESCAF  (global and local input) INTEGER array of dimension DLEN_.
00142 *          The array descriptor for the distributed matrix AF.
00143 *
00144 *  B       (local input) COMPLEX pointer into the local memory
00145 *          to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ).
00146 *          On entry, this array contains the the local pieces of the
00147 *          right hand sides sub( B ).
00148 *
00149 *  IB      (global input) INTEGER
00150 *          The row index in the global array B indicating the first
00151 *          row of sub( B ).
00152 *
00153 *  JB      (global input) INTEGER
00154 *          The column index in the global array B indicating the
00155 *          first column of sub( B ).
00156 *
00157 *  DESCB   (global and local input) INTEGER array of dimension DLEN_.
00158 *          The array descriptor for the distributed matrix B.
00159 *
00160 *  X       (local input) COMPLEX pointer into the local memory
00161 *          to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ).
00162 *          On entry, this array contains the the local pieces of the
00163 *          solution vectors sub( X ). On exit, it contains the
00164 *          improved solution vectors.
00165 *
00166 *  IX      (global input) INTEGER
00167 *          The row index in the global array X indicating the first
00168 *          row of sub( X ).
00169 *
00170 *  JX      (global input) INTEGER
00171 *          The column index in the global array X indicating the
00172 *          first column of sub( X ).
00173 *
00174 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00175 *          The array descriptor for the distributed matrix X.
00176 *
00177 *  FERR    (local output) REAL array of local dimension
00178 *          LOCc(JB+NRHS-1).
00179 *          The estimated forward error bound for each solution vector
00180 *          of sub( X ).  If XTRUE is the true solution corresponding
00181 *          to sub( X ), FERR is an estimated upper bound for the
00182 *          magnitude of the largest element in (sub( X ) - XTRUE)
00183 *          divided by the magnitude of the largest element in sub( X ).
00184 *          The estimate is as reliable as the estimate for RCOND, and
00185 *          is almost always a slight overestimate of the true error.
00186 *          This array is tied to the distributed matrix X.
00187 *
00188 *  BERR    (local output) REAL array of local dimension
00189 *          LOCc(JB+NRHS-1). The componentwise relative backward
00190 *          error of each solution vector (i.e., the smallest re-
00191 *          lative change in any entry of sub( A ) or sub( B )
00192 *          that makes sub( X ) an exact solution).
00193 *          This array is tied to the distributed matrix X.
00194 *
00195 *  WORK    (local workspace/local output) COMPLEX array,
00196 *                                                   dimension (LWORK)
00197 *          On exit, WORK(1) returns the minimal and optimal LWORK.
00198 *
00199 *  LWORK   (local or global input) INTEGER
00200 *          The dimension of the array WORK.
00201 *          LWORK is local input and must be at least
00202 *          LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) )
00203 *
00204 *          If LWORK = -1, then LWORK is global input and a workspace
00205 *          query is assumed; the routine only calculates the minimum
00206 *          and optimal size for all work arrays. Each of these
00207 *          values is returned in the first entry of the corresponding
00208 *          work array, and no error message is issued by PXERBLA.
00209 *
00210 *  RWORK   (local workspace/local output) REAL array,
00211 *                                                    dimension (LRWORK)
00212 *          On exit, RWORK(1) returns the minimal and optimal LRWORK.
00213 *
00214 *  LRWORK  (local or global input) INTEGER
00215 *          The dimension of the array RWORK.
00216 *          LRWORK is local input and must be at least
00217 *          LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ).
00218 *
00219 *          If LRWORK = -1, then LRWORK is global input and a workspace
00220 *          query is assumed; the routine only calculates the minimum
00221 *          and optimal size for all work arrays. Each of these
00222 *          values is returned in the first entry of the corresponding
00223 *          work array, and no error message is issued by PXERBLA.
00224 *
00225 *
00226 *  INFO    (global output) INTEGER
00227 *          = 0:  successful exit
00228 *          < 0:  If the i-th argument is an array and the j-entry had
00229 *                an illegal value, then INFO = -(i*100+j), if the i-th
00230 *                argument is a scalar and had an illegal value, then
00231 *                INFO = -i.
00232 *
00233 *  Internal Parameters
00234 *  ===================
00235 *
00236 *  ITMAX is the maximum number of steps of iterative refinement.
00237 *
00238 *  Notes
00239 *  =====
00240 *
00241 *  This routine temporarily returns when N <= 1.
00242 *
00243 *  The distributed submatrices op( A ) and op( AF ) (respectively
00244 *  sub( X ) and sub( B ) ) should be distributed the same way on the
00245 *  same processes. These conditions ensure that sub( A ) and sub( AF )
00246 *  (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
00247 *
00248 *  Moreover, this routine requires the distributed submatrices sub( A ),
00249 *  sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
00250 *  i.e., if f(x,y) = MOD( x-1, y ):
00251 *  f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
00252 *  f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
00253 *  f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
00254 *  f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
00255 *
00256 *  =====================================================================
00257 *
00258 *     .. Parameters ..
00259       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00260      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00261       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00262      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00263      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00264       INTEGER            ITMAX
00265       PARAMETER          ( ITMAX = 5 )
00266       REAL               ZERO, RONE, TWO, THREE
00267       PARAMETER          ( ZERO = 0.0E+0, RONE = 1.0E+0, TWO = 2.0E+0,
00268      $                     THREE = 3.0E+0 )
00269       COMPLEX            ONE
00270       PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
00271 *     ..
00272 *     .. Local Scalars ..
00273       LOGICAL            LQUERY, UPPER
00274       INTEGER            COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
00275      $                   IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB,
00276      $                   ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW,
00277      $                   IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB,
00278      $                   IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW,
00279      $                   K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS,
00280      $                   MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ
00281       REAL               EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
00282       COMPLEX            ZDUM
00283 *     ..
00284 *     .. Local Arrays ..
00285       INTEGER            DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
00286 *     ..
00287 *     .. External Functions ..
00288       LOGICAL            LSAME
00289       INTEGER            ICEIL, INDXG2P, NUMROC
00290       REAL               PSLAMCH
00291       EXTERNAL           ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH
00292 *     ..
00293 *     .. External Subroutines ..
00294       EXTERNAL           BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT,
00295      $                   DESCSET, INFOG2L, PCAHEMV, PCAXPY, PCHK2MAT,
00296      $                   PCCOPY, PCHEMV, PCPOTRS, PCLACON,
00297      $                   PXERBLA, SGAMX2D
00298 *     ..
00299 *     .. Intrinsic Functions ..
00300       INTRINSIC          ABS, AIMAG, CMPLX, ICHAR, MAX, MIN, MOD, REAL
00301 *     ..
00302 *     .. Statement Functions ..
00303       REAL               CABS1
00304 *     ..
00305 *     .. Statement Function definitions ..
00306       CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
00307 *     ..
00308 *     .. Executable Statements ..
00309 *
00310 *     Get grid parameters
00311 *
00312       ICTXT = DESCA( CTXT_ )
00313       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00314 *
00315 *     Test the input parameters.
00316 *
00317       INFO = 0
00318       IF( NPROW.EQ.-1 ) THEN
00319          INFO = -(700+CTXT_)
00320       ELSE
00321          CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO )
00322          CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO )
00323          CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, INFO )
00324          CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 19, INFO )
00325          IF( INFO.EQ.0 ) THEN
00326             UPPER = LSAME( UPLO, 'U' )
00327             IROFFA = MOD( IA-1, DESCA( MB_ ) )
00328             ICOFFA = MOD( JA-1, DESCA( NB_ ) )
00329             IROFFAF = MOD( IAF-1, DESCAF( MB_ ) )
00330             ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) )
00331             IROFFB = MOD( IB-1, DESCB( MB_ ) )
00332             ICOFFB = MOD( JB-1, DESCB( NB_ ) )
00333             IROFFX = MOD( IX-1, DESCX( MB_ ) )
00334             ICOFFX = MOD( JX-1, DESCX( NB_ ) )
00335             IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
00336      $                       NPROW )
00337             IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL,
00338      $                        DESCAF( CSRC_ ), NPCOL )
00339             IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW,
00340      $                        DESCAF( RSRC_ ), NPROW )
00341             IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
00342      $                       NPCOL )
00343             CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL,
00344      $                    IIXB, JJXB, IXBROW, IXBCOL )
00345             IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ),
00346      $                       NPROW )
00347             IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ),
00348      $                       NPCOL )
00349             NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW,
00350      $                      NPROW )
00351             LWMIN = 2 * NPMOD
00352             LRWMIN = NPMOD
00353             WORK( 1 ) = CMPLX( REAL( LWMIN ) )
00354             RWORK( 1 ) = REAL( LRWMIN )
00355             LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 )
00356 *
00357             IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
00358                INFO = -1
00359             ELSE IF( N.LT.0 ) THEN
00360                INFO = -2
00361             ELSE IF( NRHS.LT.0 ) THEN
00362                INFO = -3
00363             ELSE IF( IROFFA.NE.0 ) THEN
00364                INFO = -5
00365             ELSE IF( ICOFFA.NE.0 ) THEN
00366                INFO = -6
00367             ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
00368                INFO = -( 700 + NB_ )
00369             ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN
00370                INFO = -( 1100 + MB_ )
00371             ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN
00372                INFO = -9
00373             ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN
00374                INFO = -( 1100 + NB_ )
00375             ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN
00376                INFO = -10
00377             ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN
00378                INFO = -( 1100 + CTXT_ )
00379             ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN
00380                INFO = -13
00381             ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN
00382                INFO = -( 1500 + MB_ )
00383             ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN
00384                INFO = -( 1500 + CTXT_ )
00385             ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN
00386                INFO = -( 1900 + MB_ )
00387             ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN
00388                INFO = -17
00389             ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN
00390                INFO = -( 1900 + NB_ )
00391             ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN
00392                INFO = -18
00393             ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN
00394                INFO = -( 1900 + CTXT_ )
00395             ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
00396                INFO = -23
00397             ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
00398                INFO = -25
00399             END IF
00400          END IF
00401 *
00402          IF( UPPER ) THEN
00403             IDUM1( 1 ) = ICHAR( 'U' )
00404          ELSE
00405             IDUM1( 1 ) = ICHAR( 'L' )
00406          END IF
00407          IDUM2( 1 ) = 1
00408          IDUM1( 2 ) = N
00409          IDUM2( 2 ) = 2
00410          IDUM1( 3 ) = NRHS
00411          IDUM2( 3 ) = 3
00412          IF( LWORK.EQ.-1 ) THEN
00413             IDUM1( 4 ) = -1
00414          ELSE
00415             IDUM1( 4 ) = 1
00416          END IF
00417          IDUM2( 4 ) = 23
00418          IF( LRWORK.EQ.-1 ) THEN
00419             IDUM1( 5 ) = -1
00420          ELSE
00421             IDUM1( 5 ) = 1
00422          END IF
00423          IDUM2( 5 ) = 25
00424          CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF,
00425      $                  JAF, DESCAF, 11, 0, IDUM1, IDUM2, INFO )
00426          CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, N, 2, NRHS, 3,
00427      $                  IX, JX, DESCX, 19, 5, IDUM1, IDUM2, INFO )
00428       END IF
00429       IF( INFO.NE.0 ) THEN
00430          CALL PXERBLA( ICTXT, 'PCPORFS', -INFO )
00431          RETURN
00432       ELSE IF( LQUERY ) THEN
00433          RETURN
00434       END IF
00435 *
00436       JJFBE = JJXB
00437       MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
00438      $                NPCOL )
00439 *
00440 *     Quick return if possible
00441 *
00442       IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN
00443          DO 10 JJ = JJFBE, MYRHS
00444             FERR( JJ ) = ZERO
00445             BERR( JJ ) = ZERO
00446    10    CONTINUE
00447          RETURN
00448       END IF
00449 *
00450       NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW )
00451       CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL,
00452      $              ICTXT, MAX( 1, NP0 ) )
00453       IPB = 1
00454       IPR = 1
00455       IPV = IPR + NP0
00456       IF( MYROW.EQ.IXBROW ) THEN
00457          IIW = 1 + IROFFB
00458          NP = NP0 - IROFFB
00459       ELSE
00460          IIW = 1
00461          NP = NP0
00462       END IF
00463       IW = 1 + IROFFB
00464       JW = 1
00465       LDXB = DESCB( LLD_ )
00466       IOFFXB = ( JJXB-1 )*LDXB
00467 *
00468 *     NZ = 1 + maximum number of nonzero entries in each row of sub( A )
00469 *
00470       NZ = N + 1
00471       EPS = PSLAMCH( ICTXT, 'Epsilon' )
00472       SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' )
00473       SAFE1 = NZ*SAFMIN
00474       SAFE2 = SAFE1 / EPS
00475       JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 )
00476 *
00477 *     Handle first block separately
00478 *
00479       JBRHS = JN - JB + 1
00480       DO 100 K = 0, JBRHS-1
00481 *
00482          COUNT = 1
00483          LSTRES = THREE
00484    20    CONTINUE
00485 *
00486 *        Loop until stopping criterion is satisfied.
00487 *
00488 *        Compute residual R = sub(B) - op(sub(A)) * sub(X)
00489 *
00490          CALL PCCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW,
00491      $                DESCW, 1 )
00492          CALL PCHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, JX+K,
00493      $                DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 )
00494 *
00495 *        Compute componentwise relative backward error from formula
00496 *
00497 *        max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) )
00498 *
00499 *        where abs(Z) is the componentwise absolute value of the
00500 *        matrix or vector Z.  If the i-th component of the
00501 *        denominator is less than SAFE2, then SAFE1 is added to
00502 *        the i-th components of the numerator and denominator
00503 *        before dividing.
00504 *
00505          IF( MYCOL.EQ.IXBCOL ) THEN
00506             IF( NP.GT.0 ) THEN
00507                DO 30 II = IIXB, IIXB + NP - 1
00508                   RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) )
00509    30          CONTINUE
00510             END IF
00511          END IF
00512 *
00513          CALL PCAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, JX+K,
00514      $                 DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, 1 )
00515 *
00516          S = ZERO
00517          IF( MYCOL.EQ.IXBCOL ) THEN
00518             IF( NP.GT.0 ) THEN
00519                DO 40 II = IIW-1, IIW+NP-2
00520                   IF( RWORK( IPB+II ).GT.SAFE2 ) THEN
00521                      S = MAX( S, CABS1( WORK( IPR+II ) ) /
00522      $                           RWORK( IPB+II ) )
00523                   ELSE
00524                      S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) /
00525      $                           ( RWORK( IPB+II )+SAFE1 ) )
00526                   END IF
00527    40          CONTINUE
00528             END IF
00529          END IF
00530 *
00531          CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1,
00532      $                      -1, MYCOL )
00533          IF( MYCOL.EQ.IXBCOL )
00534      $      BERR( JJFBE ) = S
00535 *
00536 *        Test stopping criterion. Continue iterating if
00537 *         1) The residual BERR(J) is larger than machine epsilon, and
00538 *         2) BERR(J) decreased by at least a factor of 2 during the
00539 *            last iteration, and
00540 *         3) At most ITMAX iterations tried.
00541 *
00542          IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN
00543 *
00544 *           Update solution and try again.
00545 *
00546             CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00547      $                    WORK( IPR ), IW, JW, DESCW, INFO )
00548             CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX,
00549      $                   JX+K, DESCX, 1 )
00550             LSTRES = S
00551             COUNT = COUNT + 1
00552             GO TO 20
00553          END IF
00554 *
00555 *        Bound error from formula
00556 *
00557 *        norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
00558 *        norm( abs(inv(sub(A)))*
00559 *            ( abs(R) +
00560 *        NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X))
00561 *
00562 *        where
00563 *          norm(Z) is the magnitude of the largest component of Z
00564 *          inv(sub(A)) is the inverse of sub(A)
00565 *          abs(Z) is the componentwise absolute value of the matrix
00566 *          or vector Z
00567 *          NZ is the maximum number of nonzeros in any row of sub(A),
00568 *          plus 1
00569 *          EPS is machine epsilon
00570 *
00571 *        The i-th component of
00572 *               abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B)))
00573 *        is incremented by SAFE1 if the i-th component of
00574 *        abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
00575 *
00576 *        Use PCLACON to estimate the infinity-norm of the matrix
00577 *        inv(sub(A)) * diag(W), where
00578 *        W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
00579 *
00580          IF( MYCOL.EQ.IXBCOL ) THEN
00581             IF( NP.GT.0 ) THEN
00582                DO 50 II = IIW-1, IIW+NP-2
00583                   IF( RWORK( IPB+II ).GT.SAFE2 ) THEN
00584                      RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) +
00585      $                                 NZ*EPS*RWORK( IPB+II )
00586                   ELSE
00587                      RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) +
00588      $                                 NZ*EPS*RWORK( IPB+II ) + SAFE1
00589                   END IF
00590    50          CONTINUE
00591             END IF
00592          END IF
00593 *
00594          KASE = 0
00595    60    CONTINUE
00596          IF( MYCOL.EQ.IXBCOL ) THEN
00597             CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ),
00598      $                    DESCW( LLD_ ) )
00599          ELSE
00600             CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ),
00601      $                    DESCW( LLD_ ), MYROW, IXBCOL )
00602          END IF
00603          DESCW( CSRC_ ) = MYCOL
00604          CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ),
00605      $                 IW, JW, DESCW, EST, KASE )
00606          DESCW( CSRC_ ) = IXBCOL
00607 *
00608          IF( KASE.NE.0 ) THEN
00609             IF( KASE.EQ.1 ) THEN
00610 *
00611 *              Multiply by diag(W)*inv(sub(A)').
00612 *
00613                CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00614      $                       WORK( IPR ), IW, JW, DESCW, INFO )
00615 *
00616                IF( MYCOL.EQ.IXBCOL ) THEN
00617                   IF( NP.GT.0 ) THEN
00618                      DO 70 II = IIW-1, IIW+NP-2
00619                         WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II )
00620    70                CONTINUE
00621                   END IF
00622                END IF
00623             ELSE
00624 *
00625 *              Multiply by inv(sub(A))*diag(W).
00626 *
00627                IF( MYCOL.EQ.IXBCOL ) THEN
00628                   IF( NP.GT.0 ) THEN
00629                      DO 80 II = IIW-1, IIW+NP-2
00630                         WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II )
00631    80                CONTINUE
00632                   END IF
00633                END IF
00634 *
00635                CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00636      $                       WORK( IPR ), IW, JW, DESCW, INFO )
00637             END IF
00638             GO TO 60
00639          END IF
00640 *
00641 *           Normalize error.
00642 *
00643          LSTRES = ZERO
00644          IF( MYCOL.EQ.IXBCOL ) THEN
00645             IF( NP.GT.0 ) THEN
00646                DO 90 II = IIXB, IIXB+NP-1
00647                   LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) )
00648    90          CONTINUE
00649             END IF
00650             CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM,
00651      $                    IDUM, 1, -1, MYCOL )
00652             IF( LSTRES.NE.ZERO )
00653      $         FERR( JJFBE ) = EST / LSTRES
00654 *
00655             JJXB = JJXB + 1
00656             JJFBE = JJFBE + 1
00657             IOFFXB = IOFFXB + LDXB
00658 *
00659          END IF
00660 *
00661   100 CONTINUE
00662 *
00663       ICURCOL = MOD( IXBCOL+1, NPCOL )
00664 *
00665 *     Do for each right hand side
00666 *
00667       DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ )
00668          JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) )
00669          DESCW( CSRC_ ) = ICURCOL
00670 *
00671          DO 190 K = 0, JBRHS-1
00672 *
00673             COUNT = 1
00674             LSTRES = THREE
00675   110       CONTINUE
00676 *
00677 *           Loop until stopping criterion is satisfied.
00678 *
00679 *           Compute residual R = sub( B ) - sub( A )*sub( X ).
00680 *
00681             CALL PCCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW,
00682      $                   DESCW, 1 )
00683             CALL PCHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, J+K,
00684      $                  DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 )
00685 *
00686 *           Compute componentwise relative backward error from formula
00687 *
00688 *           max(i) ( abs(R(i)) /
00689 *                    ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) )
00690 *
00691 *           where abs(Z) is the componentwise absolute value of the
00692 *           matrix or vector Z.  If the i-th component of the
00693 *           denominator is less than SAFE2, then SAFE1 is added to the
00694 *           i-th components of the numerator and denominator before
00695 *           dividing.
00696 *
00697             IF( MYCOL.EQ.ICURCOL ) THEN
00698                IF( NP.GT.0 ) THEN
00699                   DO 120 II = IIXB, IIXB+NP-1
00700                      RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) )
00701   120             CONTINUE
00702                END IF
00703             END IF
00704 *
00705             CALL PCAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, J+K,
00706      $                    DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW,
00707      $                    1 )
00708 *
00709             S = ZERO
00710             IF( MYCOL.EQ.ICURCOL ) THEN
00711                IF( NP.GT.0 )THEN
00712                   DO 130 II = IIW-1, IIW+NP-2
00713                      IF( RWORK( IPB+II ).GT.SAFE2 ) THEN
00714                         S = MAX( S, CABS1( WORK( IPR+II ) ) /
00715      $                              RWORK( IPB+II ) )
00716                      ELSE
00717                         S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) /
00718      $                              ( RWORK( IPB+II )+SAFE1 ) )
00719                      END IF
00720   130             CONTINUE
00721                END IF
00722             END IF
00723 *
00724             CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1,
00725      $                    -1, MYCOL )
00726             IF( MYCOL.EQ.ICURCOL )
00727      $         BERR( JJFBE ) = S
00728 *
00729 *           Test stopping criterion. Continue iterating if
00730 *             1) The residual BERR(J+K) is larger than machine epsilon,
00731 *                and
00732 *             2) BERR(J+K) decreased by at least a factor of 2 during
00733 *                the last iteration, and
00734 *             3) At most ITMAX iterations tried.
00735 *
00736             IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND.
00737      $          COUNT.LE.ITMAX ) THEN
00738 *
00739 *              Update solution and try again.
00740 *
00741                CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00742      $                       WORK( IPR ), IW, JW, DESCW, INFO )
00743                CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X,
00744      $                      IX, J+K, DESCX, 1 )
00745                LSTRES = S
00746                COUNT = COUNT + 1
00747                GO TO 110
00748             END IF
00749 *
00750 *           Bound error from formula
00751 *
00752 *           norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
00753 *           norm( abs(inv(sub(A)))*
00754 *               ( abs(R) + NZ*EPS*(
00755 *                 abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X))
00756 *
00757 *           where
00758 *             norm(Z) is the magnitude of the largest component of Z
00759 *             inv(sub(A)) is the inverse of sub(A)
00760 *             abs(Z) is the componentwise absolute value of the matrix
00761 *                or vector Z
00762 *             NZ is the maximum number of nonzeros in any row of sub(A),
00763 *                plus 1
00764 *             EPS is machine epsilon
00765 *
00766 *           The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))
00767 *           +abs(sub(B))) is incremented by SAFE1 if the i-th component
00768 *           of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
00769 *
00770 *           Use PCLACON to estimate the infinity-norm of the matrix
00771 *           inv(sub(A)) * diag(W), where
00772 *           W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
00773 *
00774             IF( MYCOL.EQ.ICURCOL ) THEN
00775                IF( NP.GT.0 ) THEN
00776                   DO 140 II = IIW-1, IIW+NP-2
00777                      IF( RWORK( IPB+II ).GT.SAFE2 ) THEN
00778                         RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) +
00779      $                                    NZ*EPS*RWORK( IPB+II )
00780                      ELSE
00781                         RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) +
00782      $                                    NZ*EPS*RWORK( IPB+II ) + SAFE1
00783                     END IF
00784   140             CONTINUE
00785                END IF
00786             END IF
00787 *
00788             KASE = 0
00789   150       CONTINUE
00790             IF( MYCOL.EQ.ICURCOL ) THEN
00791                CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ),
00792      $                       DESCW( LLD_ ) )
00793             ELSE
00794                CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ),
00795      $                       DESCW( LLD_ ), MYROW, ICURCOL )
00796             END IF
00797             DESCW( CSRC_ ) = MYCOL
00798             CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ),
00799      $                    IW, JW, DESCW, EST, KASE )
00800             DESCW( CSRC_ ) = ICURCOL
00801 *
00802             IF( KASE.NE.0 ) THEN
00803                IF( KASE.EQ.1 ) THEN
00804 *
00805 *                 Multiply by diag(W)*inv(sub(A)').
00806 *
00807                   CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00808      $                         WORK( IPR ), IW, JW, DESCW, INFO )
00809 *
00810                   IF( MYCOL.EQ.ICURCOL ) THEN
00811                      IF( NP.GT.0 ) THEN
00812                         DO 160 II = IIW-1, IIW+NP-2
00813                            WORK( IPR+II ) = RWORK( IPB+II )*
00814      $                                      WORK( IPR+II )
00815   160                   CONTINUE
00816                      END IF
00817                   END IF
00818                ELSE
00819 *
00820 *                 Multiply by inv(sub(A))*diag(W).
00821 *
00822                   IF( MYCOL.EQ.ICURCOL ) THEN
00823                      IF( NP.GT.0 ) THEN
00824                         DO 170 II = IIW-1, IIW+NP-2
00825                            WORK( IPR+II ) = RWORK( IPB+II )*
00826      $                                      WORK( IPR+II )
00827   170                   CONTINUE
00828                      END IF
00829                   END IF
00830 *
00831                   CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF,
00832      $                          WORK( IPR ), IW, JW, DESCW, INFO )
00833                END IF
00834                GO TO 150
00835             END IF
00836 *
00837 *           Normalize error.
00838 *
00839             LSTRES = ZERO
00840             IF( MYCOL.EQ.ICURCOL ) THEN
00841                IF( NP.GT.0 ) THEN
00842                   DO 180 II = IIXB, IIXB+NP-1
00843                      LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) )
00844   180             CONTINUE
00845                END IF
00846                CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1,
00847      $                       IDUM, IDUM, 1, -1, MYCOL )
00848                IF( LSTRES.NE.ZERO )
00849      $            FERR( JJFBE ) = EST / LSTRES
00850 *
00851                JJXB = JJXB + 1
00852                JJFBE = JJFBE + 1
00853                IOFFXB = IOFFXB + LDXB
00854 *
00855             END IF
00856 *
00857   190    CONTINUE
00858 *
00859          ICURCOL = MOD( ICURCOL+1, NPCOL )
00860 *
00861   200 CONTINUE
00862 *
00863       WORK( 1 ) = CMPLX( REAL( LWMIN ) )
00864       RWORK( 1 ) = REAL( LRWMIN )
00865 *
00866       RETURN
00867 *
00868 *     End of PCPORFS
00869 *
00870       END