ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psporfs.f
Go to the documentation of this file.
1  SUBROUTINE psporfs( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF,
2  $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX,
3  $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO )
4 *
5 * -- ScaLAPACK routine (version 1.7) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * May 1, 1997
9 *
10 * .. Scalar Arguments ..
11  CHARACTER UPLO
12  INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX,
13  $ liwork, lwork, n, nrhs
14 * ..
15 * .. Array Arguments ..
16  INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
17  $ DESCX( * ), IWORK( * )
18  REAL A( * ), AF( * ), B( * ),
19  $ BERR( * ), FERR( * ), WORK( * ), X( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * PSPORFS improves the computed solution to a system of linear
26 * equations when the coefficient matrix is symmetric positive definite
27 * and provides error bounds and backward error estimates for the
28 * solutions.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCr(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCr( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCc( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCr() and LOCc() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * In the following comments, sub( A ), sub( X ) and sub( B ) denote
85 * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86 * B(IB:IB+N-1,JB:JB+NRHS-1).
87 *
88 * Arguments
89 * =========
90 *
91 * UPLO (global input) CHARACTER*1
92 * Specifies whether the upper or lower triangular part of the
93 * symmetric matrix sub( A ) is stored.
94 * = 'U': Upper triangular
95 * = 'L': Lower triangular
96 *
97 * N (global input) INTEGER
98 * The order of the matrix sub( A ). N >= 0.
99 *
100 * NRHS (global input) INTEGER
101 * The number of right hand sides, i.e., the number of columns
102 * of the matrices sub( B ) and sub( X ). NRHS >= 0.
103 *
104 * A (local input) REAL pointer into the local
105 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ).
106 * This array contains the local pieces of the N-by-N symmetric
107 * distributed matrix sub( A ) to be factored.
108 * If UPLO = 'U', the leading N-by-N upper triangular part of
109 * sub( A ) contains the upper triangular part of the matrix,
110 * and its strictly lower triangular part is not referenced.
111 * If UPLO = 'L', the leading N-by-N lower triangular part of
112 * sub( A ) contains the lower triangular part of the distribu-
113 * ted matrix, and its strictly upper triangular part is not
114 * referenced.
115 *
116 * IA (global input) INTEGER
117 * The row index in the global array A indicating the first
118 * row of sub( A ).
119 *
120 * JA (global input) INTEGER
121 * The column index in the global array A indicating the
122 * first column of sub( A ).
123 *
124 * DESCA (global and local input) INTEGER array of dimension DLEN_.
125 * The array descriptor for the distributed matrix A.
126 *
127 * AF (local input) REAL pointer into the local memory
128 * to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
129 * On entry, this array contains the factors L or U from the
130 * Cholesky factorization sub( A ) = L*L**T or U**T*U, as
131 * computed by PSPOTRF.
132 *
133 * IAF (global input) INTEGER
134 * The row index in the global array AF indicating the first
135 * row of sub( AF ).
136 *
137 * JAF (global input) INTEGER
138 * The column index in the global array AF indicating the
139 * first column of sub( AF ).
140 *
141 * DESCAF (global and local input) INTEGER array of dimension DLEN_.
142 * The array descriptor for the distributed matrix AF.
143 *
144 * B (local input) REAL pointer into the local memory
145 * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ).
146 * On entry, this array contains the the local pieces of the
147 * right hand sides sub( B ).
148 *
149 * IB (global input) INTEGER
150 * The row index in the global array B indicating the first
151 * row of sub( B ).
152 *
153 * JB (global input) INTEGER
154 * The column index in the global array B indicating the
155 * first column of sub( B ).
156 *
157 * DESCB (global and local input) INTEGER array of dimension DLEN_.
158 * The array descriptor for the distributed matrix B.
159 *
160 * X (local input) REAL pointer into the local memory
161 * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ).
162 * On entry, this array contains the the local pieces of the
163 * solution vectors sub( X ). On exit, it contains the
164 * improved solution vectors.
165 *
166 * IX (global input) INTEGER
167 * The row index in the global array X indicating the first
168 * row of sub( X ).
169 *
170 * JX (global input) INTEGER
171 * The column index in the global array X indicating the
172 * first column of sub( X ).
173 *
174 * DESCX (global and local input) INTEGER array of dimension DLEN_.
175 * The array descriptor for the distributed matrix X.
176 *
177 * FERR (local output) REAL array of local dimension
178 * LOCc(JB+NRHS-1).
179 * The estimated forward error bound for each solution vector
180 * of sub( X ). If XTRUE is the true solution corresponding
181 * to sub( X ), FERR is an estimated upper bound for the
182 * magnitude of the largest element in (sub( X ) - XTRUE)
183 * divided by the magnitude of the largest element in sub( X ).
184 * The estimate is as reliable as the estimate for RCOND, and
185 * is almost always a slight overestimate of the true error.
186 * This array is tied to the distributed matrix X.
187 *
188 * BERR (local output) REAL array of local dimension
189 * LOCc(JB+NRHS-1). The componentwise relative backward
190 * error of each solution vector (i.e., the smallest re-
191 * lative change in any entry of sub( A ) or sub( B )
192 * that makes sub( X ) an exact solution).
193 * This array is tied to the distributed matrix X.
194 *
195 * WORK (local workspace/local output) REAL array,
196 * dimension (LWORK)
197 * On exit, WORK(1) returns the minimal and optimal LWORK.
198 *
199 * LWORK (local or global input) INTEGER
200 * The dimension of the array WORK.
201 * LWORK is local input and must be at least
202 * LWORK >= 3*LOCr( N + MOD( IA-1, MB_A ) )
203 *
204 * If LWORK = -1, then LWORK is global input and a workspace
205 * query is assumed; the routine only calculates the minimum
206 * and optimal size for all work arrays. Each of these
207 * values is returned in the first entry of the corresponding
208 * work array, and no error message is issued by PXERBLA.
209 *
210 * IWORK (local workspace/local output) INTEGER array,
211 * dimension (LIWORK)
212 * On exit, IWORK(1) returns the minimal and optimal LIWORK.
213 *
214 * LIWORK (local or global input) INTEGER
215 * The dimension of the array IWORK.
216 * LIWORK is local input and must be at least
217 * LIWORK >= LOCr( N + MOD( IB-1, MB_B ) ).
218 *
219 * If LIWORK = -1, then LIWORK is global input and a workspace
220 * query is assumed; the routine only calculates the minimum
221 * and optimal size for all work arrays. Each of these
222 * values is returned in the first entry of the corresponding
223 * work array, and no error message is issued by PXERBLA.
224 *
225 *
226 * INFO (global output) INTEGER
227 * = 0: successful exit
228 * < 0: If the i-th argument is an array and the j-entry had
229 * an illegal value, then INFO = -(i*100+j), if the i-th
230 * argument is a scalar and had an illegal value, then
231 * INFO = -i.
232 *
233 * Internal Parameters
234 * ===================
235 *
236 * ITMAX is the maximum number of steps of iterative refinement.
237 *
238 * Notes
239 * =====
240 *
241 * This routine temporarily returns when N <= 1.
242 *
243 * The distributed submatrices op( A ) and op( AF ) (respectively
244 * sub( X ) and sub( B ) ) should be distributed the same way on the
245 * same processes. These conditions ensure that sub( A ) and sub( AF )
246 * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
247 *
248 * Moreover, this routine requires the distributed submatrices sub( A ),
249 * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
250 * i.e., if f(x,y) = MOD( x-1, y ):
251 * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
252 * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
253 * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
254 * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
255 *
256 * =====================================================================
257 *
258 * .. Parameters ..
259  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
260  $ LLD_, MB_, M_, NB_, N_, RSRC_
261  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
262  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
263  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
264  INTEGER ITMAX
265  PARAMETER ( ITMAX = 5 )
266  REAL ZERO, ONE
267  parameter( zero = 0.0e+0, one = 1.0e+0 )
268  REAL TWO, THREE
269  parameter( two = 2.0e+0, three = 3.0e+0 )
270 * ..
271 * .. Local Scalars ..
272  LOGICAL LQUERY, UPPER
273  INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
274  $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
275  $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
276  $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
277  $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
278  $ k, kase, ldxb, liwmin, lwmin, mycol, myrhs,
279  $ myrow, np, np0, npcol, npmod, nprow, nz
280  REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
281 * ..
282 * .. Local Arrays ..
283  INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
284 * ..
285 * .. External Functions ..
286  LOGICAL LSAME
287  INTEGER ICEIL, INDXG2P, NUMROC
288  REAL PSLAMCH
289  EXTERNAL iceil, indxg2p, lsame, numroc, pslamch
290 * ..
291 * .. External Subroutines ..
292  EXTERNAL blacs_gridinfo, chk1mat, descset, infog2l,
293  $ pchk2mat, psasymv, psaxpy, pscopy,
294  $ pslacon, pspotrs, pssymv, pxerbla,
295  $ sgamx2d, sgebr2d, sgebs2d
296 * ..
297 * .. Intrinsic Functions ..
298  INTRINSIC abs, ichar, max, min, mod, real
299 * ..
300 * .. Executable Statements ..
301 *
302 * Get grid parameters
303 *
304  ictxt = desca( ctxt_ )
305  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
306 *
307 * Test the input parameters.
308 *
309  info = 0
310  IF( nprow.EQ.-1 ) THEN
311  info = -(700+ctxt_)
312  ELSE
313  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
314  CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
315  CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 15, info )
316  CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 19, info )
317  IF( info.EQ.0 ) THEN
318  upper = lsame( uplo, 'U' )
319  iroffa = mod( ia-1, desca( mb_ ) )
320  icoffa = mod( ja-1, desca( nb_ ) )
321  iroffaf = mod( iaf-1, descaf( mb_ ) )
322  icoffaf = mod( jaf-1, descaf( nb_ ) )
323  iroffb = mod( ib-1, descb( mb_ ) )
324  icoffb = mod( jb-1, descb( nb_ ) )
325  iroffx = mod( ix-1, descx( mb_ ) )
326  icoffx = mod( jx-1, descx( nb_ ) )
327  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
328  $ nprow )
329  iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
330  $ descaf( csrc_ ), npcol )
331  iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
332  $ descaf( rsrc_ ), nprow )
333  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
334  $ npcol )
335  CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
336  $ iixb, jjxb, ixbrow, ixbcol )
337  ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
338  $ nprow )
339  ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
340  $ npcol )
341  npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
342  $ nprow )
343  lwmin = 3 * npmod
344  liwmin = npmod
345  work( 1 ) = real( lwmin )
346  iwork( 1 ) = liwmin
347  lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
348 *
349  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
350  info = -1
351  ELSE IF( n.LT.0 ) THEN
352  info = -2
353  ELSE IF( nrhs.LT.0 ) THEN
354  info = -3
355  ELSE IF( iroffa.NE.0 ) THEN
356  info = -5
357  ELSE IF( icoffa.NE.0 ) THEN
358  info = -6
359  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
360  info = -( 700 + nb_ )
361  ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
362  info = -( 1100 + mb_ )
363  ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
364  info = -9
365  ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
366  info = -( 1100 + nb_ )
367  ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
368  info = -10
369  ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
370  info = -( 1100 + ctxt_ )
371  ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
372  info = -13
373  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
374  info = -( 1500 + mb_ )
375  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
376  info = -( 1500 + ctxt_ )
377  ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
378  info = -( 1900 + mb_ )
379  ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
380  info = -17
381  ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
382  info = -( 1900 + nb_ )
383  ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
384  info = -18
385  ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
386  info = -( 1900 + ctxt_ )
387  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
388  info = -23
389  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
390  info = -25
391  END IF
392  END IF
393 *
394  IF( upper ) THEN
395  idum1( 1 ) = ichar( 'U' )
396  ELSE
397  idum1( 1 ) = ichar( 'L' )
398  END IF
399  idum2( 1 ) = 1
400  idum1( 2 ) = n
401  idum2( 2 ) = 2
402  idum1( 3 ) = nrhs
403  idum2( 3 ) = 3
404  IF( lwork.EQ.-1 ) THEN
405  idum1( 4 ) = -1
406  ELSE
407  idum1( 4 ) = 1
408  END IF
409  idum2( 4 ) = 23
410  IF( liwork.EQ.-1 ) THEN
411  idum1( 5 ) = -1
412  ELSE
413  idum1( 5 ) = 1
414  END IF
415  idum2( 5 ) = 25
416  CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
417  $ jaf, descaf, 11, 0, idum1, idum2, info )
418  CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 15, n, 2, nrhs, 3,
419  $ ix, jx, descx, 19, 5, idum1, idum2, info )
420  END IF
421  IF( info.NE.0 ) THEN
422  CALL pxerbla( ictxt, 'PSPORFS', -info )
423  RETURN
424  ELSE IF( lquery ) THEN
425  RETURN
426  END IF
427 *
428  jjfbe = jjxb
429  myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
430  $ npcol )
431 *
432 * Quick return if possible
433 *
434  IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
435  DO 10 jj = jjfbe, myrhs
436  ferr( jj ) = zero
437  berr( jj ) = zero
438  10 CONTINUE
439  RETURN
440  END IF
441 *
442  np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
443  CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
444  $ ictxt, max( 1, np0 ) )
445  ipb = 1
446  ipr = ipb + np0
447  ipv = ipr + np0
448  IF( myrow.EQ.ixbrow ) THEN
449  iiw = 1 + iroffb
450  np = np0 - iroffb
451  ELSE
452  iiw = 1
453  np = np0
454  END IF
455  iw = 1 + iroffb
456  jw = 1
457  ldxb = descb( lld_ )
458  ioffxb = ( jjxb-1 )*ldxb
459 *
460 * NZ = 1 + maximum number of nonzero entries in each row of sub( A )
461 *
462  nz = n + 1
463  eps = pslamch( ictxt, 'Epsilon' )
464  safmin = pslamch( ictxt, 'Safe minimum' )
465  safe1 = nz*safmin
466  safe2 = safe1 / eps
467  jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
468 *
469 * Handle first block separately
470 *
471  jbrhs = jn - jb + 1
472  DO 100 k = 0, jbrhs-1
473 *
474  count = 1
475  lstres = three
476  20 CONTINUE
477 *
478 * Loop until stopping criterion is satisfied.
479 *
480 * Compute residual R = sub(B) - op(sub(A)) * sub(X)
481 *
482  CALL pscopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
483  $ descw, 1 )
484  CALL pssymv( uplo, n, -one, a, ia, ja, desca, x, ix, jx+k,
485  $ descx, 1, one, work( ipr ), iw, jw, descw, 1 )
486 *
487 * Compute componentwise relative backward error from formula
488 *
489 * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) )
490 *
491 * where abs(Z) is the componentwise absolute value of the
492 * matrix or vector Z. If the i-th component of the
493 * denominator is less than SAFE2, then SAFE1 is added to
494 * the i-th components of the numerator and denominator
495 * before dividing.
496 *
497  IF( mycol.EQ.ixbcol ) THEN
498  IF( np.GT.0 ) THEN
499  DO 30 ii = iixb, iixb + np - 1
500  work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
501  30 CONTINUE
502  END IF
503  END IF
504 *
505  CALL psasymv( uplo, n, one, a, ia, ja, desca, x, ix, jx+k,
506  $ descx, 1, one, work( ipb ), iw, jw, descw, 1 )
507 *
508  s = zero
509  IF( mycol.EQ.ixbcol ) THEN
510  IF( np.GT.0 ) THEN
511  DO 40 ii = iiw-1, iiw+np-2
512  IF( work( ipb+ii ).GT.safe2 ) THEN
513  s = max( s, abs( work( ipr+ii ) ) /
514  $ work( ipb+ii ) )
515  ELSE
516  s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
517  $ ( work( ipb+ii )+safe1 ) )
518  END IF
519  40 CONTINUE
520  END IF
521  END IF
522 *
523  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
524  $ -1, mycol )
525  IF( mycol.EQ.ixbcol )
526  $ berr( jjfbe ) = s
527 *
528 * Test stopping criterion. Continue iterating if
529 * 1) The residual BERR(J) is larger than machine epsilon, and
530 * 2) BERR(J) decreased by at least a factor of 2 during the
531 * last iteration, and
532 * 3) At most ITMAX iterations tried.
533 *
534  IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
535 *
536 * Update solution and try again.
537 *
538  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
539  $ work( ipr ), iw, jw, descw, info )
540  CALL psaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
541  $ jx+k, descx, 1 )
542  lstres = s
543  count = count + 1
544  GO TO 20
545  END IF
546 *
547 * Bound error from formula
548 *
549 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
550 * norm( abs(inv(sub(A)))*
551 * ( abs(R) +
552 * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X))
553 *
554 * where
555 * norm(Z) is the magnitude of the largest component of Z
556 * inv(sub(A)) is the inverse of sub(A)
557 * abs(Z) is the componentwise absolute value of the matrix
558 * or vector Z
559 * NZ is the maximum number of nonzeros in any row of sub(A),
560 * plus 1
561 * EPS is machine epsilon
562 *
563 * The i-th component of
564 * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B)))
565 * is incremented by SAFE1 if the i-th component of
566 * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
567 *
568 * Use PSLACON to estimate the infinity-norm of the matrix
569 * inv(sub(A)) * diag(W), where
570 * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
571 *
572  IF( mycol.EQ.ixbcol ) THEN
573  IF( np.GT.0 ) THEN
574  DO 50 ii = iiw-1, iiw+np-2
575  IF( work( ipb+ii ).GT.safe2 ) THEN
576  work( ipb+ii ) = abs( work( ipr+ii ) ) +
577  $ nz*eps*work( ipb+ii )
578  ELSE
579  work( ipb+ii ) = abs( work( ipr+ii ) ) +
580  $ nz*eps*work( ipb+ii ) + safe1
581  END IF
582  50 CONTINUE
583  END IF
584  END IF
585 *
586  kase = 0
587  60 CONTINUE
588  IF( mycol.EQ.ixbcol ) THEN
589  CALL sgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
590  $ descw( lld_ ) )
591  ELSE
592  CALL sgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
593  $ descw( lld_ ), myrow, ixbcol )
594  END IF
595  descw( csrc_ ) = mycol
596  CALL pslacon( n, work( ipv ), iw, jw, descw, work( ipr ),
597  $ iw, jw, descw, iwork, est, kase )
598  descw( csrc_ ) = ixbcol
599 *
600  IF( kase.NE.0 ) THEN
601  IF( kase.EQ.1 ) THEN
602 *
603 * Multiply by diag(W)*inv(sub(A)').
604 *
605  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
606  $ work( ipr ), iw, jw, descw, info )
607 *
608  IF( mycol.EQ.ixbcol ) THEN
609  IF( np.GT.0 ) THEN
610  DO 70 ii = iiw-1, iiw+np-2
611  work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
612  70 CONTINUE
613  END IF
614  END IF
615  ELSE
616 *
617 * Multiply by inv(sub(A))*diag(W).
618 *
619  IF( mycol.EQ.ixbcol ) THEN
620  IF( np.GT.0 ) THEN
621  DO 80 ii = iiw-1, iiw+np-2
622  work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
623  80 CONTINUE
624  END IF
625  END IF
626 *
627  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
628  $ work( ipr ), iw, jw, descw, info )
629  END IF
630  GO TO 60
631  END IF
632 *
633 * Normalize error.
634 *
635  lstres = zero
636  IF( mycol.EQ.ixbcol ) THEN
637  IF( np.GT.0 ) THEN
638  DO 90 ii = iixb, iixb+np-1
639  lstres = max( lstres, abs( x( ioffxb+ii ) ) )
640  90 CONTINUE
641  END IF
642  CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
643  $ idum, 1, -1, mycol )
644  IF( lstres.NE.zero )
645  $ ferr( jjfbe ) = est / lstres
646 *
647  jjxb = jjxb + 1
648  jjfbe = jjfbe + 1
649  ioffxb = ioffxb + ldxb
650 *
651  END IF
652 *
653  100 CONTINUE
654 *
655  icurcol = mod( ixbcol+1, npcol )
656 *
657 * Do for each right hand side
658 *
659  DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
660  jbrhs = min( jb+nrhs-j, descb( nb_ ) )
661  descw( csrc_ ) = icurcol
662 *
663  DO 190 k = 0, jbrhs-1
664 *
665  count = 1
666  lstres = three
667  110 CONTINUE
668 *
669 * Loop until stopping criterion is satisfied.
670 *
671 * Compute residual R = sub( B ) - sub( A )*sub( X ).
672 *
673  CALL pscopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
674  $ descw, 1 )
675  CALL pssymv( uplo, n, -one, a, ia, ja, desca, x, ix, j+k,
676  $ descx, 1, one, work( ipr ), iw, jw, descw, 1 )
677 *
678 * Compute componentwise relative backward error from formula
679 *
680 * max(i) ( abs(R(i)) /
681 * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) )
682 *
683 * where abs(Z) is the componentwise absolute value of the
684 * matrix or vector Z. If the i-th component of the
685 * denominator is less than SAFE2, then SAFE1 is added to the
686 * i-th components of the numerator and denominator before
687 * dividing.
688 *
689  IF( mycol.EQ.icurcol ) THEN
690  IF( np.GT.0 ) THEN
691  DO 120 ii = iixb, iixb+np-1
692  work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
693  120 CONTINUE
694  END IF
695  END IF
696 *
697  CALL psasymv( uplo, n, one, a, ia, ja, desca, x, ix, j+k,
698  $ descx, 1, one, work( ipb ), iw, jw, descw, 1 )
699 *
700  s = zero
701  IF( mycol.EQ.icurcol ) THEN
702  IF( np.GT.0 )THEN
703  DO 130 ii = iiw-1, iiw+np-2
704  IF( work( ipb+ii ).GT.safe2 ) THEN
705  s = max( s, abs( work( ipr+ii ) ) /
706  $ work( ipb+ii ) )
707  ELSE
708  s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
709  $ ( work( ipb+ii )+safe1 ) )
710  END IF
711  130 CONTINUE
712  END IF
713  END IF
714 *
715  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
716  $ -1, mycol )
717  IF( mycol.EQ.icurcol )
718  $ berr( jjfbe ) = s
719 *
720 * Test stopping criterion. Continue iterating if
721 * 1) The residual BERR(J+K) is larger than machine epsilon,
722 * and
723 * 2) BERR(J+K) decreased by at least a factor of 2 during
724 * the last iteration, and
725 * 3) At most ITMAX iterations tried.
726 *
727  IF( s.GT.eps .AND. two*s.LE.lstres .AND.
728  $ count.LE.itmax ) THEN
729 *
730 * Update solution and try again.
731 *
732  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
733  $ work( ipr ), iw, jw, descw, info )
734  CALL psaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
735  $ ix, j+k, descx, 1 )
736  lstres = s
737  count = count + 1
738  GO TO 110
739  END IF
740 *
741 * Bound error from formula
742 *
743 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
744 * norm( abs(inv(sub(A)))*
745 * ( abs(R) + NZ*EPS*(
746 * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X))
747 *
748 * where
749 * norm(Z) is the magnitude of the largest component of Z
750 * inv(sub(A)) is the inverse of sub(A)
751 * abs(Z) is the componentwise absolute value of the matrix
752 * or vector Z
753 * NZ is the maximum number of nonzeros in any row of sub(A),
754 * plus 1
755 * EPS is machine epsilon
756 *
757 * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))
758 * +abs(sub(B))) is incremented by SAFE1 if the i-th component
759 * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
760 *
761 * Use PSLACON to estimate the infinity-norm of the matrix
762 * inv(sub(A)) * diag(W), where
763 * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
764 *
765  IF( mycol.EQ.icurcol ) THEN
766  IF( np.GT.0 ) THEN
767  DO 140 ii = iiw-1, iiw+np-2
768  IF( work( ipb+ii ).GT.safe2 ) THEN
769  work( ipb+ii ) = abs( work( ipr+ii ) ) +
770  $ nz*eps*work( ipb+ii )
771  ELSE
772  work( ipb+ii ) = abs( work( ipr+ii ) ) +
773  $ nz*eps*work( ipb+ii ) + safe1
774  END IF
775  140 CONTINUE
776  END IF
777  END IF
778 *
779  kase = 0
780  150 CONTINUE
781  IF( mycol.EQ.icurcol ) THEN
782  CALL sgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
783  $ descw( lld_ ) )
784  ELSE
785  CALL sgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
786  $ descw( lld_ ), myrow, icurcol )
787  END IF
788  descw( csrc_ ) = mycol
789  CALL pslacon( n, work( ipv ), iw, jw, descw, work( ipr ),
790  $ iw, jw, descw, iwork, est, kase )
791  descw( csrc_ ) = icurcol
792 *
793  IF( kase.NE.0 ) THEN
794  IF( kase.EQ.1 ) THEN
795 *
796 * Multiply by diag(W)*inv(sub(A)').
797 *
798  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
799  $ work( ipr ), iw, jw, descw, info )
800 *
801  IF( mycol.EQ.icurcol ) THEN
802  IF( np.GT.0 ) THEN
803  DO 160 ii = iiw-1, iiw+np-2
804  work( ipr+ii ) = work( ipb+ii )*
805  $ work( ipr+ii )
806  160 CONTINUE
807  END IF
808  END IF
809  ELSE
810 *
811 * Multiply by inv(sub(A))*diag(W).
812 *
813  IF( mycol.EQ.icurcol ) THEN
814  IF( np.GT.0 ) THEN
815  DO 170 ii = iiw-1, iiw+np-2
816  work( ipr+ii ) = work( ipb+ii )*
817  $ work( ipr+ii )
818  170 CONTINUE
819  END IF
820  END IF
821 *
822  CALL pspotrs( uplo, n, 1, af, iaf, jaf, descaf,
823  $ work( ipr ), iw, jw, descw, info )
824  END IF
825  GO TO 150
826  END IF
827 *
828 * Normalize error.
829 *
830  lstres = zero
831  IF( mycol.EQ.icurcol ) THEN
832  IF( np.GT.0 ) THEN
833  DO 180 ii = iixb, iixb+np-1
834  lstres = max( lstres, abs( x( ioffxb+ii ) ) )
835  180 CONTINUE
836  END IF
837  CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1,
838  $ idum, idum, 1, -1, mycol )
839  IF( lstres.NE.zero )
840  $ ferr( jjfbe ) = est / lstres
841 *
842  jjxb = jjxb + 1
843  jjfbe = jjfbe + 1
844  ioffxb = ioffxb + ldxb
845 *
846  END IF
847 *
848  190 CONTINUE
849 *
850  icurcol = mod( icurcol+1, npcol )
851 *
852  200 CONTINUE
853 *
854  work( 1 ) = real( lwmin )
855  iwork( 1 ) = liwmin
856 *
857  RETURN
858 *
859 * End of PSPORFS
860 *
861  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslacon
subroutine pslacon(N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, EST, KASE)
Definition: pslacon.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pspotrs
subroutine pspotrs(UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, DESCB, INFO)
Definition: pspotrs.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181
psporfs
subroutine psporfs(UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psporfs.f:4