SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdgerfs()

subroutine pdgerfs ( character  trans,
integer  n,
integer  nrhs,
double precision, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
double precision, dimension( * )  af,
integer  iaf,
integer  jaf,
integer, dimension( * )  descaf,
integer, dimension( * )  ipiv,
double precision, dimension( * )  b,
integer  ib,
integer  jb,
integer, dimension( * )  descb,
double precision, dimension( * )  x,
integer  ix,
integer  jx,
integer, dimension( * )  descx,
double precision, dimension( * )  ferr,
double precision, dimension( * )  berr,
double precision, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer  info 
)

Definition at line 1 of file pdgerfs.f.

5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* November 15, 1997
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS
13 INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14 $ LIWORK, LWORK, N, NRHS
15* ..
16* .. Array Arguments ..
17 INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18 $ DESCX( * ),IPIV( * ), IWORK( * )
19 DOUBLE PRECISION A( * ), AF( * ), B( * ), BERR( * ), FERR( * ),
20 $ WORK( * ), X( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PDGERFS improves the computed solution to a system of linear
27* equations and provides error bounds and backward error estimates for
28* the solutions.
29*
30* Notes
31* =====
32*
33* Each global data object is described by an associated description
34* vector. This vector stores the information required to establish
35* the mapping between an object element and its corresponding process
36* and memory location.
37*
38* Let A be a generic term for any 2D block cyclicly distributed array.
39* Such a global array has an associated description vector DESCA.
40* In the following comments, the character _ should be read as
41* "of the global array".
42*
43* NOTATION STORED IN EXPLANATION
44* --------------- -------------- --------------------------------------
45* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46* DTYPE_A = 1.
47* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48* the BLACS process grid A is distribu-
49* ted over. The context itself is glo-
50* bal, but the handle (the integer
51* value) may vary.
52* M_A (global) DESCA( M_ ) The number of rows in the global
53* array A.
54* N_A (global) DESCA( N_ ) The number of columns in the global
55* array A.
56* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57* the rows of the array.
58* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59* the columns of the array.
60* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61* row of the array A is distributed.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of the array A is
64* distributed.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array. LLD_A >= MAX(1,LOCr(M_A)).
67*
68* Let K be the number of rows or columns of a distributed matrix,
69* and assume that its process grid has dimension p x q.
70* LOCr( K ) denotes the number of elements of K that a process
71* would receive if K were distributed over the p processes of its
72* process column.
73* Similarly, LOCc( K ) denotes the number of elements of K that a
74* process would receive if K were distributed over the q processes of
75* its process row.
76* The values of LOCr() and LOCc() may be determined via a call to the
77* ScaLAPACK tool function, NUMROC:
78* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80* An upper bound for these quantities may be computed by:
81* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84* In the following comments, sub( A ), sub( X ) and sub( B ) denote
85* respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86* B(IB:IB+N-1,JB:JB+NRHS-1).
87*
88* Arguments
89* =========
90*
91* TRANS (global input) CHARACTER*1
92* Specifies the form of the system of equations.
93* = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94* = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95* = 'C': sub( A )**T * sub( X ) = sub( B )
96* (Conjugate transpose = Transpose)
97*
98*
99* N (global input) INTEGER
100* The order of the matrix sub( A ). N >= 0.
101*
102* NRHS (global input) INTEGER
103* The number of right hand sides, i.e., the number of columns
104* of the matrices sub( B ) and sub( X ). NRHS >= 0.
105*
106* A (local input) DOUBLE PRECISION pointer into the local
107* memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
108* This array contains the local pieces of the distributed
109* matrix sub( A ).
110*
111* IA (global input) INTEGER
112* The row index in the global array A indicating the first
113* row of sub( A ).
114*
115* JA (global input) INTEGER
116* The column index in the global array A indicating the
117* first column of sub( A ).
118*
119* DESCA (global and local input) INTEGER array of dimension DLEN_.
120* The array descriptor for the distributed matrix A.
121*
122* AF (local input) DOUBLE PRECISION pointer into the local
123* memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
124* This array contains the local pieces of the distributed
125* factors of the matrix sub( A ) = P * L * U as computed by
126* PDGETRF.
127*
128* IAF (global input) INTEGER
129* The row index in the global array AF indicating the first
130* row of sub( AF ).
131*
132* JAF (global input) INTEGER
133* The column index in the global array AF indicating the
134* first column of sub( AF ).
135*
136* DESCAF (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix AF.
138*
139* IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
140* This array contains the pivoting information as computed
141* by PDGETRF. IPIV(i) -> The global row local row i
142* was swapped with. This array is tied to the distributed
143* matrix A.
144*
145* B (local input) DOUBLE PRECISION pointer into the local
146* memory to an array of local dimension
147* (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
148* pieces of the distributed matrix of right hand sides
149* sub( B ).
150*
151* IB (global input) INTEGER
152* The row index in the global array B indicating the first
153* row of sub( B ).
154*
155* JB (global input) INTEGER
156* The column index in the global array B indicating the
157* first column of sub( B ).
158*
159* DESCB (global and local input) INTEGER array of dimension DLEN_.
160* The array descriptor for the distributed matrix B.
161*
162* X (local input and output) DOUBLE PRECISION pointer into the
163* local memory to an array of local dimension
164* (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
165* the local pieces of the distributed matrix solution
166* sub( X ). On exit, the improved solution vectors.
167*
168* IX (global input) INTEGER
169* The row index in the global array X indicating the first
170* row of sub( X ).
171*
172* JX (global input) INTEGER
173* The column index in the global array X indicating the
174* first column of sub( X ).
175*
176* DESCX (global and local input) INTEGER array of dimension DLEN_.
177* The array descriptor for the distributed matrix X.
178*
179* FERR (local output) DOUBLE PRECISION array of local dimension
180* LOCc(JB+NRHS-1).
181* The estimated forward error bound for each solution vector
182* of sub( X ). If XTRUE is the true solution corresponding
183* to sub( X ), FERR is an estimated upper bound for the
184* magnitude of the largest element in (sub( X ) - XTRUE)
185* divided by the magnitude of the largest element in sub( X ).
186* The estimate is as reliable as the estimate for RCOND, and
187* is almost always a slight overestimate of the true error.
188* This array is tied to the distributed matrix X.
189*
190* BERR (local output) DOUBLE PRECISION array of local dimension
191* LOCc(JB+NRHS-1). The componentwise relative backward
192* error of each solution vector (i.e., the smallest re-
193* lative change in any entry of sub( A ) or sub( B )
194* that makes sub( X ) an exact solution).
195* This array is tied to the distributed matrix X.
196*
197* WORK (local workspace/local output) DOUBLE PRECISION array,
198* dimension (LWORK)
199* On exit, WORK(1) returns the minimal and optimal LWORK.
200*
201* LWORK (local or global input) INTEGER
202* The dimension of the array WORK.
203* LWORK is local input and must be at least
204* LWORK >= 3*LOCr( N + MOD(IA-1,MB_A) )
205*
206* If LWORK = -1, then LWORK is global input and a workspace
207* query is assumed; the routine only calculates the minimum
208* and optimal size for all work arrays. Each of these
209* values is returned in the first entry of the corresponding
210* work array, and no error message is issued by PXERBLA.
211*
212* IWORK (local workspace/local output) INTEGER array,
213* dimension (LIWORK)
214* On exit, IWORK(1) returns the minimal and optimal LIWORK.
215*
216* LIWORK (local or global input) INTEGER
217* The dimension of the array IWORK.
218* LIWORK is local input and must be at least
219* LIWORK >= LOCr( N + MOD(IB-1,MB_B) ).
220*
221* If LIWORK = -1, then LIWORK is global input and a workspace
222* query is assumed; the routine only calculates the minimum
223* and optimal size for all work arrays. Each of these
224* values is returned in the first entry of the corresponding
225* work array, and no error message is issued by PXERBLA.
226*
227*
228* INFO (global output) INTEGER
229* = 0: successful exit
230* < 0: If the i-th argument is an array and the j-entry had
231* an illegal value, then INFO = -(i*100+j), if the i-th
232* argument is a scalar and had an illegal value, then
233* INFO = -i.
234*
235* Internal Parameters
236* ===================
237*
238* ITMAX is the maximum number of steps of iterative refinement.
239*
240* Notes
241* =====
242*
243* This routine temporarily returns when N <= 1.
244*
245* The distributed submatrices op( A ) and op( AF ) (respectively
246* sub( X ) and sub( B ) ) should be distributed the same way on the
247* same processes. These conditions ensure that sub( A ) and sub( AF )
248* (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
249*
250* Moreover, this routine requires the distributed submatrices sub( A ),
251* sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
252* i.e., if f(x,y) = MOD( x-1, y ):
253* f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
254* f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
255* f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
256* f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
257*
258* =====================================================================
259*
260* .. Parameters ..
261 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
262 $ LLD_, MB_, M_, NB_, N_, RSRC_
263 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
264 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
265 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
266 INTEGER ITMAX
267 parameter( itmax = 5 )
268 DOUBLE PRECISION ZERO, ONE
269 parameter( zero = 0.0d+0, one = 1.0d+0 )
270 DOUBLE PRECISION TWO, THREE
271 parameter( two = 2.0d+0, three = 3.0d+0 )
272* ..
273* .. Local Scalars ..
274 LOGICAL LQUERY, NOTRAN
275 CHARACTER TRANST
276 INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277 $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB,
278 $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW,
279 $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB,
280 $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW,
281 $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS,
282 $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ
283 DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284* ..
285* .. Local Arrays ..
286 INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
287* ..
288* .. External Functions ..
289 LOGICAL LSAME
290 INTEGER ICEIL, INDXG2P, NUMROC
291 DOUBLE PRECISION PDLAMCH
292 EXTERNAL iceil, indxg2p, lsame, numroc, pdlamch
293* ..
294* .. External Subroutines ..
295 EXTERNAL blacs_gridinfo, chk1mat, descset, dgamx2d,
296 $ dgebr2d, dgebs2d, infog2l, pchk2mat,
297 $ pdagemv, pdaxpy, pdcopy, pdgemv,
299* ..
300* .. Intrinsic Functions ..
301 INTRINSIC abs, dble, ichar, max, min, mod
302* ..
303* .. Executable Statements ..
304*
305* Get grid parameters
306*
307 ictxt = desca( ctxt_ )
308 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
309*
310* Test the input parameters.
311*
312 notran = lsame( trans, 'N' )
313*
314 info = 0
315 IF( nprow.EQ.-1 ) THEN
316 info = -(700+ctxt_)
317 ELSE
318 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
319 CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
320 CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
321 CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
322 IF( info.EQ.0 ) THEN
323 iroffa = mod( ia-1, desca( mb_ ) )
324 icoffa = mod( ja-1, desca( nb_ ) )
325 iroffaf = mod( iaf-1, descaf( mb_ ) )
326 icoffaf = mod( jaf-1, descaf( nb_ ) )
327 iroffb = mod( ib-1, descb( mb_ ) )
328 icoffb = mod( jb-1, descb( nb_ ) )
329 iroffx = mod( ix-1, descx( mb_ ) )
330 icoffx = mod( jx-1, descx( nb_ ) )
331 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
332 $ nprow )
333 iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
334 $ descaf( csrc_ ), npcol )
335 iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
336 $ descaf( rsrc_ ), nprow )
337 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
338 $ npcol )
339 CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
340 $ iixb, jjxb, ixbrow, ixbcol )
341 ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
342 $ nprow )
343 ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
344 $ npcol )
345 npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
346 $ nprow )
347 lwmin = 3 * npmod
348 liwmin = npmod
349 work( 1 ) = dble( lwmin )
350 iwork( 1 ) = liwmin
351 lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
352*
353 IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
354 $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
355 info = -1
356 ELSE IF( n.LT.0 ) THEN
357 info = -2
358 ELSE IF( nrhs.LT.0 ) THEN
359 info = -3
360 ELSE IF( iroffa.NE.0 ) THEN
361 info = -5
362 ELSE IF( icoffa.NE.0 ) THEN
363 info = -6
364 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
365 info = -( 700 + nb_ )
366 ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
367 info = -( 1100 + mb_ )
368 ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
369 info = -9
370 ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
371 info = -( 1100 + nb_ )
372 ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
373 info = -10
374 ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
375 info = -( 1100 + ctxt_ )
376 ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
377 info = -14
378 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
379 info = -( 1600 + mb_ )
380 ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
381 info = -( 1600 + ctxt_ )
382 ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
383 info = -( 2000 + mb_ )
384 ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
385 info = -18
386 ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
387 info = -( 2000 + nb_ )
388 ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
389 info = -19
390 ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
391 info = -( 2000 + ctxt_ )
392 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
393 info = -24
394 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
395 info = -26
396 END IF
397 END IF
398*
399 IF( notran ) THEN
400 idum1( 1 ) = ichar( 'N' )
401 ELSE IF( lsame( trans, 'T' ) ) THEN
402 idum1( 1 ) = ichar( 'T' )
403 ELSE
404 idum1( 1 ) = ichar( 'C' )
405 END IF
406 idum2( 1 ) = 1
407 idum1( 2 ) = n
408 idum2( 2 ) = 2
409 idum1( 3 ) = nrhs
410 idum2( 3 ) = 3
411 IF( lwork.EQ.-1 ) THEN
412 idum1( 4 ) = -1
413 ELSE
414 idum1( 4 ) = 1
415 END IF
416 idum2( 4 ) = 24
417 IF( liwork.EQ.-1 ) THEN
418 idum1( 5 ) = -1
419 ELSE
420 idum1( 5 ) = 1
421 END IF
422 idum2( 5 ) = 26
423 CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
424 $ jaf, descaf, 11, 5, idum1, idum2, info )
425 CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
426 $ ix, jx, descx, 20, 5, idum1, idum2, info )
427 END IF
428 IF( info.NE.0 ) THEN
429 CALL pxerbla( ictxt, 'PDGERFS', -info )
430 RETURN
431 ELSE IF( lquery ) THEN
432 RETURN
433 END IF
434*
435 jjfbe = jjxb
436 myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
437 $ npcol )
438*
439* Quick return if possible
440*
441 IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
442 DO 10 jj = jjfbe, myrhs
443 ferr( jj ) = zero
444 berr( jj ) = zero
445 10 CONTINUE
446 RETURN
447 END IF
448*
449 IF( notran ) THEN
450 transt = 'T'
451 ELSE
452 transt = 'N'
453 END IF
454*
455 np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
456 CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
457 $ ictxt, max( 1, np0 ) )
458 ipb = 1
459 ipr = ipb + np0
460 ipv = ipr + np0
461 IF( myrow.EQ.ixbrow ) THEN
462 iiw = 1 + iroffb
463 np = np0 - iroffb
464 ELSE
465 iiw = 1
466 np = np0
467 END IF
468 iw = 1 + iroffb
469 jw = 1
470 ldxb = descb( lld_ )
471 ioffxb = ( jjxb-1 )*ldxb
472*
473* NZ = 1 + maximum number of nonzero entries in each row of sub( A )
474*
475 nz = n + 1
476 eps = pdlamch( ictxt, 'Epsilon' )
477 safmin = pdlamch( ictxt, 'Safe minimum' )
478 safe1 = nz*safmin
479 safe2 = safe1 / eps
480 jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
481*
482* Handle first block separately
483*
484 jbrhs = jn - jb + 1
485 DO 100 k = 0, jbrhs-1
486*
487 count = 1
488 lstres = three
489 20 CONTINUE
490*
491* Loop until stopping criterion is satisfied.
492*
493* Compute residual R = sub(B) - op(sub(A)) * sub(X),
494* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
495* depending on TRANS.
496*
497 CALL pdcopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
498 $ descw, 1 )
499 CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
500 $ jx+k, descx, 1, one, work( ipr ), iw, jw,
501 $ descw, 1 )
502*
503* Compute componentwise relative backward error from formula
504*
505* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
506*
507* where abs(Z) is the componentwise absolute value of the
508* matrix or vector Z. If the i-th component of the
509* denominator is less than SAFE2, then SAFE1 is added to the
510* i-th components of the numerator and denominator before
511* dividing.
512*
513 IF( mycol.EQ.ixbcol ) THEN
514 IF( np.GT.0 ) THEN
515 DO 30 ii = iixb, iixb + np - 1
516 work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
517 30 CONTINUE
518 END IF
519 END IF
520*
521 CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix, jx+k,
522 $ descx, 1, one, work( ipb ), iw, jw, descw, 1 )
523*
524 s = zero
525 IF( mycol.EQ.ixbcol ) THEN
526 IF( np.GT.0 ) THEN
527 DO 40 ii = iiw-1, iiw+np-2
528 IF( work( ipb+ii ).GT.safe2 ) THEN
529 s = max( s, abs( work( ipr+ii ) ) /
530 $ work( ipb+ii ) )
531 ELSE
532 s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
533 $ ( work( ipb+ii )+safe1 ) )
534 END IF
535 40 CONTINUE
536 END IF
537 END IF
538*
539 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
540 $ -1, mycol )
541 IF( mycol.EQ.ixbcol )
542 $ berr( jjfbe ) = s
543*
544* Test stopping criterion. Continue iterating if
545* 1) The residual BERR(J+K) is larger than machine epsilon,
546* and
547* 2) BERR(J+K) decreased by at least a factor of 2 during the
548* last iteration, and
549* 3) At most ITMAX iterations tried.
550*
551 IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
552*
553* Update solution and try again.
554*
555 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
556 $ work( ipr ), iw, jw, descw, info )
557 CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
558 $ jx+k, descx, 1 )
559 lstres = s
560 count = count + 1
561 GO TO 20
562 END IF
563*
564* Bound error from formula
565*
566* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
567* norm( abs(inv(op(sub(A))))*
568* ( abs(R) + NZ*EPS*(
569* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
570*
571* where
572* norm(Z) is the magnitude of the largest component of Z
573* inv(op(sub(A))) is the inverse of op(sub(A))
574* abs(Z) is the componentwise absolute value of the matrix
575* or vector Z
576* NZ is the maximum number of nonzeros in any row of sub(A),
577* plus 1
578* EPS is machine epsilon
579*
580* The i-th component of
581* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
582* is incremented by SAFE1 if the i-th component of
583* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
584* SAFE2.
585*
586* Use PDLACON to estimate the infinity-norm of the matrix
587* inv(op(sub(A))) * diag(W), where
588* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
589*
590 IF( mycol.EQ.ixbcol ) THEN
591 IF( np.GT.0 ) THEN
592 DO 50 ii = iiw-1, iiw+np-2
593 IF( work( ipb+ii ).GT.safe2 ) THEN
594 work( ipb+ii ) = abs( work( ipr+ii ) ) +
595 $ nz*eps*work( ipb+ii )
596 ELSE
597 work( ipb+ii ) = abs( work( ipr+ii ) ) +
598 $ nz*eps*work( ipb+ii ) + safe1
599 END IF
600 50 CONTINUE
601 END IF
602 END IF
603*
604 kase = 0
605 60 CONTINUE
606 IF( mycol.EQ.ixbcol ) THEN
607 CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
608 $ descw( lld_ ) )
609 ELSE
610 CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
611 $ descw( lld_ ), myrow, ixbcol )
612 END IF
613 descw( csrc_ ) = mycol
614 CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
615 $ iw, jw, descw, iwork, est, kase )
616 descw( csrc_ ) = ixbcol
617*
618 IF( kase.NE.0 ) THEN
619 IF( kase.EQ.1 ) THEN
620*
621* Multiply by diag(W)*inv(op(sub(A))').
622*
623 CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
624 $ ipiv, work( ipr ), iw, jw, descw, info )
625*
626 IF( mycol.EQ.ixbcol ) THEN
627 IF( np.GT.0 ) THEN
628 DO 70 ii = iiw-1, iiw+np-2
629 work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
630 70 CONTINUE
631 END IF
632 END IF
633 ELSE
634*
635* Multiply by inv(op(sub(A)))*diag(W).
636*
637 IF( mycol.EQ.ixbcol ) THEN
638 IF( np.GT.0 ) THEN
639 DO 80 ii = iiw-1, iiw+np-2
640 work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
641 80 CONTINUE
642 END IF
643 END IF
644*
645 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
646 $ work( ipr ), iw, jw, descw, info )
647 END IF
648 GO TO 60
649 END IF
650*
651* Normalize error.
652*
653 lstres = zero
654 IF( mycol.EQ.ixbcol ) THEN
655 IF( np.GT.0 ) THEN
656 DO 90 ii = iixb, iixb+np-1
657 lstres = max( lstres, abs( x( ioffxb+ii ) ) )
658 90 CONTINUE
659 END IF
660 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
661 $ idum, 1, -1, mycol )
662 IF( lstres.NE.zero )
663 $ ferr( jjfbe ) = est / lstres
664*
665 jjxb = jjxb + 1
666 jjfbe = jjfbe + 1
667 ioffxb = ioffxb + ldxb
668*
669 END IF
670*
671 100 CONTINUE
672*
673 icurcol = mod( ixbcol+1, npcol )
674*
675* Do for each right hand side
676*
677 DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
678 jbrhs = min( jb+nrhs-j, descb( nb_ ) )
679 descw( csrc_ ) = icurcol
680*
681 DO 190 k = 0, jbrhs-1
682*
683 count = 1
684 lstres = three
685 110 CONTINUE
686*
687* Loop until stopping criterion is satisfied.
688*
689* Compute residual R = sub(B) - op(sub(A)) * sub(X),
690* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
691* depending on TRANS.
692*
693 CALL pdcopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
694 $ descw, 1 )
695 CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x,
696 $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
697 $ descw, 1 )
698*
699* Compute componentwise relative backward error from formula
700*
701* max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
702* abs(sub(B)))(i))
703*
704* where abs(Z) is the componentwise absolute value of the
705* matrix or vector Z. If the i-th component of the
706* denominator is less than SAFE2, then SAFE1 is added to the
707* i-th components of the numerator and denominator before
708* dividing.
709*
710 IF( mycol.EQ.icurcol ) THEN
711 IF( np.GT.0 ) THEN
712 DO 120 ii = iixb, iixb+np-1
713 work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
714 120 CONTINUE
715 END IF
716 END IF
717*
718 CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix,
719 $ j+k, descx, 1, one, work( ipb ), iw, jw,
720 $ descw, 1 )
721*
722 s = zero
723 IF( mycol.EQ.icurcol ) THEN
724 IF( np.GT.0 )THEN
725 DO 130 ii = iiw-1, iiw+np-2
726 IF( work( ipb+ii ).GT.safe2 ) THEN
727 s = max( s, abs( work( ipr+ii ) ) /
728 $ work( ipb+ii ) )
729 ELSE
730 s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
731 $ ( work( ipb+ii )+safe1 ) )
732 END IF
733 130 CONTINUE
734 END IF
735 END IF
736*
737 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
738 $ -1, mycol )
739 IF( mycol.EQ.icurcol )
740 $ berr( jjfbe ) = s
741*
742* Test stopping criterion. Continue iterating if
743* 1) The residual BERR(J+K) is larger than machine epsilon,
744* and
745* 2) BERR(J+K) decreased by at least a factor of 2 during
746* the last iteration, and
747* 3) At most ITMAX iterations tried.
748*
749 IF( s.GT.eps .AND. two*s.LE.lstres .AND.
750 $ count.LE.itmax ) THEN
751*
752* Update solution and try again.
753*
754 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
755 $ work( ipr ), iw, jw, descw, info )
756 CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
757 $ ix, j+k, descx, 1 )
758 lstres = s
759 count = count + 1
760 GO TO 110
761 END IF
762*
763* Bound error from formula
764*
765* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
766* norm( abs(inv(op(sub(A))))*
767* ( abs(R) + NZ*EPS*(
768* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
769*
770* where
771* norm(Z) is the magnitude of the largest component of Z
772* inv(op(sub(A))) is the inverse of op(sub(A))
773* abs(Z) is the componentwise absolute value of the matrix
774* or vector Z
775* NZ is the maximum number of nonzeros in any row of sub(A),
776* plus 1
777* EPS is machine epsilon
778*
779* The i-th component of
780* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
781* is incremented by SAFE1 if the i-th component of
782* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
783* SAFE2.
784*
785* Use PDLACON to estimate the infinity-norm of the matrix
786* inv(op(sub(A))) * diag(W), where
787* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
788*
789 IF( mycol.EQ.icurcol ) THEN
790 IF( np.GT.0 ) THEN
791 DO 140 ii = iiw-1, iiw+np-2
792 IF( work( ipb+ii ).GT.safe2 ) THEN
793 work( ipb+ii ) = abs( work( ipr+ii ) ) +
794 $ nz*eps*work( ipb+ii )
795 ELSE
796 work( ipb+ii ) = abs( work( ipr+ii ) ) +
797 $ nz*eps*work( ipb+ii ) + safe1
798 END IF
799 140 CONTINUE
800 END IF
801 END IF
802*
803 kase = 0
804 150 CONTINUE
805 IF( mycol.EQ.icurcol ) THEN
806 CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
807 $ descw( lld_ ) )
808 ELSE
809 CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
810 $ descw( lld_ ), myrow, icurcol )
811 END IF
812 descw( csrc_ ) = mycol
813 CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
814 $ iw, jw, descw, iwork, est, kase )
815 descw( csrc_ ) = icurcol
816*
817 IF( kase.NE.0 ) THEN
818 IF( kase.EQ.1 ) THEN
819*
820* Multiply by diag(W)*inv(op(sub(A))').
821*
822 CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
823 $ ipiv, work( ipr ), iw, jw, descw, info )
824*
825 IF( mycol.EQ.icurcol ) THEN
826 IF( np.GT.0 ) THEN
827 DO 160 ii = iiw-1, iiw+np-2
828 work( ipr+ii ) = work( ipb+ii )*
829 $ work( ipr+ii )
830 160 CONTINUE
831 END IF
832 END IF
833 ELSE
834*
835* Multiply by inv(op(sub(A)))*diag(W).
836*
837 IF( mycol.EQ.icurcol ) THEN
838 IF( np.GT.0 ) THEN
839 DO 170 ii = iiw-1, iiw+np-2
840 work( ipr+ii ) = work( ipb+ii )*
841 $ work( ipr+ii )
842 170 CONTINUE
843 END IF
844 END IF
845*
846 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf,
847 $ ipiv, work( ipr ), iw, jw, descw,
848 $ info )
849 END IF
850 GO TO 150
851 END IF
852*
853* Normalize error.
854*
855 lstres = zero
856 IF( mycol.EQ.icurcol ) THEN
857 IF( np.GT.0 ) THEN
858 DO 180 ii = iixb, iixb+np-1
859 lstres = max( lstres, abs( x( ioffxb+ii ) ) )
860 180 CONTINUE
861 END IF
862 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
863 $ 1, idum, idum, 1, -1, mycol )
864 IF( lstres.NE.zero )
865 $ ferr( jjfbe ) = est / lstres
866*
867 jjxb = jjxb + 1
868 jjfbe = jjfbe + 1
869 ioffxb = ioffxb + ldxb
870*
871 END IF
872*
873 190 CONTINUE
874*
875 icurcol = mod( icurcol+1, npcol )
876*
877 200 CONTINUE
878*
879 work( 1 ) = dble( lwmin )
880 iwork( 1 ) = liwmin
881*
882 RETURN
883*
884* End of PDGERFS
885*
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pdgetrs.f:3
subroutine pdlacon(n, v, iv, jv, descv, x, ix, jx, descx, isgn, est, kase)
Definition pdlacon.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: