ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzporfs.f
Go to the documentation of this file.
1  SUBROUTINE pzporfs( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF,
2  $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX,
3  $ FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO )
4 *
5 * -- ScaLAPACK routine (version 1.7) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * November 15, 1997
9 *
10 * .. Scalar Arguments ..
11  CHARACTER UPLO
12  INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX,
13  $ lrwork, lwork, n, nrhs
14 * ..
15 * .. Array Arguments ..
16  INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
17  $ DESCX( * )
18  COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ), X( * )
19  DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * PZPORFS improves the computed solution to a system of linear
26 * equations when the coefficient matrix is Hermitian positive definite
27 * and provides error bounds and backward error estimates for the
28 * solutions.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCr(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCr( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCc( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCr() and LOCc() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * In the following comments, sub( A ), sub( X ) and sub( B ) denote
85 * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86 * B(IB:IB+N-1,JB:JB+NRHS-1).
87 *
88 * Arguments
89 * =========
90 *
91 * UPLO (global input) CHARACTER*1
92 * Specifies whether the upper or lower triangular part of the
93 * Hermitian matrix sub( A ) is stored.
94 * = 'U': Upper triangular
95 * = 'L': Lower triangular
96 *
97 * N (global input) INTEGER
98 * The order of the matrix sub( A ). N >= 0.
99 *
100 * NRHS (global input) INTEGER
101 * The number of right hand sides, i.e., the number of columns
102 * of the matrices sub( B ) and sub( X ). NRHS >= 0.
103 *
104 * A (local input) COMPLEX*16 pointer into the local
105 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ).
106 * This array contains the local pieces of the N-by-N Hermitian
107 * distributed matrix sub( A ) to be factored.
108 * If UPLO = 'U', the leading N-by-N upper triangular part of
109 * sub( A ) contains the upper triangular part of the matrix,
110 * and its strictly lower triangular part is not referenced.
111 * If UPLO = 'L', the leading N-by-N lower triangular part of
112 * sub( A ) contains the lower triangular part of the distribu-
113 * ted matrix, and its strictly upper triangular part is not
114 * referenced.
115 *
116 * IA (global input) INTEGER
117 * The row index in the global array A indicating the first
118 * row of sub( A ).
119 *
120 * JA (global input) INTEGER
121 * The column index in the global array A indicating the
122 * first column of sub( A ).
123 *
124 * DESCA (global and local input) INTEGER array of dimension DLEN_.
125 * The array descriptor for the distributed matrix A.
126 *
127 * AF (local input) COMPLEX*16 pointer into the local memory
128 * to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
129 * On entry, this array contains the factors L or U from the
130 * Cholesky factorization sub( A ) = L*L**H or U**H*U, as
131 * computed by PZPOTRF.
132 *
133 * IAF (global input) INTEGER
134 * The row index in the global array AF indicating the first
135 * row of sub( AF ).
136 *
137 * JAF (global input) INTEGER
138 * The column index in the global array AF indicating the
139 * first column of sub( AF ).
140 *
141 * DESCAF (global and local input) INTEGER array of dimension DLEN_.
142 * The array descriptor for the distributed matrix AF.
143 *
144 * B (local input) COMPLEX*16 pointer into the local memory
145 * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ).
146 * On entry, this array contains the the local pieces of the
147 * right hand sides sub( B ).
148 *
149 * IB (global input) INTEGER
150 * The row index in the global array B indicating the first
151 * row of sub( B ).
152 *
153 * JB (global input) INTEGER
154 * The column index in the global array B indicating the
155 * first column of sub( B ).
156 *
157 * DESCB (global and local input) INTEGER array of dimension DLEN_.
158 * The array descriptor for the distributed matrix B.
159 *
160 * X (local input) COMPLEX*16 pointer into the local memory
161 * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ).
162 * On entry, this array contains the the local pieces of the
163 * solution vectors sub( X ). On exit, it contains the
164 * improved solution vectors.
165 *
166 * IX (global input) INTEGER
167 * The row index in the global array X indicating the first
168 * row of sub( X ).
169 *
170 * JX (global input) INTEGER
171 * The column index in the global array X indicating the
172 * first column of sub( X ).
173 *
174 * DESCX (global and local input) INTEGER array of dimension DLEN_.
175 * The array descriptor for the distributed matrix X.
176 *
177 * FERR (local output) DOUBLE PRECISION array of local dimension
178 * LOCc(JB+NRHS-1).
179 * The estimated forward error bound for each solution vector
180 * of sub( X ). If XTRUE is the true solution corresponding
181 * to sub( X ), FERR is an estimated upper bound for the
182 * magnitude of the largest element in (sub( X ) - XTRUE)
183 * divided by the magnitude of the largest element in sub( X ).
184 * The estimate is as reliable as the estimate for RCOND, and
185 * is almost always a slight overestimate of the true error.
186 * This array is tied to the distributed matrix X.
187 *
188 * BERR (local output) DOUBLE PRECISION array of local dimension
189 * LOCc(JB+NRHS-1). The componentwise relative backward
190 * error of each solution vector (i.e., the smallest re-
191 * lative change in any entry of sub( A ) or sub( B )
192 * that makes sub( X ) an exact solution).
193 * This array is tied to the distributed matrix X.
194 *
195 * WORK (local workspace/local output) COMPLEX*16 array,
196 * dimension (LWORK)
197 * On exit, WORK(1) returns the minimal and optimal LWORK.
198 *
199 * LWORK (local or global input) INTEGER
200 * The dimension of the array WORK.
201 * LWORK is local input and must be at least
202 * LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) )
203 *
204 * If LWORK = -1, then LWORK is global input and a workspace
205 * query is assumed; the routine only calculates the minimum
206 * and optimal size for all work arrays. Each of these
207 * values is returned in the first entry of the corresponding
208 * work array, and no error message is issued by PXERBLA.
209 *
210 * RWORK (local workspace/local output) DOUBLE PRECISION array,
211 * dimension (LRWORK)
212 * On exit, RWORK(1) returns the minimal and optimal LRWORK.
213 *
214 * LRWORK (local or global input) INTEGER
215 * The dimension of the array RWORK.
216 * LRWORK is local input and must be at least
217 * LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ).
218 *
219 * If LRWORK = -1, then LRWORK is global input and a workspace
220 * query is assumed; the routine only calculates the minimum
221 * and optimal size for all work arrays. Each of these
222 * values is returned in the first entry of the corresponding
223 * work array, and no error message is issued by PXERBLA.
224 *
225 *
226 * INFO (global output) INTEGER
227 * = 0: successful exit
228 * < 0: If the i-th argument is an array and the j-entry had
229 * an illegal value, then INFO = -(i*100+j), if the i-th
230 * argument is a scalar and had an illegal value, then
231 * INFO = -i.
232 *
233 * Internal Parameters
234 * ===================
235 *
236 * ITMAX is the maximum number of steps of iterative refinement.
237 *
238 * Notes
239 * =====
240 *
241 * This routine temporarily returns when N <= 1.
242 *
243 * The distributed submatrices op( A ) and op( AF ) (respectively
244 * sub( X ) and sub( B ) ) should be distributed the same way on the
245 * same processes. These conditions ensure that sub( A ) and sub( AF )
246 * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
247 *
248 * Moreover, this routine requires the distributed submatrices sub( A ),
249 * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
250 * i.e., if f(x,y) = MOD( x-1, y ):
251 * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
252 * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
253 * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
254 * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
255 *
256 * =====================================================================
257 *
258 * .. Parameters ..
259  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
260  $ LLD_, MB_, M_, NB_, N_, RSRC_
261  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
262  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
263  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
264  INTEGER ITMAX
265  PARAMETER ( ITMAX = 5 )
266  DOUBLE PRECISION ZERO, RONE, TWO, THREE
267  parameter( zero = 0.0d+0, rone = 1.0d+0, two = 2.0d+0,
268  $ three = 3.0d+0 )
269  COMPLEX*16 ONE
270  PARAMETER ( ONE = ( 1.0d+0, 0.0d+0 ) )
271 * ..
272 * .. Local Scalars ..
273  LOGICAL LQUERY, UPPER
274  INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
275  $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
276  $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
277  $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
278  $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
279  $ k, kase, ldxb, lrwmin, lwmin, mycol, myrhs,
280  $ myrow, np, np0, npcol, npmod, nprow, nz
281  DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
282  COMPLEX*16 ZDUM
283 * ..
284 * .. Local Arrays ..
285  INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
286 * ..
287 * .. External Functions ..
288  LOGICAL LSAME
289  INTEGER ICEIL, INDXG2P, NUMROC
290  DOUBLE PRECISION PDLAMCH
291  EXTERNAL iceil, indxg2p, lsame, numroc, pdlamch
292 * ..
293 * .. External Subroutines ..
294  EXTERNAL blacs_gridinfo, chk1mat, descset, dgamx2d,
295  $ infog2l, pchk2mat, pxerbla, pzahemv, pzaxpy,
296  $ pzcopy, pzhemv, pzpotrs, pzlacon,
297  $ zgebr2d, zgebs2d
298 * ..
299 * .. Intrinsic Functions ..
300  INTRINSIC abs, dble, dcmplx, dimag, ichar, max, min, mod
301 * ..
302 * .. Statement Functions ..
303  DOUBLE PRECISION CABS1
304 * ..
305 * .. Statement Function definitions ..
306  cabs1( zdum ) = abs( dble( zdum ) ) + abs( dimag( zdum ) )
307 * ..
308 * .. Executable Statements ..
309 *
310 * Get grid parameters
311 *
312  ictxt = desca( ctxt_ )
313  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
314 *
315 * Test the input parameters.
316 *
317  info = 0
318  IF( nprow.EQ.-1 ) THEN
319  info = -(700+ctxt_)
320  ELSE
321  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
322  CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
323  CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 15, info )
324  CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 19, info )
325  IF( info.EQ.0 ) THEN
326  upper = lsame( uplo, 'U' )
327  iroffa = mod( ia-1, desca( mb_ ) )
328  icoffa = mod( ja-1, desca( nb_ ) )
329  iroffaf = mod( iaf-1, descaf( mb_ ) )
330  icoffaf = mod( jaf-1, descaf( nb_ ) )
331  iroffb = mod( ib-1, descb( mb_ ) )
332  icoffb = mod( jb-1, descb( nb_ ) )
333  iroffx = mod( ix-1, descx( mb_ ) )
334  icoffx = mod( jx-1, descx( nb_ ) )
335  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
336  $ nprow )
337  iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
338  $ descaf( csrc_ ), npcol )
339  iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
340  $ descaf( rsrc_ ), nprow )
341  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
342  $ npcol )
343  CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
344  $ iixb, jjxb, ixbrow, ixbcol )
345  ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
346  $ nprow )
347  ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
348  $ npcol )
349  npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
350  $ nprow )
351  lwmin = 2 * npmod
352  lrwmin = npmod
353  work( 1 ) = dcmplx( dble( lwmin ) )
354  rwork( 1 ) = dble( lrwmin )
355  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 )
356 *
357  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
358  info = -1
359  ELSE IF( n.LT.0 ) THEN
360  info = -2
361  ELSE IF( nrhs.LT.0 ) THEN
362  info = -3
363  ELSE IF( iroffa.NE.0 ) THEN
364  info = -5
365  ELSE IF( icoffa.NE.0 ) THEN
366  info = -6
367  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
368  info = -( 700 + nb_ )
369  ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
370  info = -( 1100 + mb_ )
371  ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
372  info = -9
373  ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
374  info = -( 1100 + nb_ )
375  ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
376  info = -10
377  ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
378  info = -( 1100 + ctxt_ )
379  ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
380  info = -13
381  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
382  info = -( 1500 + mb_ )
383  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
384  info = -( 1500 + ctxt_ )
385  ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
386  info = -( 1900 + mb_ )
387  ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
388  info = -17
389  ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
390  info = -( 1900 + nb_ )
391  ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
392  info = -18
393  ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
394  info = -( 1900 + ctxt_ )
395  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
396  info = -23
397  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
398  info = -25
399  END IF
400  END IF
401 *
402  IF( upper ) THEN
403  idum1( 1 ) = ichar( 'U' )
404  ELSE
405  idum1( 1 ) = ichar( 'L' )
406  END IF
407  idum2( 1 ) = 1
408  idum1( 2 ) = n
409  idum2( 2 ) = 2
410  idum1( 3 ) = nrhs
411  idum2( 3 ) = 3
412  IF( lwork.EQ.-1 ) THEN
413  idum1( 4 ) = -1
414  ELSE
415  idum1( 4 ) = 1
416  END IF
417  idum2( 4 ) = 23
418  IF( lrwork.EQ.-1 ) THEN
419  idum1( 5 ) = -1
420  ELSE
421  idum1( 5 ) = 1
422  END IF
423  idum2( 5 ) = 25
424  CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
425  $ jaf, descaf, 11, 0, idum1, idum2, info )
426  CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 15, n, 2, nrhs, 3,
427  $ ix, jx, descx, 19, 5, idum1, idum2, info )
428  END IF
429  IF( info.NE.0 ) THEN
430  CALL pxerbla( ictxt, 'PZPORFS', -info )
431  RETURN
432  ELSE IF( lquery ) THEN
433  RETURN
434  END IF
435 *
436  jjfbe = jjxb
437  myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
438  $ npcol )
439 *
440 * Quick return if possible
441 *
442  IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
443  DO 10 jj = jjfbe, myrhs
444  ferr( jj ) = zero
445  berr( jj ) = zero
446  10 CONTINUE
447  RETURN
448  END IF
449 *
450  np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
451  CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
452  $ ictxt, max( 1, np0 ) )
453  ipb = 1
454  ipr = 1
455  ipv = ipr + np0
456  IF( myrow.EQ.ixbrow ) THEN
457  iiw = 1 + iroffb
458  np = np0 - iroffb
459  ELSE
460  iiw = 1
461  np = np0
462  END IF
463  iw = 1 + iroffb
464  jw = 1
465  ldxb = descb( lld_ )
466  ioffxb = ( jjxb-1 )*ldxb
467 *
468 * NZ = 1 + maximum number of nonzero entries in each row of sub( A )
469 *
470  nz = n + 1
471  eps = pdlamch( ictxt, 'Epsilon' )
472  safmin = pdlamch( ictxt, 'Safe minimum' )
473  safe1 = nz*safmin
474  safe2 = safe1 / eps
475  jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
476 *
477 * Handle first block separately
478 *
479  jbrhs = jn - jb + 1
480  DO 100 k = 0, jbrhs-1
481 *
482  count = 1
483  lstres = three
484  20 CONTINUE
485 *
486 * Loop until stopping criterion is satisfied.
487 *
488 * Compute residual R = sub(B) - op(sub(A)) * sub(X)
489 *
490  CALL pzcopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
491  $ descw, 1 )
492  CALL pzhemv( uplo, n, -one, a, ia, ja, desca, x, ix, jx+k,
493  $ descx, 1, one, work( ipr ), iw, jw, descw, 1 )
494 *
495 * Compute componentwise relative backward error from formula
496 *
497 * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) )
498 *
499 * where abs(Z) is the componentwise absolute value of the
500 * matrix or vector Z. If the i-th component of the
501 * denominator is less than SAFE2, then SAFE1 is added to
502 * the i-th components of the numerator and denominator
503 * before dividing.
504 *
505  IF( mycol.EQ.ixbcol ) THEN
506  IF( np.GT.0 ) THEN
507  DO 30 ii = iixb, iixb + np - 1
508  rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
509  30 CONTINUE
510  END IF
511  END IF
512 *
513  CALL pzahemv( uplo, n, rone, a, ia, ja, desca, x, ix, jx+k,
514  $ descx, 1, rone, rwork( ipb ), iw, jw, descw, 1 )
515 *
516  s = zero
517  IF( mycol.EQ.ixbcol ) THEN
518  IF( np.GT.0 ) THEN
519  DO 40 ii = iiw-1, iiw+np-2
520  IF( rwork( ipb+ii ).GT.safe2 ) THEN
521  s = max( s, cabs1( work( ipr+ii ) ) /
522  $ rwork( ipb+ii ) )
523  ELSE
524  s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
525  $ ( rwork( ipb+ii )+safe1 ) )
526  END IF
527  40 CONTINUE
528  END IF
529  END IF
530 *
531  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
532  $ -1, mycol )
533  IF( mycol.EQ.ixbcol )
534  $ berr( jjfbe ) = s
535 *
536 * Test stopping criterion. Continue iterating if
537 * 1) The residual BERR(J) is larger than machine epsilon, and
538 * 2) BERR(J) decreased by at least a factor of 2 during the
539 * last iteration, and
540 * 3) At most ITMAX iterations tried.
541 *
542  IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
543 *
544 * Update solution and try again.
545 *
546  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
547  $ work( ipr ), iw, jw, descw, info )
548  CALL pzaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
549  $ jx+k, descx, 1 )
550  lstres = s
551  count = count + 1
552  GO TO 20
553  END IF
554 *
555 * Bound error from formula
556 *
557 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
558 * norm( abs(inv(sub(A)))*
559 * ( abs(R) +
560 * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X))
561 *
562 * where
563 * norm(Z) is the magnitude of the largest component of Z
564 * inv(sub(A)) is the inverse of sub(A)
565 * abs(Z) is the componentwise absolute value of the matrix
566 * or vector Z
567 * NZ is the maximum number of nonzeros in any row of sub(A),
568 * plus 1
569 * EPS is machine epsilon
570 *
571 * The i-th component of
572 * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B)))
573 * is incremented by SAFE1 if the i-th component of
574 * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
575 *
576 * Use PZLACON to estimate the infinity-norm of the matrix
577 * inv(sub(A)) * diag(W), where
578 * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
579 *
580  IF( mycol.EQ.ixbcol ) THEN
581  IF( np.GT.0 ) THEN
582  DO 50 ii = iiw-1, iiw+np-2
583  IF( rwork( ipb+ii ).GT.safe2 ) THEN
584  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
585  $ nz*eps*rwork( ipb+ii )
586  ELSE
587  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
588  $ nz*eps*rwork( ipb+ii ) + safe1
589  END IF
590  50 CONTINUE
591  END IF
592  END IF
593 *
594  kase = 0
595  60 CONTINUE
596  IF( mycol.EQ.ixbcol ) THEN
597  CALL zgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
598  $ descw( lld_ ) )
599  ELSE
600  CALL zgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
601  $ descw( lld_ ), myrow, ixbcol )
602  END IF
603  descw( csrc_ ) = mycol
604  CALL pzlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
605  $ iw, jw, descw, est, kase )
606  descw( csrc_ ) = ixbcol
607 *
608  IF( kase.NE.0 ) THEN
609  IF( kase.EQ.1 ) THEN
610 *
611 * Multiply by diag(W)*inv(sub(A)').
612 *
613  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
614  $ work( ipr ), iw, jw, descw, info )
615 *
616  IF( mycol.EQ.ixbcol ) THEN
617  IF( np.GT.0 ) THEN
618  DO 70 ii = iiw-1, iiw+np-2
619  work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
620  70 CONTINUE
621  END IF
622  END IF
623  ELSE
624 *
625 * Multiply by inv(sub(A))*diag(W).
626 *
627  IF( mycol.EQ.ixbcol ) THEN
628  IF( np.GT.0 ) THEN
629  DO 80 ii = iiw-1, iiw+np-2
630  work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
631  80 CONTINUE
632  END IF
633  END IF
634 *
635  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
636  $ work( ipr ), iw, jw, descw, info )
637  END IF
638  GO TO 60
639  END IF
640 *
641 * Normalize error.
642 *
643  lstres = zero
644  IF( mycol.EQ.ixbcol ) THEN
645  IF( np.GT.0 ) THEN
646  DO 90 ii = iixb, iixb+np-1
647  lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
648  90 CONTINUE
649  END IF
650  CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
651  $ idum, 1, -1, mycol )
652  IF( lstres.NE.zero )
653  $ ferr( jjfbe ) = est / lstres
654 *
655  jjxb = jjxb + 1
656  jjfbe = jjfbe + 1
657  ioffxb = ioffxb + ldxb
658 *
659  END IF
660 *
661  100 CONTINUE
662 *
663  icurcol = mod( ixbcol+1, npcol )
664 *
665 * Do for each right hand side
666 *
667  DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
668  jbrhs = min( jb+nrhs-j, descb( nb_ ) )
669  descw( csrc_ ) = icurcol
670 *
671  DO 190 k = 0, jbrhs-1
672 *
673  count = 1
674  lstres = three
675  110 CONTINUE
676 *
677 * Loop until stopping criterion is satisfied.
678 *
679 * Compute residual R = sub( B ) - sub( A )*sub( X ).
680 *
681  CALL pzcopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
682  $ descw, 1 )
683  CALL pzhemv( uplo, n, -one, a, ia, ja, desca, x, ix, j+k,
684  $ descx, 1, one, work( ipr ), iw, jw, descw, 1 )
685 *
686 * Compute componentwise relative backward error from formula
687 *
688 * max(i) ( abs(R(i)) /
689 * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) )
690 *
691 * where abs(Z) is the componentwise absolute value of the
692 * matrix or vector Z. If the i-th component of the
693 * denominator is less than SAFE2, then SAFE1 is added to the
694 * i-th components of the numerator and denominator before
695 * dividing.
696 *
697  IF( mycol.EQ.icurcol ) THEN
698  IF( np.GT.0 ) THEN
699  DO 120 ii = iixb, iixb+np-1
700  rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
701  120 CONTINUE
702  END IF
703  END IF
704 *
705  CALL pzahemv( uplo, n, rone, a, ia, ja, desca, x, ix, j+k,
706  $ descx, 1, rone, rwork( ipb ), iw, jw, descw,
707  $ 1 )
708 *
709  s = zero
710  IF( mycol.EQ.icurcol ) THEN
711  IF( np.GT.0 )THEN
712  DO 130 ii = iiw-1, iiw+np-2
713  IF( rwork( ipb+ii ).GT.safe2 ) THEN
714  s = max( s, cabs1( work( ipr+ii ) ) /
715  $ rwork( ipb+ii ) )
716  ELSE
717  s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
718  $ ( rwork( ipb+ii )+safe1 ) )
719  END IF
720  130 CONTINUE
721  END IF
722  END IF
723 *
724  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
725  $ -1, mycol )
726  IF( mycol.EQ.icurcol )
727  $ berr( jjfbe ) = s
728 *
729 * Test stopping criterion. Continue iterating if
730 * 1) The residual BERR(J+K) is larger than machine epsilon,
731 * and
732 * 2) BERR(J+K) decreased by at least a factor of 2 during
733 * the last iteration, and
734 * 3) At most ITMAX iterations tried.
735 *
736  IF( s.GT.eps .AND. two*s.LE.lstres .AND.
737  $ count.LE.itmax ) THEN
738 *
739 * Update solution and try again.
740 *
741  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
742  $ work( ipr ), iw, jw, descw, info )
743  CALL pzaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
744  $ ix, j+k, descx, 1 )
745  lstres = s
746  count = count + 1
747  GO TO 110
748  END IF
749 *
750 * Bound error from formula
751 *
752 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
753 * norm( abs(inv(sub(A)))*
754 * ( abs(R) + NZ*EPS*(
755 * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X))
756 *
757 * where
758 * norm(Z) is the magnitude of the largest component of Z
759 * inv(sub(A)) is the inverse of sub(A)
760 * abs(Z) is the componentwise absolute value of the matrix
761 * or vector Z
762 * NZ is the maximum number of nonzeros in any row of sub(A),
763 * plus 1
764 * EPS is machine epsilon
765 *
766 * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))
767 * +abs(sub(B))) is incremented by SAFE1 if the i-th component
768 * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2.
769 *
770 * Use PZLACON to estimate the infinity-norm of the matrix
771 * inv(sub(A)) * diag(W), where
772 * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)))))
773 *
774  IF( mycol.EQ.icurcol ) THEN
775  IF( np.GT.0 ) THEN
776  DO 140 ii = iiw-1, iiw+np-2
777  IF( rwork( ipb+ii ).GT.safe2 ) THEN
778  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
779  $ nz*eps*rwork( ipb+ii )
780  ELSE
781  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
782  $ nz*eps*rwork( ipb+ii ) + safe1
783  END IF
784  140 CONTINUE
785  END IF
786  END IF
787 *
788  kase = 0
789  150 CONTINUE
790  IF( mycol.EQ.icurcol ) THEN
791  CALL zgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
792  $ descw( lld_ ) )
793  ELSE
794  CALL zgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
795  $ descw( lld_ ), myrow, icurcol )
796  END IF
797  descw( csrc_ ) = mycol
798  CALL pzlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
799  $ iw, jw, descw, est, kase )
800  descw( csrc_ ) = icurcol
801 *
802  IF( kase.NE.0 ) THEN
803  IF( kase.EQ.1 ) THEN
804 *
805 * Multiply by diag(W)*inv(sub(A)').
806 *
807  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
808  $ work( ipr ), iw, jw, descw, info )
809 *
810  IF( mycol.EQ.icurcol ) THEN
811  IF( np.GT.0 ) THEN
812  DO 160 ii = iiw-1, iiw+np-2
813  work( ipr+ii ) = rwork( ipb+ii )*
814  $ work( ipr+ii )
815  160 CONTINUE
816  END IF
817  END IF
818  ELSE
819 *
820 * Multiply by inv(sub(A))*diag(W).
821 *
822  IF( mycol.EQ.icurcol ) THEN
823  IF( np.GT.0 ) THEN
824  DO 170 ii = iiw-1, iiw+np-2
825  work( ipr+ii ) = rwork( ipb+ii )*
826  $ work( ipr+ii )
827  170 CONTINUE
828  END IF
829  END IF
830 *
831  CALL pzpotrs( uplo, n, 1, af, iaf, jaf, descaf,
832  $ work( ipr ), iw, jw, descw, info )
833  END IF
834  GO TO 150
835  END IF
836 *
837 * Normalize error.
838 *
839  lstres = zero
840  IF( mycol.EQ.icurcol ) THEN
841  IF( np.GT.0 ) THEN
842  DO 180 ii = iixb, iixb+np-1
843  lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
844  180 CONTINUE
845  END IF
846  CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1,
847  $ idum, idum, 1, -1, mycol )
848  IF( lstres.NE.zero )
849  $ ferr( jjfbe ) = est / lstres
850 *
851  jjxb = jjxb + 1
852  jjfbe = jjfbe + 1
853  ioffxb = ioffxb + ldxb
854 *
855  END IF
856 *
857  190 CONTINUE
858 *
859  icurcol = mod( icurcol+1, npcol )
860 *
861  200 CONTINUE
862 *
863  work( 1 ) = dcmplx( dble( lwmin ) )
864  rwork( 1 ) = dble( lrwmin )
865 *
866  RETURN
867 *
868 * End of PZPORFS
869 *
870  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzporfs
subroutine pzporfs(UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pzporfs.f:4
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pzlacon
subroutine pzlacon(N, V, IV, JV, DESCV, X, IX, JX, DESCX, EST, KASE)
Definition: pzlacon.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pzpotrs
subroutine pzpotrs(UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, DESCB, INFO)
Definition: pzpotrs.f:3
min
#define min(A, B)
Definition: pcgemr.c:181