ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclafchk.f
Go to the documentation of this file.
*  Scaled residual of sub( A ) against the regenerated sub( Ao ).
      SUBROUTINE PCLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED,
     $                     ANORM, FRESID, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          AFORM, DIAG
      INTEGER            IA, IASEED, JA, M, N
      REAL               ANORM, FRESID
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX            A( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PCLAFCHK computes the residual
*  || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)),
*  where Ao will be regenerated by the parallel random matrix generator,
*  sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the
*  infinity norm.
*
*  The scaling factor || sub( Ao ) ||*eps*MAX(M,N) vanishes when
*  M = 0, N = 0 or ANORM = 0.  These cases are handled explicitly so
*  that FRESID is always a well defined number:
*     - if M = 0 or N = 0, FRESID = 0;
*     - if ANORM <= 0, FRESID = 0 when the difference is exactly zero
*       and FRESID = 1/eps otherwise.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  AFORM   (global input) CHARACTER
*          sub( A ) is overwritten with:
*             - a symmetric matrix, if AFORM = 'S';
*             - a Hermitian matrix, if AFORM = 'H';
*             - the transpose of what would normally be generated,
*               if AFORM = 'T';
*             - the conjugate transpose of what would normally be
*               generated, if AFORM = 'C';
*             - otherwise a random matrix.
*
*  DIAG    (global input) CHARACTER
*          if DIAG = 'D' : sub( A ) is diagonally dominant.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on, i.e. the number of rows
*          of the distributed submatrix sub( A ). M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( A ). N >= 0.
*
*  A       (local input/local output) COMPLEX pointer into the
*          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
*          On entry, this array contains the local pieces of the M-by-N
*          distributed matrix sub( A ) to be checked.  On exit, this
*          array contains the local pieces of the difference
*          sub( A ) - sub( Ao ).
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  IASEED  (global input) INTEGER
*          The seed number to generate the original matrix Ao.
*
*  ANORM   (global input) REAL
*          The Infinity norm of sub( A ).
*
*  FRESID  (global output) REAL
*          The maximum (worst) factorizational error.  FRESID = 0 if
*          M = 0 or N = 0.  If ANORM <= 0, FRESID = 0 when the computed
*          difference has zero norm and FRESID = 1/eps otherwise.
*
*  WORK    (local workspace) COMPLEX array, dimension (LWORK).
*          LWORK >= MpA0 * NB_A, where
*
*          IROFFA = MOD( IA-1, MB_A ),
*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
*          MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
*
*          WORK is used to store a block of columns of sub( A ).
*          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
*          MYCOL, NPROW and NPCOL can be determined by calling the
*          subroutine BLACS_GRIDINFO.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX            ONE
      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
      REAL               RONE, RZERO
      PARAMETER          ( RONE = 1.0E+0, RZERO = 0.0E+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW,
     $                   II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK,
     $                   LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL,
     $                   NPROW, NQ
      REAL               EPS, RNORM
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CMATADD, INFOG2L, PCMATGEN
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, NUMROC
      REAL               PSLAMCH, PCLANGE
      EXTERNAL           ICEIL, LSAME, NUMROC, PCLANGE, PSLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD, REAL
*     ..
*     .. Executable Statements ..
*
*     Grid information, machine epsilon and local starting indices
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
      EPS = PSLAMCH( ICTXT, 'eps' )
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
     $              IAROW, IACOL )
*
*     Compute sub( A ) := sub( A ) - sub( Ao )
*
*     MP and NQ are the local row / column counts of sub( A ); the
*     offset of sub( A ) inside its first block is removed on the
*     process row / column that owns that block.
*
      IROFF = MOD( IA-1, DESCA( MB_ ) )
      ICOFF = MOD( JA-1, DESCA( NB_ ) )
      MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
      NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
      IF( MYROW.EQ.IAROW )
     $   MP = MP-IROFF
      IF( MYCOL.EQ.IACOL )
     $   NQ = NQ-ICOFF
*
*     JN is the last global column of the first column block
*
      JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 )
      LDW = MAX( 1, MP )
      LDWP1 = LDW + 1
      LDA = DESCA( LLD_ )
      IOFFA = IIA + ( JJA - 1 )*LDA
*
      IF( LSAME( AFORM, 'H' ) ) THEN
*
*        Hermitian case: every process walks over all column blocks
*        so that ICURROW / ICURCOL track the owner of the current
*        diagonal block and II the local row where it starts.
*
*        Handle first block of columns separately
*
         II = 1
         ICURROW = IAROW
         ICURCOL = IACOL
         JB = JN - JA + 1
*
         IF( MYCOL.EQ.ICURCOL ) THEN
            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
     $                     IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW,
     $                     NPCOL )
            IF( MYROW.EQ.ICURROW ) THEN
*
*              Keep only the real part of the entries reached with
*              stride LDW+1, i.e. the diagonal of the current block
*
               DO 10, KK = 0, JB-1
                  WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) )
   10          CONTINUE
            END IF
            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
     $                    LDA )
            JJA = JJA + JB
            IOFFA = IOFFA + JB*LDA
         END IF
*
         IF( MYROW.EQ.ICURROW )
     $      II = II + JB
         ICURROW = MOD( ICURROW+1, NPROW )
         ICURCOL = MOD( ICURCOL+1, NPCOL )
*
*        Handle the remaining blocks of columns
*
         DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ )
            JB = MIN( JA+N-JJ, DESCA( NB_ ) )
*
            IF( MYCOL.EQ.ICURCOL ) THEN
               CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ),
     $                        DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
     $                        WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ),
     $                        IASEED, IIA-1, MP, JJA-1, JB, MYROW,
     $                        MYCOL, NPROW, NPCOL )
               IF( MYROW.EQ.ICURROW ) THEN
                  DO 20, KK = 0, JB-1
                     WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) )
   20             CONTINUE
               END IF
               CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
     $                       LDA )
               JJA = JJA + JB
               IOFFA = IOFFA + JB*LDA
            END IF
            IF( MYROW.EQ.ICURROW )
     $         II = II + JB
            ICURROW = MOD( ICURROW+1, NPROW )
            ICURCOL = MOD( ICURCOL+1, NPCOL )
   30    CONTINUE
*
      ELSE
*
*        Handle first block of columns separately
*
         IF( MYCOL.EQ.IACOL ) THEN
            JB = JN-JA+1
            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
     $                     IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW,
     $                     NPCOL )
            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
     $                    LDA )
            JJA = JJA + JB
            NQ = NQ - JB
            IOFFA = IOFFA + JB * LDA
         END IF
*
*        Handle the remaining blocks of columns
*
         DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ )
            JB = MIN( DESCA( NB_ ), JJA+NQ-JJ )
            IOFFA = IIA + ( JJ - 1 )*LDA
            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
     $                     IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW,
     $                     NPCOL )
            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
     $                    LDA )
   40    CONTINUE
*
      END IF
*
*     Calculate factor residual
*
*     M = 0, N = 0 (both legal) or ANORM = 0 make the scaling factor
*     MAX(M,N)*eps*ANORM vanish; these cases are treated separately
*     instead of dividing by zero.
*
      RNORM = PCLANGE( 'I', M, N, A, IA, JA, DESCA, WORK )
      IF( M.LE.0 .OR. N.LE.0 ) THEN
         FRESID = RZERO
      ELSE IF( ANORM.LE.RZERO ) THEN
         IF( RNORM.NE.RZERO ) THEN
            FRESID = RONE / EPS
         ELSE
            FRESID = RZERO
         END IF
      ELSE
         FRESID = RNORM / ( MAX( M, N )*EPS*ANORM )
      END IF
*
      RETURN
*
*     End of PCLAFCHK
*
      END
pclafchk
subroutine pclafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pclafchk.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
cmatadd
subroutine cmatadd(M, N, ALPHA, A, LDA, BETA, C, LDC)
Definition: cmatadd.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
min
#define min(A, B)
Definition: pcgemr.c:181