SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcnepfchk.f
Go to the documentation of this file.
1 SUBROUTINE pcnepfchk( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ,
2 $ DESCZ, ANORM, FRESID, WORK )
3*
4* -- ScaLAPACK testing routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* March, 2000
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IASEED, IZ, JA, JZ, N
11 REAL ANORM, FRESID
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCZ( * )
15 COMPLEX A( * ), WORK( * ), Z( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PCNEPFCHK computes the residual
22* || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N),
23* where Ao will be regenerated by the parallel random matrix generator,
24* sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1)
25* and ||.|| stands for the infinity norm.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* N (global input) INTEGER
85* The order of sub( A ) and sub( Z ). N >= 0.
86*
87* A (local input/local output) COMPLEX pointer into the
88* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
89* On entry, this array contains the local pieces of the N-by-N
90* distributed matrix sub( A ) to be checked. On exit, this
91* array contains the local pieces of the difference
92* sub(Z)*sub( A )*sub(Z)**T - sub( Ao ).
93*
94* IA (global input) INTEGER
95* A's global row index, which points to the beginning of the
96* submatrix which is to be operated on.
97*
98* JA (global input) INTEGER
99* A's global column index, which points to the beginning of
100* the submatrix which is to be operated on.
101*
102* DESCA (global and local input) INTEGER array of dimension DLEN_.
103* The array descriptor for the distributed matrix A.
104*
105* IASEED (global input) INTEGER
106* The seed number to generate the original matrix Ao.
107*
108* Z (local input) COMPLEX pointer into the local memory
109* to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this
110* array contains the local pieces of the N-by-N distributed
111* matrix sub( Z ).
112*
113* IZ (global input) INTEGER
114* Z's global row index, which points to the beginning of the
115* submatrix which is to be operated on.
116*
117* JZ (global input) INTEGER
118* Z's global column index, which points to the beginning of
119* the submatrix which is to be operated on.
120*
121* DESCZ (global and local input) INTEGER array of dimension DLEN_.
122* The array descriptor for the distributed matrix Z.
123*
124* ANORM (global input) REAL
125* The Infinity norm of sub( A ).
126*
127* FRESID (global output) REAL
128* The maximum (worst) factorizational error.
129*
130* WORK (local workspace) COMPLEX array, dimension (LWORK).
131* LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where
132*
133* IROFFA = MOD( IA-1, MB_A ),
134* ICOFFA = MOD( JA-1, NB_A ),
135* IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
136* IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
137* NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
138* NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
139*
140* WORK is used to store a block of rows and a block of columns
141* of sub( A ).
142* INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
143* MYCOL, NPROW and NPCOL can be determined by calling the
144* subroutine BLACS_GRIDINFO.
145*
146* Further Details
147* ===============
148*
149* Contributed by Mark Fahey, March, 2000.
150*
151* =====================================================================
152*
153* .. Parameters ..
154 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
155 $ lld_, mb_, m_, nb_, n_, rsrc_
156 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
157 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
158 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
159 COMPLEX ONE, ZERO
160 parameter( one = ( 1.0e+0, 0.0e+0 ),
161 $ zero = ( 0.0e+0, 0.0e+0 ) )
162* ..
163* .. Local Scalars ..
164 INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF,
165 $ iw, j, jb, jja, jn, lda, ldw, mycol, myrow, np,
166 $ npcol, nprow
167 REAL EPS
168* ..
169* .. Local Arrays ..
170 INTEGER DESCW( DLEN_ )
171* ..
172* .. External Subroutines ..
173 EXTERNAL blacs_gridinfo, descset, infog2l, pcgemm,
175* ..
176* .. External Functions ..
177 INTEGER ICEIL, NUMROC
178 REAL PSLAMCH, PCLANGE
179 EXTERNAL iceil, numroc, pslamch, pclange
180* ..
181* .. Intrinsic Functions ..
182 INTRINSIC max, min, mod
183* ..
184* .. Executable Statements ..
185*
186 ictxt = desca( ctxt_ )
187 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
188 eps = pslamch( ictxt, 'eps' )
189*
190 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
191 $ iarow, iacol )
192 iroff = mod( ia-1, desca( mb_ ) )
193 np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
194 IF( myrow.EQ.iarow )
195 $ np = np - iroff
196 ldw = max( 1, np )
197*
198* First compute H <- H * Z**T
199*
200 CALL descset( descw, desca( mb_ ), n, desca( mb_ ), desca( nb_ ),
201 $ iarow, iacol, ictxt, desca( mb_ ) )
202*
203 DO 10 i = ia, ia + n - 1, desca( mb_ )
204 ib = min( ia+n-i, desca( mb_ ) )
205*
206 CALL pclacpy( 'All', ib, n, a, i, ja, desca, work, 1, 1,
207 $ descw )
208 CALL pcgemm( 'No transpose', 'Cong Tran', ib, n, n, one, work,
209 $ 1, 1, descw, z, iz, jz, descz, zero, a, i, ja,
210 $ desca )
211*
212 descw( rsrc_ ) = mod( descw( rsrc_ )+1, nprow )
213*
214 10 CONTINUE
215*
216* Then compute H <- Z * H = Z * H0 * Z**T
217*
218 CALL descset( descw, n, desca( nb_ ), desca( mb_ ), desca( nb_ ),
219 $ iarow, iacol, ictxt, ldw )
220*
221 DO 20 j = ja, ja + n - 1, desca( nb_ )
222 jb = min( ja+n-j, desca( nb_ ) )
223*
224 CALL pclacpy( 'All', n, jb, a, ia, j, desca, work, 1, 1,
225 $ descw )
226 CALL pcgemm( 'No transpose', 'No transpose', n, jb, n, one, z,
227 $ iz, jz, descz, work, 1, 1, descw, zero, a, ia, j,
228 $ desca )
229*
230 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
231*
232 20 CONTINUE
233*
234* Compute H - H0
235*
236 jn = min( iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+n-1 )
237 lda = desca( lld_ )
238 ioffa = iia + ( jja-1 )*lda
239 iw = 1
240 jb = jn - ja + 1
241 descw( csrc_ ) = iacol
242*
243* Handle first block of columns separately
244*
245 IF( mycol.EQ.descw( csrc_ ) ) THEN
246 CALL pcmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
247 $ desca( mb_ ), desca( nb_ ), work, ldw,
248 $ desca( rsrc_ ), desca( csrc_ ), iaseed, iia-1,
249 $ np, jja-1, jb, myrow, mycol, nprow, npcol )
250 CALL pclaset( 'Lower', max( 0, n-2 ), jb, zero, zero, work,
251 $ min( iw+2, n ), 1, descw )
252 CALL cmatadd( np, jb, -one, work, ldw, one, a( ioffa ), lda )
253 jja = jja + jb
254 ioffa = ioffa + jb*lda
255 END IF
256*
257 iw = iw + desca( mb_ )
258 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
259*
260 DO 30 j = jn + 1, ja + n - 1, desca( nb_ )
261 jb = min( ja+n-j, desca( nb_ ) )
262*
263 IF( mycol.EQ.descw( csrc_ ) ) THEN
264 CALL pcmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
265 $ desca( mb_ ), desca( nb_ ), work, ldw,
266 $ desca( rsrc_ ), desca( csrc_ ), iaseed,
267 $ iia-1, np, jja-1, jb, myrow, mycol, nprow,
268 $ npcol )
269 CALL pclaset( 'Lower', max( 0, n-iw-1 ), jb, zero, zero,
270 $ work, min( n, iw+2 ), 1, descw )
271 CALL cmatadd( np, jb, -one, work, ldw, one, a( ioffa ),
272 $ lda )
273 jja = jja + jb
274 ioffa = ioffa + jb*lda
275 END IF
276 iw = iw + desca( mb_ )
277 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
278 30 CONTINUE
279*
280* Calculate factor residual
281*
282 fresid = pclange( 'I', n, n, a, ia, ja, desca, work ) /
283 $ ( n*eps*anorm )
284*
285 RETURN
286*
287* End PCNEPFCHK
288*
289 END
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine cmatadd(m, n, alpha, a, lda, beta, c, ldc)
Definition cmatadd.f:2
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pclaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pcblastst.f:7508
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pclacpy.f:3
subroutine pcnepfchk(n, a, ia, ja, desca, iaseed, z, iz, jz, descz, anorm, fresid, work)
Definition pcnepfchk.f:3