ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcnepfchk.f
Go to the documentation of this file.
1  SUBROUTINE pcnepfchk( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ,
2  $ DESCZ, ANORM, FRESID, WORK )
3 *
4 * -- ScaLAPACK testing routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * March, 2000
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IASEED, IZ, JA, JZ, N
11  REAL ANORM, FRESID
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCZ( * )
15  COMPLEX A( * ), WORK( * ), Z( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCNEPFCHK computes the residual
22 * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N),
23 * where Ao will be regenerated by the parallel random matrix generator,
24 * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1)
25 * and ||.|| stands for the infinity norm.
26 *
27 * Notes
28 * =====
29 *
30 * Each global data object is described by an associated description
31 * vector. This vector stores the information required to establish
32 * the mapping between an object element and its corresponding process
33 * and memory location.
34 *
35 * Let A be a generic term for any 2D block cyclicly distributed array.
36 * Such a global array has an associated description vector DESCA.
37 * In the following comments, the character _ should be read as
38 * "of the global array".
39 *
40 * NOTATION STORED IN EXPLANATION
41 * --------------- -------------- --------------------------------------
42 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43 * DTYPE_A = 1.
44 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45 * the BLACS process grid A is distribu-
46 * ted over. The context itself is glo-
47 * bal, but the handle (the integer
48 * value) may vary.
49 * M_A (global) DESCA( M_ ) The number of rows in the global
50 * array A.
51 * N_A (global) DESCA( N_ ) The number of columns in the global
52 * array A.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54 * the rows of the array.
55 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56 * the columns of the array.
57 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58 * row of the array A is distributed.
59 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60 * first column of the array A is
61 * distributed.
62 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63 * array. LLD_A >= MAX(1,LOCr(M_A)).
64 *
65 * Let K be the number of rows or columns of a distributed matrix,
66 * and assume that its process grid has dimension p x q.
67 * LOCr( K ) denotes the number of elements of K that a process
68 * would receive if K were distributed over the p processes of its
69 * process column.
70 * Similarly, LOCc( K ) denotes the number of elements of K that a
71 * process would receive if K were distributed over the q processes of
72 * its process row.
73 * The values of LOCr() and LOCc() may be determined via a call to the
74 * ScaLAPACK tool function, NUMROC:
75 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77 * An upper bound for these quantities may be computed by:
78 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80 *
81 * Arguments
82 * =========
83 *
84 * N (global input) INTEGER
85 * The order of sub( A ) and sub( Z ). N >= 0.
86 *
87 * A (local input/local output) COMPLEX pointer into the
88 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
89 * On entry, this array contains the local pieces of the N-by-N
90 * distributed matrix sub( A ) to be checked. On exit, this
91 * array contains the local pieces of the difference
92 * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ).
93 *
94 * IA (global input) INTEGER
95 * A's global row index, which points to the beginning of the
96 * submatrix which is to be operated on.
97 *
98 * JA (global input) INTEGER
99 * A's global column index, which points to the beginning of
100 * the submatrix which is to be operated on.
101 *
102 * DESCA (global and local input) INTEGER array of dimension DLEN_.
103 * The array descriptor for the distributed matrix A.
104 *
105 * IASEED (global input) INTEGER
106 * The seed number to generate the original matrix Ao.
107 *
108 * Z (local input) COMPLEX pointer into the local memory
109 * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this
110 * array contains the local pieces of the N-by-N distributed
111 * matrix sub( Z ).
112 *
113 * IZ (global input) INTEGER
114 * Z's global row index, which points to the beginning of the
115 * submatrix which is to be operated on.
116 *
117 * JZ (global input) INTEGER
118 * Z's global column index, which points to the beginning of
119 * the submatrix which is to be operated on.
120 *
121 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
122 * The array descriptor for the distributed matrix Z.
123 *
124 * ANORM (global input) REAL
125 * The Infinity norm of sub( A ).
126 *
127 * FRESID (global output) REAL
128 * The maximum (worst) factorizational error.
129 *
130 * WORK (local workspace) COMPLEX array, dimension (LWORK).
131 * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where
132 *
133 * IROFFA = MOD( IA-1, MB_A ),
134 * ICOFFA = MOD( JA-1, NB_A ),
135 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
136 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
137 * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
138 * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
139 *
140 * WORK is used to store a block of rows and a block of columns
141 * of sub( A ).
142 * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
143 * MYCOL, NPROW and NPCOL can be determined by calling the
144 * subroutine BLACS_GRIDINFO.
145 *
146 * Further Details
147 * ===============
148 *
149 * Contributed by Mark Fahey, March, 2000.
150 *
151 * =====================================================================
152 *
153 * .. Parameters ..
154  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
155  $ lld_, mb_, m_, nb_, n_, rsrc_
156  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
157  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
158  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
159  COMPLEX ONE, ZERO
160  parameter( one = ( 1.0e+0, 0.0e+0 ),
161  $ zero = ( 0.0e+0, 0.0e+0 ) )
162 * ..
163 * .. Local Scalars ..
164  INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF,
165  $ iw, j, jb, jja, jn, lda, ldw, mycol, myrow, np,
166  $ npcol, nprow
167  REAL EPS
168 * ..
169 * .. Local Arrays ..
170  INTEGER DESCW( DLEN_ )
171 * ..
172 * .. External Subroutines ..
173  EXTERNAL blacs_gridinfo, descset, infog2l, pcgemm,
175 * ..
176 * .. External Functions ..
177  INTEGER ICEIL, NUMROC
178  REAL PSLAMCH, PCLANGE
179  EXTERNAL iceil, numroc, pslamch, pclange
180 * ..
181 * .. Intrinsic Functions ..
182  INTRINSIC max, min, mod
183 * ..
184 * .. Executable Statements ..
185 *
186  ictxt = desca( ctxt_ )
187  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
188  eps = pslamch( ictxt, 'eps' )
189 *
190  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
191  $ iarow, iacol )
192  iroff = mod( ia-1, desca( mb_ ) )
193  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
194  IF( myrow.EQ.iarow )
195  $ np = np - iroff
196  ldw = max( 1, np )
197 *
198 * First compute H <- H * Z**T
199 *
200  CALL descset( descw, desca( mb_ ), n, desca( mb_ ), desca( nb_ ),
201  $ iarow, iacol, ictxt, desca( mb_ ) )
202 *
203  DO 10 i = ia, ia + n - 1, desca( mb_ )
204  ib = min( ia+n-i, desca( mb_ ) )
205 *
206  CALL pclacpy( 'All', ib, n, a, i, ja, desca, work, 1, 1,
207  $ descw )
208  CALL pcgemm( 'No transpose', 'Cong Tran', ib, n, n, one, work,
209  $ 1, 1, descw, z, iz, jz, descz, zero, a, i, ja,
210  $ desca )
211 *
212  descw( rsrc_ ) = mod( descw( rsrc_ )+1, nprow )
213 *
214  10 CONTINUE
215 *
216 * Then compute H <- Z * H = Z * H0 * Z**T
217 *
218  CALL descset( descw, n, desca( nb_ ), desca( mb_ ), desca( nb_ ),
219  $ iarow, iacol, ictxt, ldw )
220 *
221  DO 20 j = ja, ja + n - 1, desca( nb_ )
222  jb = min( ja+n-j, desca( nb_ ) )
223 *
224  CALL pclacpy( 'All', n, jb, a, ia, j, desca, work, 1, 1,
225  $ descw )
226  CALL pcgemm( 'No transpose', 'No transpose', n, jb, n, one, z,
227  $ iz, jz, descz, work, 1, 1, descw, zero, a, ia, j,
228  $ desca )
229 *
230  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
231 *
232  20 CONTINUE
233 *
234 * Compute H - H0
235 *
236  jn = min( iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+n-1 )
237  lda = desca( lld_ )
238  ioffa = iia + ( jja-1 )*lda
239  iw = 1
240  jb = jn - ja + 1
241  descw( csrc_ ) = iacol
242 *
243 * Handle first block of columns separately
244 *
245  IF( mycol.EQ.descw( csrc_ ) ) THEN
246  CALL pcmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
247  $ desca( mb_ ), desca( nb_ ), work, ldw,
248  $ desca( rsrc_ ), desca( csrc_ ), iaseed, iia-1,
249  $ np, jja-1, jb, myrow, mycol, nprow, npcol )
250  CALL pclaset( 'Lower', max( 0, n-2 ), jb, zero, zero, work,
251  $ min( iw+2, n ), 1, descw )
252  CALL cmatadd( np, jb, -one, work, ldw, one, a( ioffa ), lda )
253  jja = jja + jb
254  ioffa = ioffa + jb*lda
255  END IF
256 *
257  iw = iw + desca( mb_ )
258  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
259 *
260  DO 30 j = jn + 1, ja + n - 1, desca( nb_ )
261  jb = min( ja+n-j, desca( nb_ ) )
262 *
263  IF( mycol.EQ.descw( csrc_ ) ) THEN
264  CALL pcmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
265  $ desca( mb_ ), desca( nb_ ), work, ldw,
266  $ desca( rsrc_ ), desca( csrc_ ), iaseed,
267  $ iia-1, np, jja-1, jb, myrow, mycol, nprow,
268  $ npcol )
269  CALL pclaset( 'Lower', max( 0, n-iw-1 ), jb, zero, zero,
270  $ work, min( n, iw+2 ), 1, descw )
271  CALL cmatadd( np, jb, -one, work, ldw, one, a( ioffa ),
272  $ lda )
273  jja = jja + jb
274  ioffa = ioffa + jb*lda
275  END IF
276  iw = iw + desca( mb_ )
277  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
278  30 CONTINUE
279 *
280 * Calculate factor residual
281 *
282  fresid = pclange( 'I', n, n, a, ia, ja, desca, work ) /
283  $ ( n*eps*anorm )
284 *
285  RETURN
286 *
287 * End PCNEPFCHK
288 *
289  END
max
#define max(A, B)
Definition: pcgemr.c:180
cmatadd
subroutine cmatadd(M, N, ALPHA, A, LDA, BETA, C, LDC)
Definition: cmatadd.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pclaset
subroutine pclaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pcblastst.f:7508
pclacpy
subroutine pclacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pclacpy.f:3
pcnepfchk
subroutine pcnepfchk(N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, DESCZ, ANORM, FRESID, WORK)
Definition: pcnepfchk.f:3
min
#define min(A, B)
Definition: pcgemr.c:181