ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psblastst.f
Go to the documentation of this file.
1  SUBROUTINE psoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2 *
3 * -- PBLAS test routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * .. Scalar Arguments ..
9  INTEGER ICTXT, NOUT, SCODE
10 * ..
11 * .. Array Arguments ..
12  CHARACTER*(*) SNAME
13 * ..
14 * .. Subroutine Arguments ..
15  EXTERNAL subptr
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSOPTEE tests whether the PBLAS respond correctly to a bad option
22 * argument.
23 *
24 * Notes
25 * =====
26 *
27 * A description vector is associated with each 2D block-cyclicly dis-
28 * tributed matrix. This vector stores the information required to
29 * establish the mapping between a matrix entry and its corresponding
30 * process and memory location.
31 *
32 * In the following comments, the character _ should be read as
33 * "of the distributed matrix". Let A be a generic term for any 2D
34 * block cyclicly distributed matrix. Its description vector is DESCA:
35 *
36 * NOTATION STORED IN EXPLANATION
37 * ---------------- --------------- ------------------------------------
38 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40 * the NPROW x NPCOL BLACS process grid
41 * A is distributed over. The context
42 * itself is global, but the handle
43 * (the integer value) may vary.
44 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
45 * ted matrix A, M_A >= 0.
46 * N_A (global) DESCA( N_ ) The number of columns in the distri-
47 * buted matrix A, N_A >= 0.
48 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49 * block of the matrix A, IMB_A > 0.
50 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
51 * left block of the matrix A,
52 * INB_A > 0.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54 * bute the last M_A-IMB_A rows of A,
55 * MB_A > 0.
56 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57 * bute the last N_A-INB_A columns of
58 * A, NB_A > 0.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the matrix A is distributed,
61 * NPROW > RSRC_A >= 0.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of A is distributed.
64 * NPCOL > CSRC_A >= 0.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array storing the local blocks of
67 * the distributed matrix A,
68 * IF( Lc( 1, N_A ) > 0 )
69 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
70 * ELSE
71 * LLD_A >= 1.
72 *
73 * Let K be the number of rows of a matrix A starting at the global in-
74 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76 * receive if these K rows were distributed over NPROW processes. If K
77 * is the number of columns of a matrix A starting at the global index
78 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80 * these K columns were distributed over NPCOL processes.
81 *
82 * The values of Lr() and Lc() may be determined via a call to the func-
83 * tion PB_NUMROC:
84 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86 *
87 * Arguments
88 * =========
89 *
90 * ICTXT (local input) INTEGER
91 * On entry, ICTXT specifies the BLACS context handle, indica-
92 * ting the global context of the operation. The context itself
93 * is global, but the value of ICTXT is local.
94 *
95 * NOUT (global input) INTEGER
96 * On entry, NOUT specifies the unit number for the output file.
97 * When NOUT is 6, output to screen, when NOUT is 0, output to
98 * stderr. NOUT is only defined for process 0.
99 *
100 * SUBPTR (global input) SUBROUTINE
101 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
102 * EXTERNAL in the calling subroutine.
103 *
104 * SCODE (global input) INTEGER
105 * On entry, SCODE specifies the calling sequence code.
106 *
107 * SNAME (global input) CHARACTER*(*)
108 * On entry, SNAME specifies the subroutine name calling this
109 * subprogram.
110 *
111 * Calling sequence encodings
112 * ==========================
113 *
114 * code Formal argument list Examples
115 *
116 * 11 (n, v1,v2) _SWAP, _COPY
117 * 12 (n,s1, v1 ) _SCAL, _SCAL
118 * 13 (n,s1, v1,v2) _AXPY, _DOT_
119 * 14 (n,s1,i1,v1 ) _AMAX
120 * 15 (n,u1, v1 ) _ASUM, _NRM2
121 *
122 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125 * 24 ( m,n,s1,v1,v2,m1) _GER_
126 * 25 (uplo, n,s1,v1, m1) _SYR
127 * 26 (uplo, n,u1,v1, m1) _HER
128 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129 *
130 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
137 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140 *
141 * -- Written on April 1, 1998 by
142 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143 *
144 * =====================================================================
145 *
146 * .. Local Scalars ..
147  INTEGER APOS
148 * ..
149 * .. External Subroutines ..
150  EXTERNAL pschkopt
151 * ..
152 * .. Executable Statements ..
153 *
154 * Level 2 PBLAS
155 *
156  IF( scode.EQ.21 ) THEN
157 *
158 * Check 1st (and only) option
159 *
160  apos = 1
161  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162 *
163  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164  $ scode.EQ.27 ) THEN
165 *
166 * Check 1st (and only) option
167 *
168  apos = 1
169  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170 *
171  ELSE IF( scode.EQ.23 ) THEN
172 *
173 * Check 1st option
174 *
175  apos = 1
176  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177 *
178 * Check 2nd option
179 *
180  apos = 2
181  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182 *
183 * Check 3rd option
184 *
185  apos = 3
186  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187 *
188 * Level 3 PBLAS
189 *
190  ELSE IF( scode.EQ.31 ) THEN
191 *
192 * Check 1st option
193 *
194  apos = 1
195  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196 *
197 * Check 2'nd option
198 *
199  apos = 2
200  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201 *
202  ELSE IF( scode.EQ.32 ) THEN
203 *
204 * Check 1st option
205 *
206  apos = 1
207  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208 *
209 * Check 2nd option
210 *
211  apos = 2
212  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213 *
214  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215  $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216 *
217 * Check 1st option
218 *
219  apos = 1
220  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221 *
222 * Check 2'nd option
223 *
224  apos = 2
225  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226 *
227  ELSE IF( scode.EQ.38 ) THEN
228 *
229 * Check 1st option
230 *
231  apos = 1
232  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233 *
234 * Check 2nd option
235 *
236  apos = 2
237  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238 *
239 * Check 3rd option
240 *
241  apos = 3
242  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243 *
244 * Check 4th option
245 *
246  apos = 4
247  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248 *
249 *
250  ELSE IF( scode.EQ.39 ) THEN
251 *
252 * Check 1st option
253 *
254  apos = 1
255  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256 *
257  END IF
258 *
259  RETURN
260 *
261 * End of PSOPTEE
262 *
263  END
264  SUBROUTINE pschkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265  $ ARGPOS )
266 *
267 * -- PBLAS test routine (version 2.0) --
268 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269 * and University of California, Berkeley.
270 * April 1, 1998
271 *
272 * .. Scalar Arguments ..
273  CHARACTER*1 ARGNAM
274  INTEGER ARGPOS, ICTXT, NOUT, SCODE
275 * ..
276 * .. Array Arguments ..
277  CHARACTER*(*) SNAME
278 * ..
279 * .. Subroutine Arguments ..
280  EXTERNAL subptr
281 * ..
282 *
283 * Purpose
284 * =======
285 *
286 * PSCHKOPT tests the option ARGNAM in any PBLAS routine.
287 *
288 * Notes
289 * =====
290 *
291 * A description vector is associated with each 2D block-cyclicly dis-
292 * tributed matrix. This vector stores the information required to
293 * establish the mapping between a matrix entry and its corresponding
294 * process and memory location.
295 *
296 * In the following comments, the character _ should be read as
297 * "of the distributed matrix". Let A be a generic term for any 2D
298 * block cyclicly distributed matrix. Its description vector is DESCA:
299 *
300 * NOTATION STORED IN EXPLANATION
301 * ---------------- --------------- ------------------------------------
302 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304 * the NPROW x NPCOL BLACS process grid
305 * A is distributed over. The context
306 * itself is global, but the handle
307 * (the integer value) may vary.
308 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
309 * ted matrix A, M_A >= 0.
310 * N_A (global) DESCA( N_ ) The number of columns in the distri-
311 * buted matrix A, N_A >= 0.
312 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313 * block of the matrix A, IMB_A > 0.
314 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
315 * left block of the matrix A,
316 * INB_A > 0.
317 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318 * bute the last M_A-IMB_A rows of A,
319 * MB_A > 0.
320 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321 * bute the last N_A-INB_A columns of
322 * A, NB_A > 0.
323 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324 * row of the matrix A is distributed,
325 * NPROW > RSRC_A >= 0.
326 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327 * first column of A is distributed.
328 * NPCOL > CSRC_A >= 0.
329 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330 * array storing the local blocks of
331 * the distributed matrix A,
332 * IF( Lc( 1, N_A ) > 0 )
333 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
334 * ELSE
335 * LLD_A >= 1.
336 *
337 * Let K be the number of rows of a matrix A starting at the global in-
338 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340 * receive if these K rows were distributed over NPROW processes. If K
341 * is the number of columns of a matrix A starting at the global index
342 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344 * these K columns were distributed over NPCOL processes.
345 *
346 * The values of Lr() and Lc() may be determined via a call to the func-
347 * tion PB_NUMROC:
348 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350 *
351 * Arguments
352 * =========
353 *
354 * ICTXT (local input) INTEGER
355 * On entry, ICTXT specifies the BLACS context handle, indica-
356 * ting the global context of the operation. The context itself
357 * is global, but the value of ICTXT is local.
358 *
359 * NOUT (global input) INTEGER
360 * On entry, NOUT specifies the unit number for the output file.
361 * When NOUT is 6, output to screen, when NOUT is 0, output to
362 * stderr. NOUT is only defined for process 0.
363 *
364 * SUBPTR (global input) SUBROUTINE
365 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
366 * EXTERNAL in the calling subroutine.
367 *
368 * SCODE (global input) INTEGER
369 * On entry, SCODE specifies the calling sequence code.
370 *
371 * SNAME (global input) CHARACTER*(*)
372 * On entry, SNAME specifies the subroutine name calling this
373 * subprogram.
374 *
375 * ARGNAM (global input) CHARACTER*(*)
376 * On entry, ARGNAM specifies the name of the option to be
377 * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378 *
379 * ARGPOS (global input) INTEGER
380 * On entry, ARGPOS indicates the position of the option ARGNAM
381 * to be tested.
382 *
383 * -- Written on April 1, 1998 by
384 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385 *
386 * =====================================================================
387 *
388 * .. Local Scalars ..
389  INTEGER INFOT
390 * ..
391 * .. External Subroutines ..
392  EXTERNAL pchkpbe, pscallsub, pssetpblas
393 * ..
394 * .. External Functions ..
395  LOGICAL LSAME
396  EXTERNAL lsame
397 * ..
398 * .. Common Blocks ..
399  CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400  COMMON /pblasc/diag, side, transa, transb, uplo
401 * ..
402 * .. Executable Statements ..
403 *
404 * Reiniatilize the dummy arguments to correct values
405 *
406  CALL pssetpblas( ictxt )
407 *
408  IF( lsame( argnam, 'D' ) ) THEN
409 *
410 * Generate bad DIAG option
411 *
412  diag = '/'
413 *
414  ELSE IF( lsame( argnam, 'S' ) ) THEN
415 *
416 * Generate bad SIDE option
417 *
418  side = '/'
419 *
420  ELSE IF( lsame( argnam, 'A' ) ) THEN
421 *
422 * Generate bad TRANSA option
423 *
424  transa = '/'
425 *
426  ELSE IF( lsame( argnam, 'B' ) ) THEN
427 *
428 * Generate bad TRANSB option
429 *
430  transb = '/'
431 *
432  ELSE IF( lsame( argnam, 'U' ) ) THEN
433 *
434 * Generate bad UPLO option
435 *
436  uplo = '/'
437 *
438  END IF
439 *
440 * Set INFOT to the position of the bad dimension argument
441 *
442  infot = argpos
443 *
444 * Call the PBLAS routine
445 *
446  CALL pscallsub( subptr, scode )
447  CALL pchkpbe( ictxt, nout, sname, infot )
448 *
449  RETURN
450 *
451 * End of PSCHKOPT
452 *
453  END
454  SUBROUTINE psdimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455 *
456 * -- PBLAS test routine (version 2.0) --
457 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458 * and University of California, Berkeley.
459 * April 1, 1998
460 *
461 * .. Scalar Arguments ..
462  INTEGER ICTXT, NOUT, SCODE
463 * ..
464 * .. Array Arguments ..
465  CHARACTER*(*) SNAME
466 * ..
467 * .. Subroutine Arguments ..
468  EXTERNAL subptr
469 * ..
470 *
471 * Purpose
472 * =======
473 *
474 * PSDIMEE tests whether the PBLAS respond correctly to a bad dimension
475 * argument.
476 *
477 * Notes
478 * =====
479 *
480 * A description vector is associated with each 2D block-cyclicly dis-
481 * tributed matrix. This vector stores the information required to
482 * establish the mapping between a matrix entry and its corresponding
483 * process and memory location.
484 *
485 * In the following comments, the character _ should be read as
486 * "of the distributed matrix". Let A be a generic term for any 2D
487 * block cyclicly distributed matrix. Its description vector is DESCA:
488 *
489 * NOTATION STORED IN EXPLANATION
490 * ---------------- --------------- ------------------------------------
491 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493 * the NPROW x NPCOL BLACS process grid
494 * A is distributed over. The context
495 * itself is global, but the handle
496 * (the integer value) may vary.
497 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
498 * ted matrix A, M_A >= 0.
499 * N_A (global) DESCA( N_ ) The number of columns in the distri-
500 * buted matrix A, N_A >= 0.
501 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502 * block of the matrix A, IMB_A > 0.
503 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
504 * left block of the matrix A,
505 * INB_A > 0.
506 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507 * bute the last M_A-IMB_A rows of A,
508 * MB_A > 0.
509 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510 * bute the last N_A-INB_A columns of
511 * A, NB_A > 0.
512 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513 * row of the matrix A is distributed,
514 * NPROW > RSRC_A >= 0.
515 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516 * first column of A is distributed.
517 * NPCOL > CSRC_A >= 0.
518 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519 * array storing the local blocks of
520 * the distributed matrix A,
521 * IF( Lc( 1, N_A ) > 0 )
522 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
523 * ELSE
524 * LLD_A >= 1.
525 *
526 * Let K be the number of rows of a matrix A starting at the global in-
527 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529 * receive if these K rows were distributed over NPROW processes. If K
530 * is the number of columns of a matrix A starting at the global index
531 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533 * these K columns were distributed over NPCOL processes.
534 *
535 * The values of Lr() and Lc() may be determined via a call to the func-
536 * tion PB_NUMROC:
537 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539 *
540 * Arguments
541 * =========
542 *
543 * ICTXT (local input) INTEGER
544 * On entry, ICTXT specifies the BLACS context handle, indica-
545 * ting the global context of the operation. The context itself
546 * is global, but the value of ICTXT is local.
547 *
548 * NOUT (global input) INTEGER
549 * On entry, NOUT specifies the unit number for the output file.
550 * When NOUT is 6, output to screen, when NOUT is 0, output to
551 * stderr. NOUT is only defined for process 0.
552 *
553 * SUBPTR (global input) SUBROUTINE
554 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
555 * EXTERNAL in the calling subroutine.
556 *
557 * SCODE (global input) INTEGER
558 * On entry, SCODE specifies the calling sequence code.
559 *
560 * SNAME (global input) CHARACTER*(*)
561 * On entry, SNAME specifies the subroutine name calling this
562 * subprogram.
563 *
564 * Calling sequence encodings
565 * ==========================
566 *
567 * code Formal argument list Examples
568 *
569 * 11 (n, v1,v2) _SWAP, _COPY
570 * 12 (n,s1, v1 ) _SCAL, _SCAL
571 * 13 (n,s1, v1,v2) _AXPY, _DOT_
572 * 14 (n,s1,i1,v1 ) _AMAX
573 * 15 (n,u1, v1 ) _ASUM, _NRM2
574 *
575 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578 * 24 ( m,n,s1,v1,v2,m1) _GER_
579 * 25 (uplo, n,s1,v1, m1) _SYR
580 * 26 (uplo, n,u1,v1, m1) _HER
581 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582 *
583 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
590 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593 *
594 * -- Written on April 1, 1998 by
595 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596 *
597 * =====================================================================
598 *
599 * .. Local Scalars ..
600  INTEGER APOS
601 * ..
602 * .. External Subroutines ..
603  EXTERNAL pschkdim
604 * ..
605 * .. Executable Statements ..
606 *
607 * Level 1 PBLAS
608 *
609  IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610  $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611 *
612 * Check 1st (and only) dimension
613 *
614  apos = 1
615  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616 *
617 * Level 2 PBLAS
618 *
619  ELSE IF( scode.EQ.21 ) THEN
620 *
621 * Check 1st dimension
622 *
623  apos = 2
624  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625 *
626 * Check 2nd dimension
627 *
628  apos = 3
629  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630 *
631  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632  $ scode.EQ.27 ) THEN
633 *
634 * Check 1st (and only) dimension
635 *
636  apos = 2
637  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638 *
639  ELSE IF( scode.EQ.23 ) THEN
640 *
641 * Check 1st (and only) dimension
642 *
643  apos = 4
644  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645 *
646  ELSE IF( scode.EQ.24 ) THEN
647 *
648 * Check 1st dimension
649 *
650  apos = 1
651  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652 *
653 * Check 2nd dimension
654 *
655  apos = 2
656  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657 *
658 * Level 3 PBLAS
659 *
660  ELSE IF( scode.EQ.31 ) THEN
661 *
662 * Check 1st dimension
663 *
664  apos = 3
665  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666 *
667 * Check 2nd dimension
668 *
669  apos = 4
670  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671 *
672 * Check 3rd dimension
673 *
674  apos = 5
675  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676 *
677  ELSE IF( scode.EQ.32 ) THEN
678 *
679 * Check 1st dimension
680 *
681  apos = 3
682  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683 *
684 * Check 2nd dimension
685 *
686  apos = 4
687  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688 *
689  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690  $ scode.EQ.36 ) THEN
691 *
692 * Check 1st dimension
693 *
694  apos = 3
695  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696 *
697 * Check 2nd dimension
698 *
699  apos = 4
700  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701 *
702  ELSE IF( scode.EQ.37 ) THEN
703 *
704 * Check 1st dimension
705 *
706  apos = 1
707  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708 *
709 * Check 2nd dimension
710 *
711  apos = 2
712  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713 *
714  ELSE IF( scode.EQ.38 ) THEN
715 *
716 * Check 1st dimension
717 *
718  apos = 5
719  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720 *
721 * Check 2nd dimension
722 *
723  apos = 6
724  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725 *
726  ELSE IF( scode.EQ.39 ) THEN
727 *
728 * Check 1st dimension
729 *
730  apos = 2
731  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732 *
733 * Check 2nd dimension
734 *
735  apos = 3
736  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737 *
738  ELSE IF( scode.EQ.40 ) THEN
739 *
740 * Check 1st dimension
741 *
742  apos = 3
743  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744 *
745 * Check 2nd dimension
746 *
747  apos = 4
748  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749 *
750  END IF
751 *
752  RETURN
753 *
754 * End of PSDIMEE
755 *
756  END
757  SUBROUTINE pschkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758  $ ARGPOS )
759 *
760 * -- PBLAS test routine (version 2.0) --
761 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762 * and University of California, Berkeley.
763 * April 1, 1998
764 *
765 * .. Scalar Arguments ..
766  CHARACTER*1 ARGNAM
767  INTEGER ARGPOS, ICTXT, NOUT, SCODE
768 * ..
769 * .. Array Arguments ..
770  CHARACTER*(*) SNAME
771 * ..
772 * .. Subroutine Arguments ..
773  EXTERNAL subptr
774 * ..
775 *
776 * Purpose
777 * =======
778 *
779 * PSCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780 *
781 * Notes
782 * =====
783 *
784 * A description vector is associated with each 2D block-cyclicly dis-
785 * tributed matrix. This vector stores the information required to
786 * establish the mapping between a matrix entry and its corresponding
787 * process and memory location.
788 *
789 * In the following comments, the character _ should be read as
790 * "of the distributed matrix". Let A be a generic term for any 2D
791 * block cyclicly distributed matrix. Its description vector is DESCA:
792 *
793 * NOTATION STORED IN EXPLANATION
794 * ---------------- --------------- ------------------------------------
795 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797 * the NPROW x NPCOL BLACS process grid
798 * A is distributed over. The context
799 * itself is global, but the handle
800 * (the integer value) may vary.
801 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
802 * ted matrix A, M_A >= 0.
803 * N_A (global) DESCA( N_ ) The number of columns in the distri-
804 * buted matrix A, N_A >= 0.
805 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806 * block of the matrix A, IMB_A > 0.
807 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
808 * left block of the matrix A,
809 * INB_A > 0.
810 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811 * bute the last M_A-IMB_A rows of A,
812 * MB_A > 0.
813 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814 * bute the last N_A-INB_A columns of
815 * A, NB_A > 0.
816 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817 * row of the matrix A is distributed,
818 * NPROW > RSRC_A >= 0.
819 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820 * first column of A is distributed.
821 * NPCOL > CSRC_A >= 0.
822 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823 * array storing the local blocks of
824 * the distributed matrix A,
825 * IF( Lc( 1, N_A ) > 0 )
826 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
827 * ELSE
828 * LLD_A >= 1.
829 *
830 * Let K be the number of rows of a matrix A starting at the global in-
831 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833 * receive if these K rows were distributed over NPROW processes. If K
834 * is the number of columns of a matrix A starting at the global index
835 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837 * these K columns were distributed over NPCOL processes.
838 *
839 * The values of Lr() and Lc() may be determined via a call to the func-
840 * tion PB_NUMROC:
841 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843 *
844 * Arguments
845 * =========
846 *
847 * ICTXT (local input) INTEGER
848 * On entry, ICTXT specifies the BLACS context handle, indica-
849 * ting the global context of the operation. The context itself
850 * is global, but the value of ICTXT is local.
851 *
852 * NOUT (global input) INTEGER
853 * On entry, NOUT specifies the unit number for the output file.
854 * When NOUT is 6, output to screen, when NOUT is 0, output to
855 * stderr. NOUT is only defined for process 0.
856 *
857 * SUBPTR (global input) SUBROUTINE
858 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
859 * EXTERNAL in the calling subroutine.
860 *
861 * SCODE (global input) INTEGER
862 * On entry, SCODE specifies the calling sequence code.
863 *
864 * SNAME (global input) CHARACTER*(*)
865 * On entry, SNAME specifies the subroutine name calling this
866 * subprogram.
867 *
868 * ARGNAM (global input) CHARACTER*(*)
869 * On entry, ARGNAM specifies the name of the dimension to be
870 * checked. ARGNAM can either be 'M', 'N' or 'K'.
871 *
872 * ARGPOS (global input) INTEGER
873 * On entry, ARGPOS indicates the position of the option ARGNAM
874 * to be tested.
875 *
876 * -- Written on April 1, 1998 by
877 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878 *
879 * =====================================================================
880 *
881 * .. Local Scalars ..
882  INTEGER INFOT
883 * ..
884 * .. External Subroutines ..
885  EXTERNAL pchkpbe, pscallsub, pssetpblas
886 * ..
887 * .. External Functions ..
888  LOGICAL LSAME
889  EXTERNAL LSAME
890 * ..
891 * .. Common Blocks ..
892  INTEGER KDIM, MDIM, NDIM
893  COMMON /PBLASN/KDIM, MDIM, NDIM
894 * ..
895 * .. Executable Statements ..
896 *
897 * Reiniatilize the dummy arguments to correct values
898 *
899  CALL pssetpblas( ictxt )
900 *
901  IF( lsame( argnam, 'M' ) ) THEN
902 *
903 * Generate bad MDIM
904 *
905  mdim = -1
906 *
907  ELSE IF( lsame( argnam, 'N' ) ) THEN
908 *
909 * Generate bad NDIM
910 *
911  ndim = -1
912 *
913  ELSE
914 *
915 * Generate bad KDIM
916 *
917  kdim = -1
918 *
919  END IF
920 *
921 * Set INFOT to the position of the bad dimension argument
922 *
923  infot = argpos
924 *
925 * Call the PBLAS routine
926 *
927  CALL pscallsub( subptr, scode )
928  CALL pchkpbe( ictxt, nout, sname, infot )
929 *
930  RETURN
931 *
932 * End of PSCHKDIM
933 *
934  END
935  SUBROUTINE psvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936 *
937 * -- PBLAS test routine (version 2.0) --
938 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939 * and University of California, Berkeley.
940 * April 1, 1998
941 *
942 * .. Scalar Arguments ..
943  INTEGER ICTXT, NOUT, SCODE
944 * ..
945 * .. Array Arguments ..
946  CHARACTER*7 SNAME
947 * ..
948 * .. Subroutine Arguments ..
949  EXTERNAL subptr
950 * ..
951 *
952 * Purpose
953 * =======
954 *
955 * PSVECEE tests whether the PBLAS respond correctly to a bad vector
956 * argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957 * DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958 * DESC<vec>, and INC<vec> can be tested.
959 *
960 * Notes
961 * =====
962 *
963 * A description vector is associated with each 2D block-cyclicly dis-
964 * tributed matrix. This vector stores the information required to
965 * establish the mapping between a matrix entry and its corresponding
966 * process and memory location.
967 *
968 * In the following comments, the character _ should be read as
969 * "of the distributed matrix". Let A be a generic term for any 2D
970 * block cyclicly distributed matrix. Its description vector is DESCA:
971 *
972 * NOTATION STORED IN EXPLANATION
973 * ---------------- --------------- ------------------------------------
974 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976 * the NPROW x NPCOL BLACS process grid
977 * A is distributed over. The context
978 * itself is global, but the handle
979 * (the integer value) may vary.
980 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
981 * ted matrix A, M_A >= 0.
982 * N_A (global) DESCA( N_ ) The number of columns in the distri-
983 * buted matrix A, N_A >= 0.
984 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985 * block of the matrix A, IMB_A > 0.
986 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
987 * left block of the matrix A,
988 * INB_A > 0.
989 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990 * bute the last M_A-IMB_A rows of A,
991 * MB_A > 0.
992 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993 * bute the last N_A-INB_A columns of
994 * A, NB_A > 0.
995 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996 * row of the matrix A is distributed,
997 * NPROW > RSRC_A >= 0.
998 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999 * first column of A is distributed.
1000 * NPCOL > CSRC_A >= 0.
1001 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002 * array storing the local blocks of
1003 * the distributed matrix A,
1004 * IF( Lc( 1, N_A ) > 0 )
1005 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006 * ELSE
1007 * LLD_A >= 1.
1008 *
1009 * Let K be the number of rows of a matrix A starting at the global in-
1010 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012 * receive if these K rows were distributed over NPROW processes. If K
1013 * is the number of columns of a matrix A starting at the global index
1014 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016 * these K columns were distributed over NPCOL processes.
1017 *
1018 * The values of Lr() and Lc() may be determined via a call to the func-
1019 * tion PB_NUMROC:
1020 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022 *
1023 * Arguments
1024 * =========
1025 *
1026 * ICTXT (local input) INTEGER
1027 * On entry, ICTXT specifies the BLACS context handle, indica-
1028 * ting the global context of the operation. The context itself
1029 * is global, but the value of ICTXT is local.
1030 *
1031 * NOUT (global input) INTEGER
1032 * On entry, NOUT specifies the unit number for the output file.
1033 * When NOUT is 6, output to screen, when NOUT is 0, output to
1034 * stderr. NOUT is only defined for process 0.
1035 *
1036 * SUBPTR (global input) SUBROUTINE
1037 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038 * EXTERNAL in the calling subroutine.
1039 *
1040 * SCODE (global input) INTEGER
1041 * On entry, SCODE specifies the calling sequence code.
1042 *
1043 * SNAME (global input) CHARACTER*(*)
1044 * On entry, SNAME specifies the subroutine name calling this
1045 * subprogram.
1046 *
1047 * Calling sequence encodings
1048 * ==========================
1049 *
1050 * code Formal argument list Examples
1051 *
1052 * 11 (n, v1,v2) _SWAP, _COPY
1053 * 12 (n,s1, v1 ) _SCAL, _SCAL
1054 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1055 * 14 (n,s1,i1,v1 ) _AMAX
1056 * 15 (n,u1, v1 ) _ASUM, _NRM2
1057 *
1058 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061 * 24 ( m,n,s1,v1,v2,m1) _GER_
1062 * 25 (uplo, n,s1,v1, m1) _SYR
1063 * 26 (uplo, n,u1,v1, m1) _HER
1064 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065 *
1066 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076 *
1077 * -- Written on April 1, 1998 by
1078 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079 *
1080 * =====================================================================
1081 *
1082 * .. Local Scalars ..
1083  INTEGER APOS
1084 * ..
1085 * .. External Subroutines ..
1086  EXTERNAL pschkmat
1087 * ..
1088 * .. Executable Statements ..
1089 *
1090 * Level 1 PBLAS
1091 *
1092  IF( scode.EQ.11 ) THEN
1093 *
1094 * Check 1st vector
1095 *
1096  apos = 2
1097  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098 *
1099 * Check 2nd vector
1100 *
1101  apos = 7
1102  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103 *
1104  ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105 *
1106 * Check 1st (and only) vector
1107 *
1108  apos = 3
1109  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110 *
1111  ELSE IF( scode.EQ.13 ) THEN
1112 *
1113 * Check 1st vector
1114 *
1115  apos = 3
1116  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117 *
1118 * Check 2nd vector
1119 *
1120  apos = 8
1121  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122 *
1123  ELSE IF( scode.EQ.14 ) THEN
1124 *
1125 * Check 1st (and only) vector
1126 *
1127  apos = 4
1128  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129 *
1130 * Level 2 PBLAS
1131 *
1132  ELSE IF( scode.EQ.21 ) THEN
1133 *
1134 * Check 1st vector
1135 *
1136  apos = 9
1137  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138 *
1139 * Check 2nd vector
1140 *
1141  apos = 15
1142  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143 *
1144  ELSE IF( scode.EQ.22 ) THEN
1145 *
1146 * Check 1st vector
1147 *
1148  apos = 8
1149  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150 *
1151 * Check 2nd vector
1152 *
1153  apos = 14
1154  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155 *
1156  ELSE IF( scode.EQ.23 ) THEN
1157 *
1158 * Check 1st (and only) vector
1159 *
1160  apos = 9
1161  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162 *
1163  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164 *
1165 * Check 1st vector
1166 *
1167  apos = 4
1168  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169 *
1170 * Check 2nd vector
1171 *
1172  apos = 9
1173  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174 *
1175  ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176 *
1177 * Check 1'st (and only) vector
1178 *
1179  apos = 4
1180  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181 *
1182  END IF
1183 *
1184  RETURN
1185 *
1186 * End of PSVECEE
1187 *
1188  END
1189  SUBROUTINE psmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1191 * -- PBLAS test routine (version 2.0) --
1192 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193 * and University of California, Berkeley.
1194 * April 1, 1998
1195 *
1196 * .. Scalar Arguments ..
1197  INTEGER ICTXT, NOUT, SCODE
1198 * ..
1199 * .. Array Arguments ..
1200  CHARACTER*7 SNAME
1201 * ..
1202 * .. Subroutine Arguments ..
1203  EXTERNAL subptr
1204 * ..
1205 *
1206 * Purpose
1207 * =======
1208 *
1209 * PSMATEE tests whether the PBLAS respond correctly to a bad matrix
1210 * argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211 * and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212 * can be tested.
1213 *
1214 * Notes
1215 * =====
1216 *
1217 * A description vector is associated with each 2D block-cyclicly dis-
1218 * tributed matrix. This vector stores the information required to
1219 * establish the mapping between a matrix entry and its corresponding
1220 * process and memory location.
1221 *
1222 * In the following comments, the character _ should be read as
1223 * "of the distributed matrix". Let A be a generic term for any 2D
1224 * block cyclicly distributed matrix. Its description vector is DESCA:
1225 *
1226 * NOTATION STORED IN EXPLANATION
1227 * ---------------- --------------- ------------------------------------
1228 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230 * the NPROW x NPCOL BLACS process grid
1231 * A is distributed over. The context
1232 * itself is global, but the handle
1233 * (the integer value) may vary.
1234 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235 * ted matrix A, M_A >= 0.
1236 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1237 * buted matrix A, N_A >= 0.
1238 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239 * block of the matrix A, IMB_A > 0.
1240 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241 * left block of the matrix A,
1242 * INB_A > 0.
1243 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244 * bute the last M_A-IMB_A rows of A,
1245 * MB_A > 0.
1246 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247 * bute the last N_A-INB_A columns of
1248 * A, NB_A > 0.
1249 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250 * row of the matrix A is distributed,
1251 * NPROW > RSRC_A >= 0.
1252 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253 * first column of A is distributed.
1254 * NPCOL > CSRC_A >= 0.
1255 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256 * array storing the local blocks of
1257 * the distributed matrix A,
1258 * IF( Lc( 1, N_A ) > 0 )
1259 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260 * ELSE
1261 * LLD_A >= 1.
1262 *
1263 * Let K be the number of rows of a matrix A starting at the global in-
1264 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266 * receive if these K rows were distributed over NPROW processes. If K
1267 * is the number of columns of a matrix A starting at the global index
1268 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270 * these K columns were distributed over NPCOL processes.
1271 *
1272 * The values of Lr() and Lc() may be determined via a call to the func-
1273 * tion PB_NUMROC:
1274 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276 *
1277 * Arguments
1278 * =========
1279 *
1280 * ICTXT (local input) INTEGER
1281 * On entry, ICTXT specifies the BLACS context handle, indica-
1282 * ting the global context of the operation. The context itself
1283 * is global, but the value of ICTXT is local.
1284 *
1285 * NOUT (global input) INTEGER
1286 * On entry, NOUT specifies the unit number for the output file.
1287 * When NOUT is 6, output to screen, when NOUT is 0, output to
1288 * stderr. NOUT is only defined for process 0.
1289 *
1290 * SUBPTR (global input) SUBROUTINE
1291 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292 * EXTERNAL in the calling subroutine.
1293 *
1294 * SCODE (global input) INTEGER
1295 * On entry, SCODE specifies the calling sequence code.
1296 *
1297 * SNAME (global input) CHARACTER*(*)
1298 * On entry, SNAME specifies the subroutine name calling this
1299 * subprogram.
1300 *
1301 * Calling sequence encodings
1302 * ==========================
1303 *
1304 * code Formal argument list Examples
1305 *
1306 * 11 (n, v1,v2) _SWAP, _COPY
1307 * 12 (n,s1, v1 ) _SCAL, _SCAL
1308 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1309 * 14 (n,s1,i1,v1 ) _AMAX
1310 * 15 (n,u1, v1 ) _ASUM, _NRM2
1311 *
1312 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315 * 24 ( m,n,s1,v1,v2,m1) _GER_
1316 * 25 (uplo, n,s1,v1, m1) _SYR
1317 * 26 (uplo, n,u1,v1, m1) _HER
1318 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319 *
1320 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330 *
1331 * -- Written on April 1, 1998 by
1332 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333 *
1334 * =====================================================================
1335 *
1336 * .. Local Scalars ..
1337  INTEGER APOS
1338 * ..
1339 * .. External Subroutines ..
1340  EXTERNAL pschkmat
1341 * ..
1342 * .. Executable Statements ..
1343 *
1344 * Level 2 PBLAS
1345 *
1346  IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347 *
1348 * Check 1st (and only) matrix
1349 *
1350  apos = 5
1351  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352 *
1353  ELSE IF( scode.EQ.22 ) THEN
1354 *
1355 * Check 1st (and only) matrix
1356 *
1357  apos = 4
1358  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359 *
1360  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361 *
1362 * Check 1st (and only) matrix
1363 *
1364  apos = 14
1365  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366 *
1367  ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368 *
1369 * Check 1st (and only) matrix
1370 *
1371  apos = 9
1372  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373 *
1374 * Level 3 PBLAS
1375 *
1376  ELSE IF( scode.EQ.31 ) THEN
1377 *
1378 * Check 1st matrix
1379 *
1380  apos = 7
1381  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382 *
1383 * Check 2nd matrix
1384 *
1385  apos = 11
1386  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387 *
1388 * Check 3nd matrix
1389 *
1390  apos = 16
1391  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392 *
1393  ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394 *
1395 * Check 1st matrix
1396 *
1397  apos = 6
1398  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399 *
1400 * Check 2nd matrix
1401 *
1402  apos = 10
1403  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404 *
1405 * Check 3nd matrix
1406 *
1407  apos = 15
1408  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409 *
1410  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411 *
1412 * Check 1st matrix
1413 *
1414  apos = 6
1415  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416 *
1417 * Check 2nd matrix
1418 *
1419  apos = 11
1420  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421 *
1422  ELSE IF( scode.EQ.37 ) THEN
1423 *
1424 * Check 1st matrix
1425 *
1426  apos = 4
1427  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428 *
1429 * Check 2nd matrix
1430 *
1431  apos = 9
1432  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433 *
1434  ELSE IF( scode.EQ.38 ) THEN
1435 *
1436 * Check 1st matrix
1437 *
1438  apos = 8
1439  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440 *
1441 * Check 2nd matrix
1442 *
1443  apos = 12
1444  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445 *
1446  ELSE IF( scode.EQ.39 ) THEN
1447 *
1448 * Check 1st matrix
1449 *
1450  apos = 5
1451  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452 *
1453 * Check 2nd matrix
1454 *
1455  apos = 10
1456  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457 *
1458  ELSE IF( scode.EQ.40 ) THEN
1459 *
1460 * Check 1st matrix
1461 *
1462  apos = 6
1463  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464 *
1465 * Check 2nd matrix
1466 *
1467  apos = 11
1468  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469 *
1470  END IF
1471 *
1472  RETURN
1473 *
1474 * End of PSMATEE
1475 *
1476  END
1477  SUBROUTINE pssetpblas( ICTXT )
1479 * -- PBLAS test routine (version 2.0) --
1480 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481 * and University of California, Berkeley.
1482 * April 1, 1998
1483 *
1484 * .. Scalar Arguments ..
1485  INTEGER ICTXT
1486 * ..
1487 *
1488 * Purpose
1489 * =======
1490 *
1491 * PSSETPBLAS initializes *all* the dummy arguments to correct values.
1492 *
1493 * Notes
1494 * =====
1495 *
1496 * A description vector is associated with each 2D block-cyclicly dis-
1497 * tributed matrix. This vector stores the information required to
1498 * establish the mapping between a matrix entry and its corresponding
1499 * process and memory location.
1500 *
1501 * In the following comments, the character _ should be read as
1502 * "of the distributed matrix". Let A be a generic term for any 2D
1503 * block cyclicly distributed matrix. Its description vector is DESCA:
1504 *
1505 * NOTATION STORED IN EXPLANATION
1506 * ---------------- --------------- ------------------------------------
1507 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509 * the NPROW x NPCOL BLACS process grid
1510 * A is distributed over. The context
1511 * itself is global, but the handle
1512 * (the integer value) may vary.
1513 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514 * ted matrix A, M_A >= 0.
1515 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1516 * buted matrix A, N_A >= 0.
1517 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518 * block of the matrix A, IMB_A > 0.
1519 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520 * left block of the matrix A,
1521 * INB_A > 0.
1522 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523 * bute the last M_A-IMB_A rows of A,
1524 * MB_A > 0.
1525 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526 * bute the last N_A-INB_A columns of
1527 * A, NB_A > 0.
1528 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529 * row of the matrix A is distributed,
1530 * NPROW > RSRC_A >= 0.
1531 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532 * first column of A is distributed.
1533 * NPCOL > CSRC_A >= 0.
1534 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535 * array storing the local blocks of
1536 * the distributed matrix A,
1537 * IF( Lc( 1, N_A ) > 0 )
1538 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539 * ELSE
1540 * LLD_A >= 1.
1541 *
1542 * Let K be the number of rows of a matrix A starting at the global in-
1543 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545 * receive if these K rows were distributed over NPROW processes. If K
1546 * is the number of columns of a matrix A starting at the global index
1547 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549 * these K columns were distributed over NPCOL processes.
1550 *
1551 * The values of Lr() and Lc() may be determined via a call to the func-
1552 * tion PB_NUMROC:
1553 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555 *
1556 * Arguments
1557 * =========
1558 *
1559 * ICTXT (local input) INTEGER
1560 * On entry, ICTXT specifies the BLACS context handle, indica-
1561 * ting the global context of the operation. The context itself
1562 * is global, but the value of ICTXT is local.
1563 *
1564 * -- Written on April 1, 1998 by
1565 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566 *
1567 * =====================================================================
1568 *
1569 * .. Parameters ..
1570  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572  $ rsrc_
1573  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577  REAL ONE
1578  PARAMETER ( ONE = 1.0e+0 )
1579 * ..
1580 * .. External Subroutines ..
1581  EXTERNAL pb_descset2
1582 * ..
1583 * .. Common Blocks ..
1584  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1585  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1586  $ jc, jx, jy, kdim, mdim, ndim
1587  REAL USCLR, SCLR
1588  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1589  $ descx( dlen_ ), descy( dlen_ )
1590  REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1591  COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1592  COMMON /pblasd/desca, descb, descc, descx, descy
1593  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1594  $ ja, jb, jc, jx, jy
1595  COMMON /pblasm/a, b, c
1596  COMMON /pblasn/kdim, mdim, ndim
1597  COMMON /pblass/sclr, usclr
1598  COMMON /pblasv/x, y
1599 * ..
1600 * .. Executable Statements ..
1601 *
1602 * Set default values for options
1603 *
1604  diag = 'N'
1605  side = 'L'
1606  transa = 'N'
1607  transb = 'N'
1608  uplo = 'U'
1609 *
1610 * Set default values for scalars
1611 *
1612  kdim = 1
1613  mdim = 1
1614  ndim = 1
1615  isclr = 1
1616  sclr = one
1617  usclr = one
1618 *
1619 * Set default values for distributed matrix A
1620 *
1621  a( 1, 1 ) = one
1622  a( 2, 1 ) = one
1623  a( 1, 2 ) = one
1624  a( 2, 2 ) = one
1625  ia = 1
1626  ja = 1
1627  CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1628 *
1629 * Set default values for distributed matrix B
1630 *
1631  b( 1, 1 ) = one
1632  b( 2, 1 ) = one
1633  b( 1, 2 ) = one
1634  b( 2, 2 ) = one
1635  ib = 1
1636  jb = 1
1637  CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1638 *
1639 * Set default values for distributed matrix C
1640 *
1641  c( 1, 1 ) = one
1642  c( 2, 1 ) = one
1643  c( 1, 2 ) = one
1644  c( 2, 2 ) = one
1645  ic = 1
1646  jc = 1
1647  CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1648 *
1649 * Set default values for distributed matrix X
1650 *
1651  x( 1 ) = one
1652  x( 2 ) = one
1653  ix = 1
1654  jx = 1
1655  CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1656  incx = 1
1657 *
1658 * Set default values for distributed matrix Y
1659 *
1660  y( 1 ) = one
1661  y( 2 ) = one
1662  iy = 1
1663  jy = 1
1664  CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1665  incy = 1
1666 *
1667  RETURN
1668 *
1669 * End of PSSETPBLAS
1670 *
1671  END
1672  SUBROUTINE pschkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1673  $ ARGPOS )
1675 * -- PBLAS test routine (version 2.0) --
1676 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1677 * and University of California, Berkeley.
1678 * April 1, 1998
1679 *
1680 * .. Scalar Arguments ..
1681  CHARACTER*1 ARGNAM
1682  INTEGER ARGPOS, ICTXT, NOUT, SCODE
1683 * ..
1684 * .. Array Arguments ..
1685  CHARACTER*(*) SNAME
1686 * ..
1687 * .. Subroutine Arguments ..
1688  EXTERNAL subptr
1689 * ..
1690 *
1691 * Purpose
1692 * =======
1693 *
1694 * PSCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1695 *
1696 * Notes
1697 * =====
1698 *
1699 * A description vector is associated with each 2D block-cyclicly dis-
1700 * tributed matrix. This vector stores the information required to
1701 * establish the mapping between a matrix entry and its corresponding
1702 * process and memory location.
1703 *
1704 * In the following comments, the character _ should be read as
1705 * "of the distributed matrix". Let A be a generic term for any 2D
1706 * block cyclicly distributed matrix. Its description vector is DESCA:
1707 *
1708 * NOTATION STORED IN EXPLANATION
1709 * ---------------- --------------- ------------------------------------
1710 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1711 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1712 * the NPROW x NPCOL BLACS process grid
1713 * A is distributed over. The context
1714 * itself is global, but the handle
1715 * (the integer value) may vary.
1716 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1717 * ted matrix A, M_A >= 0.
1718 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1719 * buted matrix A, N_A >= 0.
1720 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1721 * block of the matrix A, IMB_A > 0.
1722 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1723 * left block of the matrix A,
1724 * INB_A > 0.
1725 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1726 * bute the last M_A-IMB_A rows of A,
1727 * MB_A > 0.
1728 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1729 * bute the last N_A-INB_A columns of
1730 * A, NB_A > 0.
1731 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1732 * row of the matrix A is distributed,
1733 * NPROW > RSRC_A >= 0.
1734 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1735 * first column of A is distributed.
1736 * NPCOL > CSRC_A >= 0.
1737 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1738 * array storing the local blocks of
1739 * the distributed matrix A,
1740 * IF( Lc( 1, N_A ) > 0 )
1741 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1742 * ELSE
1743 * LLD_A >= 1.
1744 *
1745 * Let K be the number of rows of a matrix A starting at the global in-
1746 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1747 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1748 * receive if these K rows were distributed over NPROW processes. If K
1749 * is the number of columns of a matrix A starting at the global index
1750 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1751 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1752 * these K columns were distributed over NPCOL processes.
1753 *
1754 * The values of Lr() and Lc() may be determined via a call to the func-
1755 * tion PB_NUMROC:
1756 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1757 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1758 *
1759 * Arguments
1760 * =========
1761 *
1762 * ICTXT (local input) INTEGER
1763 * On entry, ICTXT specifies the BLACS context handle, indica-
1764 * ting the global context of the operation. The context itself
1765 * is global, but the value of ICTXT is local.
1766 *
1767 * NOUT (global input) INTEGER
1768 * On entry, NOUT specifies the unit number for the output file.
1769 * When NOUT is 6, output to screen, when NOUT is 0, output to
1770 * stderr. NOUT is only defined for process 0.
1771 *
1772 * SUBPTR (global input) SUBROUTINE
1773 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1774 * EXTERNAL in the calling subroutine.
1775 *
1776 * SCODE (global input) INTEGER
1777 * On entry, SCODE specifies the calling sequence code.
1778 *
1779 * SNAME (global input) CHARACTER*(*)
1780 * On entry, SNAME specifies the subroutine name calling this
1781 * subprogram.
1782 *
1783 * ARGNAM (global input) CHARACTER*(*)
1784 * On entry, ARGNAM specifies the name of the matrix or vector
1785 * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1786 * wants to check a matrix, and 'X' or 'Y' for a vector.
1787 *
1788 * ARGPOS (global input) INTEGER
1789 * On entry, ARGPOS indicates the position of the first argument
1790 * of the matrix (or vector) ARGNAM.
1791 *
1792 * -- Written on April 1, 1998 by
1793 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1794 *
1795 * =====================================================================
1796 *
1797 * .. Parameters ..
1798  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1799  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1800  $ RSRC_
1801  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1802  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1803  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1804  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1805  INTEGER DESCMULT
1806  PARAMETER ( DESCMULT = 100 )
1807 * ..
1808 * .. Local Scalars ..
1809  INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1810 * ..
1811 * .. External Subroutines ..
1812  EXTERNAL blacs_gridinfo, pchkpbe, pscallsub, pssetpblas
1813 * ..
1814 * .. External Functions ..
1815  LOGICAL LSAME
1816  EXTERNAL LSAME
1817 * ..
1818 * .. Common Blocks ..
1819  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1820  $ JC, JX, JY
1821  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1822  $ descx( dlen_ ), descy( dlen_ )
1823  COMMON /pblasd/desca, descb, descc, descx, descy
1824  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1825  $ ja, jb, jc, jx, jy
1826 * ..
1827 * .. Executable Statements ..
1828 *
1829  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1830 *
1831  IF( lsame( argnam, 'A' ) ) THEN
1832 *
1833 * Check IA. Set all other OK, bad IA
1834 *
1835  CALL pssetpblas( ictxt )
1836  ia = -1
1837  infot = argpos + 1
1838  CALL pscallsub( subptr, scode )
1839  CALL pchkpbe( ictxt, nout, sname, infot )
1840 *
1841 * Check JA. Set all other OK, bad JA
1842 *
1843  CALL pssetpblas( ictxt )
1844  ja = -1
1845  infot = argpos + 2
1846  CALL pscallsub( subptr, scode )
1847  CALL pchkpbe( ictxt, nout, sname, infot )
1848 *
1849 * Check DESCA. Set all other OK, bad DESCA
1850 *
1851  DO 10 i = 1, dlen_
1852 *
1853 * Set I'th entry of DESCA to incorrect value, rest ok.
1854 *
1855  CALL pssetpblas( ictxt )
1856  desca( i ) = -2
1857  infot = ( ( argpos + 3 ) * descmult ) + i
1858  CALL pscallsub( subptr, scode )
1859  CALL pchkpbe( ictxt, nout, sname, infot )
1860 *
1861 * Extra tests for RSRCA, CSRCA, LDA
1862 *
1863  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1864  $ ( i.EQ.lld_ ) ) THEN
1865 *
1866  CALL pssetpblas( ictxt )
1867 *
1868 * Test RSRCA >= NPROW
1869 *
1870  IF( i.EQ.rsrc_ )
1871  $ desca( i ) = nprow
1872 *
1873 * Test CSRCA >= NPCOL
1874 *
1875  IF( i.EQ.csrc_ )
1876  $ desca( i ) = npcol
1877 *
1878 * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1879 *
1880  IF( i.EQ.lld_ ) THEN
1881  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1882  desca( i ) = 1
1883  ELSE
1884  desca( i ) = 0
1885  END IF
1886  END IF
1887 *
1888  infot = ( ( argpos + 3 ) * descmult ) + i
1889  CALL pscallsub( subptr, scode )
1890  CALL pchkpbe( ictxt, nout, sname, infot )
1891 *
1892  END IF
1893 *
1894  10 CONTINUE
1895 *
1896  ELSE IF( lsame( argnam, 'B' ) ) THEN
1897 *
1898 * Check IB. Set all other OK, bad IB
1899 *
1900  CALL pssetpblas( ictxt )
1901  ib = -1
1902  infot = argpos + 1
1903  CALL pscallsub( subptr, scode )
1904  CALL pchkpbe( ictxt, nout, sname, infot )
1905 *
1906 * Check JB. Set all other OK, bad JB
1907 *
1908  CALL pssetpblas( ictxt )
1909  jb = -1
1910  infot = argpos + 2
1911  CALL pscallsub( subptr, scode )
1912  CALL pchkpbe( ictxt, nout, sname, infot )
1913 *
1914 * Check DESCB. Set all other OK, bad DESCB
1915 *
1916  DO 20 i = 1, dlen_
1917 *
1918 * Set I'th entry of DESCB to incorrect value, rest ok.
1919 *
1920  CALL pssetpblas( ictxt )
1921  descb( i ) = -2
1922  infot = ( ( argpos + 3 ) * descmult ) + i
1923  CALL pscallsub( subptr, scode )
1924  CALL pchkpbe( ictxt, nout, sname, infot )
1925 *
1926 * Extra tests for RSRCB, CSRCB, LDB
1927 *
1928  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1929  $ ( i.EQ.lld_ ) ) THEN
1930 *
1931  CALL pssetpblas( ictxt )
1932 *
1933 * Test RSRCB >= NPROW
1934 *
1935  IF( i.EQ.rsrc_ )
1936  $ descb( i ) = nprow
1937 *
1938 * Test CSRCB >= NPCOL
1939 *
1940  IF( i.EQ.csrc_ )
1941  $ descb( i ) = npcol
1942 *
1943 * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1944 *
1945  IF( i.EQ.lld_ ) THEN
1946  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1947  descb( i ) = 1
1948  ELSE
1949  descb( i ) = 0
1950  END IF
1951  END IF
1952 *
1953  infot = ( ( argpos + 3 ) * descmult ) + i
1954  CALL pscallsub( subptr, scode )
1955  CALL pchkpbe( ictxt, nout, sname, infot )
1956 *
1957  END IF
1958 *
1959  20 CONTINUE
1960 *
1961  ELSE IF( lsame( argnam, 'C' ) ) THEN
1962 *
1963 * Check IC. Set all other OK, bad IC
1964 *
1965  CALL pssetpblas( ictxt )
1966  ic = -1
1967  infot = argpos + 1
1968  CALL pscallsub( subptr, scode )
1969  CALL pchkpbe( ictxt, nout, sname, infot )
1970 *
1971 * Check JC. Set all other OK, bad JC
1972 *
1973  CALL pssetpblas( ictxt )
1974  jc = -1
1975  infot = argpos + 2
1976  CALL pscallsub( subptr, scode )
1977  CALL pchkpbe( ictxt, nout, sname, infot )
1978 *
1979 * Check DESCC. Set all other OK, bad DESCC
1980 *
1981  DO 30 i = 1, dlen_
1982 *
1983 * Set I'th entry of DESCC to incorrect value, rest ok.
1984 *
1985  CALL pssetpblas( ictxt )
1986  descc( i ) = -2
1987  infot = ( ( argpos + 3 ) * descmult ) + i
1988  CALL pscallsub( subptr, scode )
1989  CALL pchkpbe( ictxt, nout, sname, infot )
1990 *
1991 * Extra tests for RSRCC, CSRCC, LDC
1992 *
1993  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1994  $ ( i.EQ.lld_ ) ) THEN
1995 *
1996  CALL pssetpblas( ictxt )
1997 *
1998 * Test RSRCC >= NPROW
1999 *
2000  IF( i.EQ.rsrc_ )
2001  $ descc( i ) = nprow
2002 *
2003 * Test CSRCC >= NPCOL
2004 *
2005  IF( i.EQ.csrc_ )
2006  $ descc( i ) = npcol
2007 *
2008 * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2009 *
2010  IF( i.EQ.lld_ ) THEN
2011  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2012  descc( i ) = 1
2013  ELSE
2014  descc( i ) = 0
2015  END IF
2016  END IF
2017 *
2018  infot = ( ( argpos + 3 ) * descmult ) + i
2019  CALL pscallsub( subptr, scode )
2020  CALL pchkpbe( ictxt, nout, sname, infot )
2021 *
2022  END IF
2023 *
2024  30 CONTINUE
2025 *
2026  ELSE IF( lsame( argnam, 'X' ) ) THEN
2027 *
2028 * Check IX. Set all other OK, bad IX
2029 *
2030  CALL pssetpblas( ictxt )
2031  ix = -1
2032  infot = argpos + 1
2033  CALL pscallsub( subptr, scode )
2034  CALL pchkpbe( ictxt, nout, sname, infot )
2035 *
2036 * Check JX. Set all other OK, bad JX
2037 *
2038  CALL pssetpblas( ictxt )
2039  jx = -1
2040  infot = argpos + 2
2041  CALL pscallsub( subptr, scode )
2042  CALL pchkpbe( ictxt, nout, sname, infot )
2043 *
2044 * Check DESCX. Set all other OK, bad DESCX
2045 *
2046  DO 40 i = 1, dlen_
2047 *
2048 * Set I'th entry of DESCX to incorrect value, rest ok.
2049 *
2050  CALL pssetpblas( ictxt )
2051  descx( i ) = -2
2052  infot = ( ( argpos + 3 ) * descmult ) + i
2053  CALL pscallsub( subptr, scode )
2054  CALL pchkpbe( ictxt, nout, sname, infot )
2055 *
2056 * Extra tests for RSRCX, CSRCX, LDX
2057 *
2058  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2059  $ ( i.EQ.lld_ ) ) THEN
2060 *
2061  CALL pssetpblas( ictxt )
2062 *
2063 * Test RSRCX >= NPROW
2064 *
2065  IF( i.EQ.rsrc_ )
2066  $ descx( i ) = nprow
2067 *
2068 * Test CSRCX >= NPCOL
2069 *
2070  IF( i.EQ.csrc_ )
2071  $ descx( i ) = npcol
2072 *
2073 * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2074 *
2075  IF( i.EQ.lld_ ) THEN
2076  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2077  descx( i ) = 1
2078  ELSE
2079  descx( i ) = 0
2080  END IF
2081  END IF
2082 *
2083  infot = ( ( argpos + 3 ) * descmult ) + i
2084  CALL pscallsub( subptr, scode )
2085  CALL pchkpbe( ictxt, nout, sname, infot )
2086 *
2087  END IF
2088 *
2089  40 CONTINUE
2090 *
2091 * Check INCX. Set all other OK, bad INCX
2092 *
2093  CALL pssetpblas( ictxt )
2094  incx = -1
2095  infot = argpos + 4
2096  CALL pscallsub( subptr, scode )
2097  CALL pchkpbe( ictxt, nout, sname, infot )
2098 *
2099  ELSE
2100 *
2101 * Check IY. Set all other OK, bad IY
2102 *
2103  CALL pssetpblas( ictxt )
2104  iy = -1
2105  infot = argpos + 1
2106  CALL pscallsub( subptr, scode )
2107  CALL pchkpbe( ictxt, nout, sname, infot )
2108 *
2109 * Check JY. Set all other OK, bad JY
2110 *
2111  CALL pssetpblas( ictxt )
2112  jy = -1
2113  infot = argpos + 2
2114  CALL pscallsub( subptr, scode )
2115  CALL pchkpbe( ictxt, nout, sname, infot )
2116 *
2117 * Check DESCY. Set all other OK, bad DESCY
2118 *
2119  DO 50 i = 1, dlen_
2120 *
2121 * Set I'th entry of DESCY to incorrect value, rest ok.
2122 *
2123  CALL pssetpblas( ictxt )
2124  descy( i ) = -2
2125  infot = ( ( argpos + 3 ) * descmult ) + i
2126  CALL pscallsub( subptr, scode )
2127  CALL pchkpbe( ictxt, nout, sname, infot )
2128 *
2129 * Extra tests for RSRCY, CSRCY, LDY
2130 *
2131  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2132  $ ( i.EQ.lld_ ) ) THEN
2133 *
2134  CALL pssetpblas( ictxt )
2135 *
2136 * Test RSRCY >= NPROW
2137 *
2138  IF( i.EQ.rsrc_ )
2139  $ descy( i ) = nprow
2140 *
2141 * Test CSRCY >= NPCOL
2142 *
2143  IF( i.EQ.csrc_ )
2144  $ descy( i ) = npcol
2145 *
2146 * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2147 *
2148  IF( i.EQ.lld_ ) THEN
2149  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2150  descy( i ) = 1
2151  ELSE
2152  descy( i ) = 0
2153  END IF
2154  END IF
2155 *
2156  infot = ( ( argpos + 3 ) * descmult ) + i
2157  CALL pscallsub( subptr, scode )
2158  CALL pchkpbe( ictxt, nout, sname, infot )
2159 *
2160  END IF
2161 *
2162  50 CONTINUE
2163 *
2164 * Check INCY. Set all other OK, bad INCY
2165 *
2166  CALL pssetpblas( ictxt )
2167  incy = -1
2168  infot = argpos + 4
2169  CALL pscallsub( subptr, scode )
2170  CALL pchkpbe( ictxt, nout, sname, infot )
2171 *
2172  END IF
2173 *
2174  RETURN
2175 *
2176 * End of PSCHKMAT
2177 *
2178  END
2179  SUBROUTINE pscallsub( SUBPTR, SCODE )
2181 * -- PBLAS test routine (version 2.0) --
2182 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2183 * and University of California, Berkeley.
2184 * April 1, 1998
2185 *
2186 * .. Scalar Arguments ..
2187  INTEGER SCODE
2188 * ..
2189 * .. Subroutine Arguments ..
2190  EXTERNAL subptr
2191 * ..
2192 *
2193 * Purpose
2194 * =======
2195 *
2196 * PSCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2197 * tified by SCODE.
2198 *
2199 * Notes
2200 * =====
2201 *
2202 * A description vector is associated with each 2D block-cyclicly dis-
2203 * tributed matrix. This vector stores the information required to
2204 * establish the mapping between a matrix entry and its corresponding
2205 * process and memory location.
2206 *
2207 * In the following comments, the character _ should be read as
2208 * "of the distributed matrix". Let A be a generic term for any 2D
2209 * block cyclicly distributed matrix. Its description vector is DESCA:
2210 *
2211 * NOTATION STORED IN EXPLANATION
2212 * ---------------- --------------- ------------------------------------
2213 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2214 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2215 * the NPROW x NPCOL BLACS process grid
2216 * A is distributed over. The context
2217 * itself is global, but the handle
2218 * (the integer value) may vary.
2219 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2220 * ted matrix A, M_A >= 0.
2221 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2222 * buted matrix A, N_A >= 0.
2223 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2224 * block of the matrix A, IMB_A > 0.
2225 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2226 * left block of the matrix A,
2227 * INB_A > 0.
2228 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2229 * bute the last M_A-IMB_A rows of A,
2230 * MB_A > 0.
2231 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2232 * bute the last N_A-INB_A columns of
2233 * A, NB_A > 0.
2234 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2235 * row of the matrix A is distributed,
2236 * NPROW > RSRC_A >= 0.
2237 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2238 * first column of A is distributed.
2239 * NPCOL > CSRC_A >= 0.
2240 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2241 * array storing the local blocks of
2242 * the distributed matrix A,
2243 * IF( Lc( 1, N_A ) > 0 )
2244 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2245 * ELSE
2246 * LLD_A >= 1.
2247 *
2248 * Let K be the number of rows of a matrix A starting at the global in-
2249 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2250 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2251 * receive if these K rows were distributed over NPROW processes. If K
2252 * is the number of columns of a matrix A starting at the global index
2253 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2254 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2255 * these K columns were distributed over NPCOL processes.
2256 *
2257 * The values of Lr() and Lc() may be determined via a call to the func-
2258 * tion PB_NUMROC:
2259 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2260 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2261 *
2262 * Arguments
2263 * =========
2264 *
2265 * SUBPTR (global input) SUBROUTINE
2266 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
2267 * EXTERNAL in the calling subroutine.
2268 *
2269 * SCODE (global input) INTEGER
2270 * On entry, SCODE specifies the calling sequence code.
2271 *
2272 * Calling sequence encodings
2273 * ==========================
2274 *
2275 * code Formal argument list Examples
2276 *
2277 * 11 (n, v1,v2) _SWAP, _COPY
2278 * 12 (n,s1, v1 ) _SCAL, _SCAL
2279 * 13 (n,s1, v1,v2) _AXPY, _DOT_
2280 * 14 (n,s1,i1,v1 ) _AMAX
2281 * 15 (n,u1, v1 ) _ASUM, _NRM2
2282 *
2283 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2284 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2285 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2286 * 24 ( m,n,s1,v1,v2,m1) _GER_
2287 * 25 (uplo, n,s1,v1, m1) _SYR
2288 * 26 (uplo, n,u1,v1, m1) _HER
2289 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2290 *
2291 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2292 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2293 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2294 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2295 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2296 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2297 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
2298 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2299 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2300 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2301 *
2302 * -- Written on April 1, 1998 by
2303 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2304 *
2305 * =====================================================================
2306 *
2307 * .. Parameters ..
2308  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2309  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2310  $ RSRC_
2311  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2312  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2313  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2314  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2315 * ..
2316 * .. Common Blocks ..
2317  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2318  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2319  $ JC, JX, JY, KDIM, MDIM, NDIM
2320  REAL USCLR, SCLR
2321  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2322  $ DESCX( DLEN_ ), DESCY( DLEN_ )
2323  REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2324  COMMON /pblasc/diag, side, transa, transb, uplo
2325  COMMON /pblasd/desca, descb, descc, descx, descy
2326  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2327  $ ja, jb, jc, jx, jy
2328  COMMON /pblasm/a, b, c
2329  COMMON /pblasn/kdim, mdim, ndim
2330  COMMON /pblass/sclr, usclr
2331  COMMON /pblasv/x, y
2332 * ..
2333 * .. Executable Statements ..
2334 *
2335 * Level 1 PBLAS
2336 *
2337  IF( scode.EQ.11 ) THEN
2338 *
2339  CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2340  $ incy )
2341 *
2342  ELSE IF( scode.EQ.12 ) THEN
2343 *
2344  CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2345 *
2346  ELSE IF( scode.EQ.13 ) THEN
2347 *
2348  CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2349  $ descy, incy )
2350 *
2351  ELSE IF( scode.EQ.14 ) THEN
2352 *
2353  CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2354 *
2355  ELSE IF( scode.EQ.15 ) THEN
2356 *
2357  CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2358 *
2359 * Level 2 PBLAS
2360 *
2361  ELSE IF( scode.EQ.21 ) THEN
2362 *
2363  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2364  $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2365 *
2366  ELSE IF( scode.EQ.22 ) THEN
2367 *
2368  CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2369  $ descx, incx, sclr, y, iy, jy, descy, incy )
2370 *
2371  ELSE IF( scode.EQ.23 ) THEN
2372 *
2373  CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2374  $ jx, descx, incx )
2375 *
2376  ELSE IF( scode.EQ.24 ) THEN
2377 *
2378  CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2379  $ jy, descy, incy, a, ia, ja, desca )
2380 *
2381  ELSE IF( scode.EQ.25 ) THEN
2382 *
2383  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2384  $ ja, desca )
2385 *
2386  ELSE IF( scode.EQ.26 ) THEN
2387 *
2388  CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2389  $ ja, desca )
2390 *
2391  ELSE IF( scode.EQ.27 ) THEN
2392 *
2393  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2394  $ jy, descy, incy, a, ia, ja, desca )
2395 *
2396 * Level 3 PBLAS
2397 *
2398  ELSE IF( scode.EQ.31 ) THEN
2399 *
2400  CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2401  $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2402 *
2403  ELSE IF( scode.EQ.32 ) THEN
2404 *
2405  CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2406  $ ib, jb, descb, sclr, c, ic, jc, descc )
2407 *
2408  ELSE IF( scode.EQ.33 ) THEN
2409 *
2410  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2411  $ sclr, c, ic, jc, descc )
2412 *
2413  ELSE IF( scode.EQ.34 ) THEN
2414 *
2415  CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2416  $ usclr, c, ic, jc, descc )
2417 *
2418  ELSE IF( scode.EQ.35 ) THEN
2419 *
2420  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2421  $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2422 *
2423  ELSE IF( scode.EQ.36 ) THEN
2424 *
2425  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2426  $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2427 *
2428  ELSE IF( scode.EQ.37 ) THEN
2429 *
2430  CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2431  $ jc, descc )
2432 *
2433  ELSE IF( scode.EQ.38 ) THEN
2434 *
2435  CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2436  $ ja, desca, b, ib, jb, descb )
2437 *
2438  ELSE IF( scode.EQ.39 ) THEN
2439 *
2440  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2441  $ c, ic, jc, descc )
2442 *
2443  ELSE IF( scode.EQ.40 ) THEN
2444 *
2445  CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2446  $ sclr, c, ic, jc, descc )
2447 *
2448  END IF
2449 *
2450  RETURN
2451 *
2452 * End of PSCALLSUB
2453 *
2454  END
2455  SUBROUTINE pserrset( ERR, ERRMAX, XTRUE, X )
2457 * -- PBLAS test routine (version 2.0) --
2458 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2459 * and University of California, Berkeley.
2460 * April 1, 1998
2461 *
2462 * .. Scalar Arguments ..
2463  REAL ERR, ERRMAX, X, XTRUE
2464 * ..
2465 *
2466 * Purpose
2467 * =======
2468 *
2469 * PSERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2470 * pares it with zero. ERRMAX accumulates the absolute error difference.
2471 *
2472 * Notes
2473 * =====
2474 *
2475 * A description vector is associated with each 2D block-cyclicly dis-
2476 * tributed matrix. This vector stores the information required to
2477 * establish the mapping between a matrix entry and its corresponding
2478 * process and memory location.
2479 *
2480 * In the following comments, the character _ should be read as
2481 * "of the distributed matrix". Let A be a generic term for any 2D
2482 * block cyclicly distributed matrix. Its description vector is DESCA:
2483 *
2484 * NOTATION STORED IN EXPLANATION
2485 * ---------------- --------------- ------------------------------------
2486 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2487 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2488 * the NPROW x NPCOL BLACS process grid
2489 * A is distributed over. The context
2490 * itself is global, but the handle
2491 * (the integer value) may vary.
2492 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2493 * ted matrix A, M_A >= 0.
2494 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2495 * buted matrix A, N_A >= 0.
2496 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2497 * block of the matrix A, IMB_A > 0.
2498 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2499 * left block of the matrix A,
2500 * INB_A > 0.
2501 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2502 * bute the last M_A-IMB_A rows of A,
2503 * MB_A > 0.
2504 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2505 * bute the last N_A-INB_A columns of
2506 * A, NB_A > 0.
2507 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2508 * row of the matrix A is distributed,
2509 * NPROW > RSRC_A >= 0.
2510 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2511 * first column of A is distributed.
2512 * NPCOL > CSRC_A >= 0.
2513 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2514 * array storing the local blocks of
2515 * the distributed matrix A,
2516 * IF( Lc( 1, N_A ) > 0 )
2517 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2518 * ELSE
2519 * LLD_A >= 1.
2520 *
2521 * Let K be the number of rows of a matrix A starting at the global in-
2522 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2523 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2524 * receive if these K rows were distributed over NPROW processes. If K
2525 * is the number of columns of a matrix A starting at the global index
2526 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2527 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2528 * these K columns were distributed over NPCOL processes.
2529 *
2530 * The values of Lr() and Lc() may be determined via a call to the func-
2531 * tion PB_NUMROC:
2532 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2533 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2534 *
2535 * Arguments
2536 * =========
2537 *
2538 * ERR (local output) REAL
2539 * On exit, ERR specifies the absolute difference |XTRUE - X|.
2540 *
2541 * ERRMAX (local input/local output) REAL
2542 * On entry, ERRMAX specifies a previously computed error. On
2543 * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2544 *
2545 * XTRUE (local input) REAL
2546 * On entry, XTRUE specifies the true value.
2547 *
2548 * X (local input) REAL
2549 * On entry, X specifies the value to be compared to XTRUE.
2550 *
2551 * -- Written on April 1, 1998 by
2552 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2553 *
2554 * =====================================================================
2555 *
2556 * .. External Functions ..
2557  REAL PSDIFF
2558  EXTERNAL PSDIFF
2559 * ..
2560 * .. Intrinsic Functions ..
2561  INTRINSIC abs, max
2562 * ..
2563 * .. Executable Statements ..
2564 *
2565  err = abs( psdiff( xtrue, x ) )
2566 *
2567  errmax = max( errmax, err )
2568 *
2569  RETURN
2570 *
2571 * End of PSERRSET
2572 *
2573  END
2574  SUBROUTINE pschkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2575  $ INFO )
2577 * -- PBLAS test routine (version 2.0) --
2578 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2579 * and University of California, Berkeley.
2580 * April 1, 1998
2581 *
2582 * .. Scalar Arguments ..
2583  INTEGER INCX, INFO, IX, JX, N
2584  REAL ERRMAX
2585 * ..
2586 * .. Array Arguments ..
2587  INTEGER DESCX( * )
2588  REAL PX( * ), X( * )
2589 * ..
2590 *
2591 * Purpose
2592 * =======
2593 *
2594 * PSCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2595 * local array entries are compared element by element, and their dif-
2596 * ference is tested against 0.0 as well as the epsilon machine. Notice
2597 * that this difference should be numerically exactly the zero machine,
2598 * but because of the possible fluctuation of some of the data we flag-
2599 * ged differently a difference less than twice the epsilon machine. The
2600 * largest error is also returned.
2601 *
2602 * Notes
2603 * =====
2604 *
2605 * A description vector is associated with each 2D block-cyclicly dis-
2606 * tributed matrix. This vector stores the information required to
2607 * establish the mapping between a matrix entry and its corresponding
2608 * process and memory location.
2609 *
2610 * In the following comments, the character _ should be read as
2611 * "of the distributed matrix". Let A be a generic term for any 2D
2612 * block cyclicly distributed matrix. Its description vector is DESCA:
2613 *
2614 * NOTATION STORED IN EXPLANATION
2615 * ---------------- --------------- ------------------------------------
2616 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2617 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2618 * the NPROW x NPCOL BLACS process grid
2619 * A is distributed over. The context
2620 * itself is global, but the handle
2621 * (the integer value) may vary.
2622 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2623 * ted matrix A, M_A >= 0.
2624 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2625 * buted matrix A, N_A >= 0.
2626 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2627 * block of the matrix A, IMB_A > 0.
2628 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2629 * left block of the matrix A,
2630 * INB_A > 0.
2631 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2632 * bute the last M_A-IMB_A rows of A,
2633 * MB_A > 0.
2634 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2635 * bute the last N_A-INB_A columns of
2636 * A, NB_A > 0.
2637 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2638 * row of the matrix A is distributed,
2639 * NPROW > RSRC_A >= 0.
2640 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2641 * first column of A is distributed.
2642 * NPCOL > CSRC_A >= 0.
2643 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2644 * array storing the local blocks of
2645 * the distributed matrix A,
2646 * IF( Lc( 1, N_A ) > 0 )
2647 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2648 * ELSE
2649 * LLD_A >= 1.
2650 *
2651 * Let K be the number of rows of a matrix A starting at the global in-
2652 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2653 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2654 * receive if these K rows were distributed over NPROW processes. If K
2655 * is the number of columns of a matrix A starting at the global index
2656 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2657 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2658 * these K columns were distributed over NPCOL processes.
2659 *
2660 * The values of Lr() and Lc() may be determined via a call to the func-
2661 * tion PB_NUMROC:
2662 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2663 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2664 *
2665 * Arguments
2666 * =========
2667 *
2668 * ERRMAX (global output) REAL
2669 * On exit, ERRMAX specifies the largest absolute element-wise
2670 * difference between sub( X ) and sub( PX ).
2671 *
2672 * N (global input) INTEGER
2673 * On entry, N specifies the length of the subvector operand
2674 * sub( X ). N must be at least zero.
2675 *
2676 * X (local input) REAL array
2677 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2678 * array contains a local copy of the initial entire matrix PX.
2679 *
2680 * PX (local input) REAL array
2681 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2682 * array contains the local entries of the matrix PX.
2683 *
2684 * IX (global input) INTEGER
2685 * On entry, IX specifies X's global row index, which points to
2686 * the beginning of the submatrix sub( X ).
2687 *
2688 * JX (global input) INTEGER
2689 * On entry, JX specifies X's global column index, which points
2690 * to the beginning of the submatrix sub( X ).
2691 *
2692 * DESCX (global and local input) INTEGER array
2693 * On entry, DESCX is an integer array of dimension DLEN_. This
2694 * is the array descriptor for the matrix X.
2695 *
2696 * INCX (global input) INTEGER
2697 * On entry, INCX specifies the global increment for the
2698 * elements of X. Only two values of INCX are supported in
2699 * this version, namely 1 and M_X. INCX must not be zero.
2700 *
2701 * INFO (global output) INTEGER
2702 * On exit, if INFO = 0, no error has been found,
2703 * If INFO > 0, the maximum abolute error found is in (0,eps],
2704 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2705 *
2706 * -- Written on April 1, 1998 by
2707 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2708 *
2709 * =====================================================================
2710 *
2711 * .. Parameters ..
2712  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2713  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2714  $ RSRC_
2715  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2716  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2717  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2718  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2719  REAL ZERO
2720  PARAMETER ( ZERO = 0.0e+0 )
2721 * ..
2722 * .. Local Scalars ..
2723  LOGICAL COLREP, ROWREP
2724  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2725  $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2726  $ MYCOL, MYROW, NPCOL, NPROW
2727  REAL ERR, EPS
2728 * ..
2729 * .. External Subroutines ..
2730  EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
2731 * ..
2732 * .. External Functions ..
2733  REAL PSLAMCH
2734  EXTERNAL pslamch
2735 * ..
2736 * .. Intrinsic Functions ..
2737  INTRINSIC abs, max, min, mod
2738 * ..
2739 * .. Executable Statements ..
2740 *
2741  info = 0
2742  errmax = zero
2743 *
2744 * Quick return if possible
2745 *
2746  IF( n.LE.0 )
2747  $ RETURN
2748 *
2749  ictxt = descx( ctxt_ )
2750  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2751 *
2752  eps = pslamch( ictxt, 'eps' )
2753 *
2754  CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2755  $ jjx, ixrow, ixcol )
2756 *
2757  ldx = descx( m_ )
2758  ldpx = descx( lld_ )
2759  rowrep = ( ixrow.EQ.-1 )
2760  colrep = ( ixcol.EQ.-1 )
2761 *
2762  IF( n.EQ.1 ) THEN
2763 *
2764  IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2765  $ ( mycol.EQ.ixcol .OR. colrep ) )
2766  $ CALL pserrset( err, errmax, x( ix+(jx-1)*ldx ),
2767  $ px( iix+(jjx-1)*ldpx ) )
2768 *
2769  ELSE IF( incx.EQ.descx( m_ ) ) THEN
2770 *
2771 * sub( X ) is a row vector
2772 *
2773  jb = descx( inb_ ) - jx + 1
2774  IF( jb.LE.0 )
2775  $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2776  jb = min( jb, n )
2777  jn = jx + jb - 1
2778 *
2779  IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2780 *
2781  icurcol = ixcol
2782  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2783  DO 10 j = jx, jn
2784  CALL pserrset( err, errmax, x( ix+(j-1)*ldx ),
2785  $ px( iix+(jjx-1)*ldpx ) )
2786  jjx = jjx + 1
2787  10 CONTINUE
2788  END IF
2789  icurcol = mod( icurcol+1, npcol )
2790 *
2791  DO 30 j = jn+1, jx+n-1, descx( nb_ )
2792  jb = min( jx+n-j, descx( nb_ ) )
2793 *
2794  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2795 *
2796  DO 20 kk = 0, jb-1
2797  CALL pserrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2798  $ px( iix+(jjx+kk-1)*ldpx ) )
2799  20 CONTINUE
2800 *
2801  jjx = jjx + jb
2802 *
2803  END IF
2804 *
2805  icurcol = mod( icurcol+1, npcol )
2806 *
2807  30 CONTINUE
2808 *
2809  END IF
2810 *
2811  ELSE
2812 *
2813 * sub( X ) is a column vector
2814 *
2815  ib = descx( imb_ ) - ix + 1
2816  IF( ib.LE.0 )
2817  $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2818  ib = min( ib, n )
2819  in = ix + ib - 1
2820 *
2821  IF( mycol.EQ.ixcol .OR. colrep ) THEN
2822 *
2823  icurrow = ixrow
2824  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2825  DO 40 i = ix, in
2826  CALL pserrset( err, errmax, x( i+(jx-1)*ldx ),
2827  $ px( iix+(jjx-1)*ldpx ) )
2828  iix = iix + 1
2829  40 CONTINUE
2830  END IF
2831  icurrow = mod( icurrow+1, nprow )
2832 *
2833  DO 60 i = in+1, ix+n-1, descx( mb_ )
2834  ib = min( ix+n-i, descx( mb_ ) )
2835 *
2836  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2837 *
2838  DO 50 kk = 0, ib-1
2839  CALL pserrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2840  $ px( iix+kk+(jjx-1)*ldpx ) )
2841  50 CONTINUE
2842 *
2843  iix = iix + ib
2844 *
2845  END IF
2846 *
2847  icurrow = mod( icurrow+1, nprow )
2848 *
2849  60 CONTINUE
2850 *
2851  END IF
2852 *
2853  END IF
2854 *
2855  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2856  $ -1, -1 )
2857 *
2858  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2859  info = 1
2860  ELSE IF( errmax.GT.eps ) THEN
2861  info = -1
2862  END IF
2863 *
2864  RETURN
2865 *
2866 * End of PSCHKVIN
2867 *
2868  END
2869  SUBROUTINE pschkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2871 * -- PBLAS test routine (version 2.0) --
2872 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2873 * and University of California, Berkeley.
2874 * April 1, 1998
2875 *
2876 * .. Scalar Arguments ..
2877  INTEGER INCX, INFO, IX, JX, N
2878 * ..
2879 * .. Array Arguments ..
2880  INTEGER DESCX( * )
2881  REAL PX( * ), X( * )
2882 * ..
2883 *
2884 * Purpose
2885 * =======
2886 *
2887 * PSCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2888 * The local array entries are compared element by element, and their
2889 * difference is tested against 0.0 as well as the epsilon machine. No-
2890 * tice that this difference should be numerically exactly the zero ma-
2891 * chine, but because of the possible movement of some of the data we
2892 * flagged differently a difference less than twice the epsilon machine.
2893 * The largest error is reported.
2894 *
2895 * Notes
2896 * =====
2897 *
2898 * A description vector is associated with each 2D block-cyclicly dis-
2899 * tributed matrix. This vector stores the information required to
2900 * establish the mapping between a matrix entry and its corresponding
2901 * process and memory location.
2902 *
2903 * In the following comments, the character _ should be read as
2904 * "of the distributed matrix". Let A be a generic term for any 2D
2905 * block cyclicly distributed matrix. Its description vector is DESCA:
2906 *
2907 * NOTATION STORED IN EXPLANATION
2908 * ---------------- --------------- ------------------------------------
2909 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2910 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2911 * the NPROW x NPCOL BLACS process grid
2912 * A is distributed over. The context
2913 * itself is global, but the handle
2914 * (the integer value) may vary.
2915 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2916 * ted matrix A, M_A >= 0.
2917 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2918 * buted matrix A, N_A >= 0.
2919 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2920 * block of the matrix A, IMB_A > 0.
2921 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2922 * left block of the matrix A,
2923 * INB_A > 0.
2924 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2925 * bute the last M_A-IMB_A rows of A,
2926 * MB_A > 0.
2927 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2928 * bute the last N_A-INB_A columns of
2929 * A, NB_A > 0.
2930 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2931 * row of the matrix A is distributed,
2932 * NPROW > RSRC_A >= 0.
2933 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2934 * first column of A is distributed.
2935 * NPCOL > CSRC_A >= 0.
2936 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2937 * array storing the local blocks of
2938 * the distributed matrix A,
2939 * IF( Lc( 1, N_A ) > 0 )
2940 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2941 * ELSE
2942 * LLD_A >= 1.
2943 *
2944 * Let K be the number of rows of a matrix A starting at the global in-
2945 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2946 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2947 * receive if these K rows were distributed over NPROW processes. If K
2948 * is the number of columns of a matrix A starting at the global index
2949 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2950 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2951 * these K columns were distributed over NPCOL processes.
2952 *
2953 * The values of Lr() and Lc() may be determined via a call to the func-
2954 * tion PB_NUMROC:
2955 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2956 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2957 *
2958 * Arguments
2959 * =========
2960 *
2961 * N (global input) INTEGER
2962 * On entry, N specifies the length of the subvector operand
2963 * sub( X ). N must be at least zero.
2964 *
2965 * X (local input) REAL array
2966 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2967 * array contains a local copy of the initial entire matrix PX.
2968 *
2969 * PX (local input) REAL array
2970 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2971 * array contains the local entries of the matrix PX.
2972 *
2973 * IX (global input) INTEGER
2974 * On entry, IX specifies X's global row index, which points to
2975 * the beginning of the submatrix sub( X ).
2976 *
2977 * JX (global input) INTEGER
2978 * On entry, JX specifies X's global column index, which points
2979 * to the beginning of the submatrix sub( X ).
2980 *
2981 * DESCX (global and local input) INTEGER array
2982 * On entry, DESCX is an integer array of dimension DLEN_. This
2983 * is the array descriptor for the matrix X.
2984 *
2985 * INCX (global input) INTEGER
2986 * On entry, INCX specifies the global increment for the
2987 * elements of X. Only two values of INCX are supported in
2988 * this version, namely 1 and M_X. INCX must not be zero.
2989 *
2990 * INFO (global output) INTEGER
2991 * On exit, if INFO = 0, no error has been found,
2992 * If INFO > 0, the maximum abolute error found is in (0,eps],
2993 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2994 *
2995 * -- Written on April 1, 1998 by
2996 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2997 *
2998 * =====================================================================
2999 *
3000 * .. Parameters ..
3001  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3002  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3003  $ RSRC_
3004  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3005  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3006  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3007  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3008  REAL ZERO
3009  PARAMETER ( ZERO = 0.0e+0 )
3010 * ..
3011 * .. Local Scalars ..
3012  LOGICAL COLREP, ROWREP
3013  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3014  $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3015  $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3016  $ nprow, nqall
3017  REAL EPS, ERR, ERRMAX
3018 * ..
3019 * .. External Subroutines ..
3020  EXTERNAL BLACS_GRIDINFO, PSERRSET, SGAMX2D
3021 * ..
3022 * .. External Functions ..
3023  INTEGER PB_NUMROC
3024  REAL PSLAMCH
3025  EXTERNAL PSLAMCH, PB_NUMROC
3026 * ..
3027 * .. Intrinsic Functions ..
3028  INTRINSIC abs, max, min, mod
3029 * ..
3030 * .. Executable Statements ..
3031 *
3032  info = 0
3033  errmax = zero
3034 *
3035 * Quick return if possible
3036 *
3037  IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3038  $ RETURN
3039 *
3040 * Start the operations
3041 *
3042  ictxt = descx( ctxt_ )
3043  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3044 *
3045  eps = pslamch( ictxt, 'eps' )
3046 *
3047  mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3048  $ myrow, descx( rsrc_ ), nprow )
3049  nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3050  $ mycol, descx( csrc_ ), npcol )
3051 *
3052  mbx = descx( mb_ )
3053  nbx = descx( nb_ )
3054  ldx = descx( m_ )
3055  ldpx = descx( lld_ )
3056  icurrow = descx( rsrc_ )
3057  icurcol = descx( csrc_ )
3058  rowrep = ( icurrow.EQ.-1 )
3059  colrep = ( icurcol.EQ.-1 )
3060  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3061  imbx = descx( imb_ )
3062  ELSE
3063  imbx = mbx
3064  END IF
3065  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3066  inbx = descx( inb_ )
3067  ELSE
3068  inbx = nbx
3069  END IF
3070  IF( rowrep ) THEN
3071  myrowdist = 0
3072  ELSE
3073  myrowdist = mod( myrow - icurrow + nprow, nprow )
3074  END IF
3075  IF( colrep ) THEN
3076  mycoldist = 0
3077  ELSE
3078  mycoldist = mod( mycol - icurcol + npcol, npcol )
3079  END IF
3080  ii = 1
3081  jj = 1
3082 *
3083  IF( incx.EQ.descx( m_ ) ) THEN
3084 *
3085 * sub( X ) is a row vector
3086 *
3087  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3088 *
3089  i = 1
3090  IF( mycoldist.EQ.0 ) THEN
3091  j = 1
3092  ELSE
3093  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3094  END IF
3095  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3096  ib = min( descx( m_ ), descx( imb_ ) )
3097 *
3098  DO 20 kk = 0, jb-1
3099  DO 10 ll = 0, ib-1
3100  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3101  $ CALL pserrset( err, errmax,
3102  $ x( i+ll+(j+kk-1)*ldx ),
3103  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3104  10 CONTINUE
3105  20 CONTINUE
3106  IF( colrep ) THEN
3107  j = j + inbx
3108  ELSE
3109  j = j + inbx + ( npcol - 1 ) * nbx
3110  END IF
3111 *
3112  DO 50 jj = inbx+1, nqall, nbx
3113  jb = min( nqall-jj+1, nbx )
3114 *
3115  DO 40 kk = 0, jb-1
3116  DO 30 ll = 0, ib-1
3117  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3118  $ j+kk.GT.jx+n-1 )
3119  $ CALL pserrset( err, errmax,
3120  $ x( i+ll+(j+kk-1)*ldx ),
3121  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3122  30 CONTINUE
3123  40 CONTINUE
3124 *
3125  IF( colrep ) THEN
3126  j = j + nbx
3127  ELSE
3128  j = j + npcol * nbx
3129  END IF
3130 *
3131  50 CONTINUE
3132 *
3133  ii = ii + ib
3134 *
3135  END IF
3136 *
3137  icurrow = mod( icurrow + 1, nprow )
3138 *
3139  DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3140  ib = min( descx( m_ ) - i + 1, mbx )
3141 *
3142  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3143 *
3144  IF( mycoldist.EQ.0 ) THEN
3145  j = 1
3146  ELSE
3147  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3148  END IF
3149 *
3150  jj = 1
3151  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3152  DO 70 kk = 0, jb-1
3153  DO 60 ll = 0, ib-1
3154  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3155  $ j+kk.GT.jx+n-1 )
3156  $ CALL pserrset( err, errmax,
3157  $ x( i+ll+(j+kk-1)*ldx ),
3158  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3159  60 CONTINUE
3160  70 CONTINUE
3161  IF( colrep ) THEN
3162  j = j + inbx
3163  ELSE
3164  j = j + inbx + ( npcol - 1 ) * nbx
3165  END IF
3166 *
3167  DO 100 jj = inbx+1, nqall, nbx
3168  jb = min( nqall-jj+1, nbx )
3169 *
3170  DO 90 kk = 0, jb-1
3171  DO 80 ll = 0, ib-1
3172  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3173  $ j+kk.GT.jx+n-1 )
3174  $ CALL pserrset( err, errmax,
3175  $ x( i+ll+(j+kk-1)*ldx ),
3176  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3177  80 CONTINUE
3178  90 CONTINUE
3179 *
3180  IF( colrep ) THEN
3181  j = j + nbx
3182  ELSE
3183  j = j + npcol * nbx
3184  END IF
3185 *
3186  100 CONTINUE
3187 *
3188  ii = ii + ib
3189 *
3190  END IF
3191 *
3192  icurrow = mod( icurrow + 1, nprow )
3193 *
3194  110 CONTINUE
3195 *
3196  ELSE
3197 *
3198 * sub( X ) is a column vector
3199 *
3200  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3201 *
3202  j = 1
3203  IF( myrowdist.EQ.0 ) THEN
3204  i = 1
3205  ELSE
3206  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3207  END IF
3208  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3209  jb = min( descx( n_ ), descx( inb_ ) )
3210 *
3211  DO 130 kk = 0, jb-1
3212  DO 120 ll = 0, ib-1
3213  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3214  $ CALL pserrset( err, errmax,
3215  $ x( i+ll+(j+kk-1)*ldx ),
3216  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3217  120 CONTINUE
3218  130 CONTINUE
3219  IF( rowrep ) THEN
3220  i = i + imbx
3221  ELSE
3222  i = i + imbx + ( nprow - 1 ) * mbx
3223  END IF
3224 *
3225  DO 160 ii = imbx+1, mpall, mbx
3226  ib = min( mpall-ii+1, mbx )
3227 *
3228  DO 150 kk = 0, jb-1
3229  DO 140 ll = 0, ib-1
3230  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3231  $ i+ll.GT.ix+n-1 )
3232  $ CALL pserrset( err, errmax,
3233  $ x( i+ll+(j+kk-1)*ldx ),
3234  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3235  140 CONTINUE
3236  150 CONTINUE
3237 *
3238  IF( rowrep ) THEN
3239  i = i + mbx
3240  ELSE
3241  i = i + nprow * mbx
3242  END IF
3243 *
3244  160 CONTINUE
3245 *
3246  jj = jj + jb
3247 *
3248  END IF
3249 *
3250  icurcol = mod( icurcol + 1, npcol )
3251 *
3252  DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3253  jb = min( descx( n_ ) - j + 1, nbx )
3254 *
3255  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3256 *
3257  IF( myrowdist.EQ.0 ) THEN
3258  i = 1
3259  ELSE
3260  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3261  END IF
3262 *
3263  ii = 1
3264  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3265  DO 180 kk = 0, jb-1
3266  DO 170 ll = 0, ib-1
3267  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3268  $ i+ll.GT.ix+n-1 )
3269  $ CALL pserrset( err, errmax,
3270  $ x( i+ll+(j+kk-1)*ldx ),
3271  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3272  170 CONTINUE
3273  180 CONTINUE
3274  IF( rowrep ) THEN
3275  i = i + imbx
3276  ELSE
3277  i = i + imbx + ( nprow - 1 ) * mbx
3278  END IF
3279 *
3280  DO 210 ii = imbx+1, mpall, mbx
3281  ib = min( mpall-ii+1, mbx )
3282 *
3283  DO 200 kk = 0, jb-1
3284  DO 190 ll = 0, ib-1
3285  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3286  $ i+ll.GT.ix+n-1 )
3287  $ CALL pserrset( err, errmax,
3288  $ x( i+ll+(j+kk-1)*ldx ),
3289  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3290  190 CONTINUE
3291  200 CONTINUE
3292 *
3293  IF( rowrep ) THEN
3294  i = i + mbx
3295  ELSE
3296  i = i + nprow * mbx
3297  END IF
3298 *
3299  210 CONTINUE
3300 *
3301  jj = jj + jb
3302 *
3303  END IF
3304 *
3305  icurcol = mod( icurcol + 1, npcol )
3306 *
3307  220 CONTINUE
3308 *
3309  END IF
3310 *
3311  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3312  $ -1, -1 )
3313 *
3314  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3315  info = 1
3316  ELSE IF( errmax.GT.eps ) THEN
3317  info = -1
3318  END IF
3319 *
3320  RETURN
3321 *
3322 * End of PSCHKVOUT
3323 *
3324  END
3325  SUBROUTINE pschkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3327 * -- PBLAS test routine (version 2.0) --
3328 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3329 * and University of California, Berkeley.
3330 * April 1, 1998
3331 *
3332 * .. Scalar Arguments ..
3333  INTEGER IA, INFO, JA, M, N
3334  REAL ERRMAX
3335 * ..
3336 * .. Array Arguments ..
3337  INTEGER DESCA( * )
3338  REAL PA( * ), A( * )
3339 * ..
3340 *
3341 * Purpose
3342 * =======
3343 *
3344 * PSCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3345 * local array entries are compared element by element, and their dif-
3346 * ference is tested against 0.0 as well as the epsilon machine. Notice
3347 * that this difference should be numerically exactly the zero machine,
3348 * but because of the possible fluctuation of some of the data we flag-
3349 * ged differently a difference less than twice the epsilon machine. The
3350 * largest error is also returned.
3351 *
3352 * Notes
3353 * =====
3354 *
3355 * A description vector is associated with each 2D block-cyclicly dis-
3356 * tributed matrix. This vector stores the information required to
3357 * establish the mapping between a matrix entry and its corresponding
3358 * process and memory location.
3359 *
3360 * In the following comments, the character _ should be read as
3361 * "of the distributed matrix". Let A be a generic term for any 2D
3362 * block cyclicly distributed matrix. Its description vector is DESCA:
3363 *
3364 * NOTATION STORED IN EXPLANATION
3365 * ---------------- --------------- ------------------------------------
3366 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3367 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3368 * the NPROW x NPCOL BLACS process grid
3369 * A is distributed over. The context
3370 * itself is global, but the handle
3371 * (the integer value) may vary.
3372 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3373 * ted matrix A, M_A >= 0.
3374 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3375 * buted matrix A, N_A >= 0.
3376 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3377 * block of the matrix A, IMB_A > 0.
3378 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3379 * left block of the matrix A,
3380 * INB_A > 0.
3381 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3382 * bute the last M_A-IMB_A rows of A,
3383 * MB_A > 0.
3384 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3385 * bute the last N_A-INB_A columns of
3386 * A, NB_A > 0.
3387 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3388 * row of the matrix A is distributed,
3389 * NPROW > RSRC_A >= 0.
3390 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3391 * first column of A is distributed.
3392 * NPCOL > CSRC_A >= 0.
3393 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3394 * array storing the local blocks of
3395 * the distributed matrix A,
3396 * IF( Lc( 1, N_A ) > 0 )
3397 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3398 * ELSE
3399 * LLD_A >= 1.
3400 *
3401 * Let K be the number of rows of a matrix A starting at the global in-
3402 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3403 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3404 * receive if these K rows were distributed over NPROW processes. If K
3405 * is the number of columns of a matrix A starting at the global index
3406 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3407 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3408 * these K columns were distributed over NPCOL processes.
3409 *
3410 * The values of Lr() and Lc() may be determined via a call to the func-
3411 * tion PB_NUMROC:
3412 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3413 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3414 *
3415 * Arguments
3416 * =========
3417 *
3418 * ERRMAX (global output) REAL
3419 * On exit, ERRMAX specifies the largest absolute element-wise
3420 * difference between sub( A ) and sub( PA ).
3421 *
3422 * M (global input) INTEGER
3423 * On entry, M specifies the number of rows of the submatrix
3424 * operand sub( A ). M must be at least zero.
3425 *
3426 * N (global input) INTEGER
3427 * On entry, N specifies the number of columns of the submatrix
3428 * operand sub( A ). N must be at least zero.
3429 *
3430 * A (local input) REAL array
3431 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3432 * array contains a local copy of the initial entire matrix PA.
3433 *
3434 * PA (local input) REAL array
3435 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3436 * array contains the local entries of the matrix PA.
3437 *
3438 * IA (global input) INTEGER
3439 * On entry, IA specifies A's global row index, which points to
3440 * the beginning of the submatrix sub( A ).
3441 *
3442 * JA (global input) INTEGER
3443 * On entry, JA specifies A's global column index, which points
3444 * to the beginning of the submatrix sub( A ).
3445 *
3446 * DESCA (global and local input) INTEGER array
3447 * On entry, DESCA is an integer array of dimension DLEN_. This
3448 * is the array descriptor for the matrix A.
3449 *
3450 * INFO (global output) INTEGER
3451 * On exit, if INFO = 0, no error has been found,
3452 * If INFO > 0, the maximum abolute error found is in (0,eps],
3453 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3454 *
3455 * -- Written on April 1, 1998 by
3456 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3457 *
3458 * =====================================================================
3459 *
3460 * .. Parameters ..
3461  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3462  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3463  $ RSRC_
3464  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3465  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3466  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3467  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3468  REAL ZERO
3469  PARAMETER ( ZERO = 0.0e+0 )
3470 * ..
3471 * .. Local Scalars ..
3472  LOGICAL COLREP, ROWREP
3473  INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3474  $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3475  $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3476  REAL ERR, EPS
3477 * ..
3478 * .. External Subroutines ..
3479  EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
3480 * ..
3481 * .. External Functions ..
3482  REAL PSLAMCH
3483  EXTERNAL pslamch
3484 * ..
3485 * .. Intrinsic Functions ..
3486  INTRINSIC abs, max, min, mod
3487 * ..
3488 * .. Executable Statements ..
3489 *
3490  info = 0
3491  errmax = zero
3492 *
3493 * Quick return if posssible
3494 *
3495  IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3496  $ RETURN
3497 *
3498 * Start the operations
3499 *
3500  ictxt = desca( ctxt_ )
3501  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3502 *
3503  eps = pslamch( ictxt, 'eps' )
3504 *
3505  CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3506  $ jja, iarow, iacol )
3507 *
3508  ii = iia
3509  jj = jja
3510  lda = desca( m_ )
3511  ldpa = desca( lld_ )
3512  icurrow = iarow
3513  icurcol = iacol
3514  rowrep = ( iarow.EQ.-1 )
3515  colrep = ( iacol.EQ.-1 )
3516 *
3517 * Handle the first block of column separately
3518 *
3519  jb = desca( inb_ ) - ja + 1
3520  IF( jb.LE.0 )
3521  $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3522  jb = min( jb, n )
3523  jn = ja + jb - 1
3524 *
3525  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3526 *
3527  DO 40 h = 0, jb-1
3528  ib = desca( imb_ ) - ia + 1
3529  IF( ib.LE.0 )
3530  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3531  ib = min( ib, m )
3532  in = ia + ib - 1
3533  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3534  DO 10 k = 0, ib-1
3535  CALL pserrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3536  $ pa( ii+k+(jj+h-1)*ldpa ) )
3537  10 CONTINUE
3538  ii = ii + ib
3539  END IF
3540  icurrow = mod( icurrow+1, nprow )
3541 *
3542 * Loop over remaining block of rows
3543 *
3544  DO 30 i = in+1, ia+m-1, desca( mb_ )
3545  ib = min( desca( mb_ ), ia+m-i )
3546  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3547  DO 20 k = 0, ib-1
3548  CALL pserrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3549  $ pa( ii+k+(jj+h-1)*ldpa ) )
3550  20 CONTINUE
3551  ii = ii + ib
3552  END IF
3553  icurrow = mod( icurrow+1, nprow )
3554  30 CONTINUE
3555 *
3556  ii = iia
3557  icurrow = iarow
3558  40 CONTINUE
3559 *
3560  jj = jj + jb
3561 *
3562  END IF
3563 *
3564  icurcol = mod( icurcol+1, npcol )
3565 *
3566 * Loop over remaining column blocks
3567 *
3568  DO 90 j = jn+1, ja+n-1, desca( nb_ )
3569  jb = min( desca( nb_ ), ja+n-j )
3570  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3571  DO 80 h = 0, jb-1
3572  ib = desca( imb_ ) - ia + 1
3573  IF( ib.LE.0 )
3574  $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3575  ib = min( ib, m )
3576  in = ia + ib - 1
3577  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3578  DO 50 k = 0, ib-1
3579  CALL pserrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3580  $ pa( ii+k+(jj+h-1)*ldpa ) )
3581  50 CONTINUE
3582  ii = ii + ib
3583  END IF
3584  icurrow = mod( icurrow+1, nprow )
3585 *
3586 * Loop over remaining block of rows
3587 *
3588  DO 70 i = in+1, ia+m-1, desca( mb_ )
3589  ib = min( desca( mb_ ), ia+m-i )
3590  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3591  DO 60 k = 0, ib-1
3592  CALL pserrset( err, errmax,
3593  $ a( i+k+(j+h-1)*lda ),
3594  $ pa( ii+k+(jj+h-1)*ldpa ) )
3595  60 CONTINUE
3596  ii = ii + ib
3597  END IF
3598  icurrow = mod( icurrow+1, nprow )
3599  70 CONTINUE
3600 *
3601  ii = iia
3602  icurrow = iarow
3603  80 CONTINUE
3604 *
3605  jj = jj + jb
3606  END IF
3607 *
3608  icurcol = mod( icurcol+1, npcol )
3609 *
3610  90 CONTINUE
3611 *
3612  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3613  $ -1, -1 )
3614 *
3615  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3616  info = 1
3617  ELSE IF( errmax.GT.eps ) THEN
3618  info = -1
3619  END IF
3620 *
3621  RETURN
3622 *
3623 * End of PSCHKMIN
3624 *
3625  END
3626  SUBROUTINE pschkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3628 * -- PBLAS test routine (version 2.0) --
3629 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3630 * and University of California, Berkeley.
3631 * April 1, 1998
3632 *
3633 * .. Scalar Arguments ..
3634  INTEGER IA, INFO, JA, M, N
3635 * ..
3636 * .. Array Arguments ..
3637  INTEGER DESCA( * )
3638  REAL A( * ), PA( * )
3639 * ..
3640 *
3641 * Purpose
3642 * =======
3643 *
3644 * PSCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3645 * The local array entries are compared element by element, and their
3646 * difference is tested against 0.0 as well as the epsilon machine. No-
3647 * tice that this difference should be numerically exactly the zero ma-
3648 * chine, but because of the possible movement of some of the data we
3649 * flagged differently a difference less than twice the epsilon machine.
3650 * The largest error is reported.
3651 *
3652 * Notes
3653 * =====
3654 *
3655 * A description vector is associated with each 2D block-cyclicly dis-
3656 * tributed matrix. This vector stores the information required to
3657 * establish the mapping between a matrix entry and its corresponding
3658 * process and memory location.
3659 *
3660 * In the following comments, the character _ should be read as
3661 * "of the distributed matrix". Let A be a generic term for any 2D
3662 * block cyclicly distributed matrix. Its description vector is DESCA:
3663 *
3664 * NOTATION STORED IN EXPLANATION
3665 * ---------------- --------------- ------------------------------------
3666 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3667 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3668 * the NPROW x NPCOL BLACS process grid
3669 * A is distributed over. The context
3670 * itself is global, but the handle
3671 * (the integer value) may vary.
3672 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3673 * ted matrix A, M_A >= 0.
3674 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3675 * buted matrix A, N_A >= 0.
3676 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3677 * block of the matrix A, IMB_A > 0.
3678 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3679 * left block of the matrix A,
3680 * INB_A > 0.
3681 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3682 * bute the last M_A-IMB_A rows of A,
3683 * MB_A > 0.
3684 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3685 * bute the last N_A-INB_A columns of
3686 * A, NB_A > 0.
3687 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3688 * row of the matrix A is distributed,
3689 * NPROW > RSRC_A >= 0.
3690 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3691 * first column of A is distributed.
3692 * NPCOL > CSRC_A >= 0.
3693 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3694 * array storing the local blocks of
3695 * the distributed matrix A,
3696 * IF( Lc( 1, N_A ) > 0 )
3697 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3698 * ELSE
3699 * LLD_A >= 1.
3700 *
3701 * Let K be the number of rows of a matrix A starting at the global in-
3702 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3703 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3704 * receive if these K rows were distributed over NPROW processes. If K
3705 * is the number of columns of a matrix A starting at the global index
3706 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3707 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3708 * these K columns were distributed over NPCOL processes.
3709 *
3710 * The values of Lr() and Lc() may be determined via a call to the func-
3711 * tion PB_NUMROC:
3712 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3713 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3714 *
3715 * Arguments
3716 * =========
3717 *
3718 * M (global input) INTEGER
3719 * On entry, M specifies the number of rows of the submatrix
3720 * sub( PA ). M must be at least zero.
3721 *
3722 * N (global input) INTEGER
3723 * On entry, N specifies the number of columns of the submatrix
3724 * sub( PA ). N must be at least zero.
3725 *
3726 * A (local input) REAL array
3727 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3728 * array contains a local copy of the initial entire matrix PA.
3729 *
3730 * PA (local input) REAL array
3731 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3732 * array contains the local entries of the matrix PA.
3733 *
3734 * IA (global input) INTEGER
3735 * On entry, IA specifies A's global row index, which points to
3736 * the beginning of the submatrix sub( A ).
3737 *
3738 * JA (global input) INTEGER
3739 * On entry, JA specifies A's global column index, which points
3740 * to the beginning of the submatrix sub( A ).
3741 *
3742 * DESCA (global and local input) INTEGER array
3743 * On entry, DESCA is an integer array of dimension DLEN_. This
3744 * is the array descriptor for the matrix A.
3745 *
3746 * INFO (global output) INTEGER
3747 * On exit, if INFO = 0, no error has been found,
3748 * If INFO > 0, the maximum abolute error found is in (0,eps],
3749 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3750 *
3751 * -- Written on April 1, 1998 by
3752 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3753 *
3754 * =====================================================================
3755 *
3756 * .. Parameters ..
3757  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3758  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3759  $ RSRC_
3760  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3761  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3762  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3763  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3764  REAL ZERO
3765  PARAMETER ( ZERO = 0.0e+0 )
3766 * ..
3767 * .. Local Scalars ..
3768  LOGICAL COLREP, ROWREP
3769  INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3770  $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3771  $ NPCOL, NPROW
3772  REAL EPS, ERR, ERRMAX
3773 * ..
3774 * .. External Subroutines ..
3775  EXTERNAL blacs_gridinfo, pserrset, sgamx2d
3776 * ..
3777 * .. External Functions ..
3778  INTEGER PB_NUMROC
3779  REAL PSLAMCH
3780  EXTERNAL PSLAMCH, PB_NUMROC
3781 * ..
3782 * .. Intrinsic Functions ..
3783  INTRINSIC max, min, mod
3784 * ..
3785 * .. Executable Statements ..
3786 *
3787  info = 0
3788  errmax = zero
3789 *
3790 * Quick return if possible
3791 *
3792  IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3793  $ RETURN
3794 *
3795 * Start the operations
3796 *
3797  ictxt = desca( ctxt_ )
3798  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3799 *
3800  eps = pslamch( ictxt, 'eps' )
3801 *
3802  mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3803  $ myrow, desca( rsrc_ ), nprow )
3804 *
3805  lda = desca( m_ )
3806  ldpa = desca( lld_ )
3807 *
3808  ii = 1
3809  jj = 1
3810  rowrep = ( desca( rsrc_ ).EQ.-1 )
3811  colrep = ( desca( csrc_ ).EQ.-1 )
3812  icurcol = desca( csrc_ )
3813  IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3814  imba = desca( imb_ )
3815  ELSE
3816  imba = desca( mb_ )
3817  END IF
3818  IF( rowrep ) THEN
3819  myrowdist = 0
3820  ELSE
3821  myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3822  END IF
3823 *
3824  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3825 *
3826  j = 1
3827  IF( myrowdist.EQ.0 ) THEN
3828  i = 1
3829  ELSE
3830  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3831  END IF
3832  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3833  jb = min( desca( n_ ), desca( inb_ ) )
3834 *
3835  DO 20 kk = 0, jb-1
3836  DO 10 ll = 0, ib-1
3837  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3838  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3839  $ CALL pserrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3840  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3841  10 CONTINUE
3842  20 CONTINUE
3843  IF( rowrep ) THEN
3844  i = i + imba
3845  ELSE
3846  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3847  END IF
3848 *
3849  DO 50 ii = imba + 1, mpall, desca( mb_ )
3850  ib = min( mpall-ii+1, desca( mb_ ) )
3851 *
3852  DO 40 kk = 0, jb-1
3853  DO 30 ll = 0, ib-1
3854  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3855  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3856  $ CALL pserrset( err, errmax,
3857  $ a( i+ll+(j+kk-1)*lda ),
3858  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3859  30 CONTINUE
3860  40 CONTINUE
3861 *
3862  IF( rowrep ) THEN
3863  i = i + desca( mb_ )
3864  ELSE
3865  i = i + nprow * desca( mb_ )
3866  END IF
3867 *
3868  50 CONTINUE
3869 *
3870  jj = jj + jb
3871 *
3872  END IF
3873 *
3874  icurcol = mod( icurcol + 1, npcol )
3875 *
3876  DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3877  jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3878 *
3879  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3880 *
3881  IF( myrowdist.EQ.0 ) THEN
3882  i = 1
3883  ELSE
3884  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3885  END IF
3886 *
3887  ii = 1
3888  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3889  DO 70 kk = 0, jb-1
3890  DO 60 ll = 0, ib-1
3891  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3892  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3893  $ CALL pserrset( err, errmax,
3894  $ a( i+ll+(j+kk-1)*lda ),
3895  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3896  60 CONTINUE
3897  70 CONTINUE
3898  IF( rowrep ) THEN
3899  i = i + imba
3900  ELSE
3901  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3902  END IF
3903 *
3904  DO 100 ii = imba+1, mpall, desca( mb_ )
3905  ib = min( mpall-ii+1, desca( mb_ ) )
3906 *
3907  DO 90 kk = 0, jb-1
3908  DO 80 ll = 0, ib-1
3909  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3910  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3911  $ CALL pserrset( err, errmax,
3912  $ a( i+ll+(j+kk-1)*lda ),
3913  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3914  80 CONTINUE
3915  90 CONTINUE
3916 *
3917  IF( rowrep ) THEN
3918  i = i + desca( mb_ )
3919  ELSE
3920  i = i + nprow * desca( mb_ )
3921  END IF
3922 *
3923  100 CONTINUE
3924 *
3925  jj = jj + jb
3926 *
3927  END IF
3928 *
3929  icurcol = mod( icurcol + 1, npcol )
3930 * INSERT MODE
3931  110 CONTINUE
3932 *
3933  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3934  $ -1, -1 )
3935 *
3936  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3937  info = 1
3938  ELSE IF( errmax.GT.eps ) THEN
3939  info = -1
3940  END IF
3941 *
3942  RETURN
3943 *
3944 * End of PSCHKMOUT
3945 *
3946  END
3947  SUBROUTINE psmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3948  $ CMATNM )
3950 * -- PBLAS test routine (version 2.0) --
3951 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3952 * and University of California, Berkeley.
3953 * April 1, 1998
3954 *
3955 * .. Scalar Arguments ..
3956  INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3957 * ..
3958 * .. Array Arguments ..
3959  CHARACTER*(*) CMATNM
3960  REAL A( LDA, * )
3961 * ..
3962 *
3963 * Purpose
3964 * =======
3965 *
3966 * PSMPRNT prints to the standard output an array A of size m by n. Only
3967 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3968 *
3969 * Arguments
3970 * =========
3971 *
3972 * ICTXT (local input) INTEGER
3973 * On entry, ICTXT specifies the BLACS context handle, indica-
3974 * ting the global context of the operation. The context itself
3975 * is global, but the value of ICTXT is local.
3976 *
3977 * NOUT (global input) INTEGER
3978 * On entry, NOUT specifies the unit number for the output file.
3979 * When NOUT is 6, output to screen, when NOUT is 0, output to
3980 * stderr. NOUT is only defined for process 0.
3981 *
3982 * M (global input) INTEGER
3983 * On entry, M specifies the number of rows of the matrix A. M
3984 * must be at least zero.
3985 *
3986 * N (global input) INTEGER
3987 * On entry, N specifies the number of columns of the matrix A.
3988 * N must be at least zero.
3989 *
3990 * A (local input) REAL array
3991 * On entry, A is an array of dimension (LDA,N). The leading m
3992 * by n part of this array is printed.
3993 *
3994 * LDA (local input) INTEGER
3995 * On entry, LDA specifies the leading dimension of the local
3996 * array A to be printed. LDA must be at least MAX( 1, M ).
3997 *
3998 * IRPRNT (global input) INTEGER
3999 * On entry, IRPRNT specifies the process row coordinate of the
4000 * printing process.
4001 *
4002 * ICPRNT (global input) INTEGER
4003 * On entry, ICPRNT specifies the process column coordinate of
4004 * the printing process.
4005 *
4006 * CMATNM (global input) CHARACTER*(*)
4007 * On entry, CMATNM specifies the identifier of the matrix to be
4008 * printed.
4009 *
4010 * -- Written on April 1, 1998 by
4011 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4012 *
4013 * =====================================================================
4014 *
4015 * .. Local Scalars ..
4016  INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4017 * ..
4018 * .. External Subroutines ..
4019  EXTERNAL BLACS_GRIDINFO
4020 * ..
4021 * .. Executable Statements ..
4022 *
4023 * Quick return if possible
4024 *
4025  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4026  $ RETURN
4027 *
4028 * Get grid parameters
4029 *
4030  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4031 *
4032  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4033 *
4034  WRITE( nout, fmt = * )
4035  DO 20 j = 1, n
4036 *
4037  DO 10 i = 1, m
4038 *
4039  WRITE( nout, fmt = 9999 ) cmatnm, i, j, a( i, j )
4040 *
4041  10 CONTINUE
4042 *
4043  20 CONTINUE
4044 *
4045  END IF
4046 *
4047  9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8 )
4048 *
4049  RETURN
4050 *
4051 * End of PSMPRNT
4052 *
4053  END
4054  SUBROUTINE psvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4055  $ CVECNM )
4057 * -- PBLAS test routine (version 2.0) --
4058 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4059 * and University of California, Berkeley.
4060 * April 1, 1998
4061 *
4062 * .. Scalar Arguments ..
4063  INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4064 * ..
4065 * .. Array Arguments ..
4066  CHARACTER*(*) CVECNM
4067  REAL X( * )
4068 * ..
4069 *
4070 * Purpose
4071 * =======
4072 *
4073 * PSVPRNT prints to the standard output an vector x of length n. Only
4074 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4075 *
4076 * Arguments
4077 * =========
4078 *
4079 * ICTXT (local input) INTEGER
4080 * On entry, ICTXT specifies the BLACS context handle, indica-
4081 * ting the global context of the operation. The context itself
4082 * is global, but the value of ICTXT is local.
4083 *
4084 * NOUT (global input) INTEGER
4085 * On entry, NOUT specifies the unit number for the output file.
4086 * When NOUT is 6, output to screen, when NOUT is 0, output to
4087 * stderr. NOUT is only defined for process 0.
4088 *
4089 * N (global input) INTEGER
4090 * On entry, N specifies the length of the vector X. N must be
4091 * at least zero.
4092 *
4093 * X (global input) REAL array
4094 * On entry, X is an array of dimension at least
4095 * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4096 * ted array X must contain the vector x.
4097 *
4098 * INCX (global input) INTEGER.
4099 * On entry, INCX specifies the increment for the elements of X.
4100 * INCX must not be zero.
4101 *
4102 * IRPRNT (global input) INTEGER
4103 * On entry, IRPRNT specifies the process row coordinate of the
4104 * printing process.
4105 *
4106 * ICPRNT (global input) INTEGER
4107 * On entry, ICPRNT specifies the process column coordinate of
4108 * the printing process.
4109 *
4110 * CVECNM (global input) CHARACTER*(*)
4111 * On entry, CVECNM specifies the identifier of the vector to be
4112 * printed.
4113 *
4114 * -- Written on April 1, 1998 by
4115 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4116 *
4117 * =====================================================================
4118 *
4119 * .. Local Scalars ..
4120  INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4121 * ..
4122 * .. External Subroutines ..
4123  EXTERNAL BLACS_GRIDINFO
4124 * ..
4125 * .. Executable Statements ..
4126 *
4127 * Quick return if possible
4128 *
4129  IF( n.LE.0 )
4130  $ RETURN
4131 *
4132 * Get grid parameters
4133 *
4134  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4135 *
4136  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4137 *
4138  WRITE( nout, fmt = * )
4139  DO 10 i = 1, 1 + ( n-1 )*incx, incx
4140 *
4141  WRITE( nout, fmt = 9999 ) cvecnm, i, x( i )
4142 *
4143  10 CONTINUE
4144 *
4145  END IF
4146 *
4147  9999 FORMAT( 1x, a, '(', i6, ')=', e16.8 )
4148 *
4149  RETURN
4150 *
4151 * End of PSVPRNT
4152 *
4153  END
4154  SUBROUTINE psmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4155  $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4156  $ DESCY, INCY, G, ERR, INFO )
4158 * -- PBLAS test routine (version 2.0) --
4159 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4160 * and University of California, Berkeley.
4161 * April 1, 1998
4162 *
4163 * .. Scalar Arguments ..
4164  CHARACTER*1 TRANS
4165  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4166  $ JY, M, N
4167  REAL ALPHA, BETA, ERR
4168 * ..
4169 * .. Array Arguments ..
4170  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4171  REAL A( * ), G( * ), PY( * ), X( * ), Y( * )
4172 * ..
4173 *
4174 * Purpose
4175 * =======
4176 *
4177 * PSMVCH checks the results of the computational tests.
4178 *
4179 * Notes
4180 * =====
4181 *
4182 * A description vector is associated with each 2D block-cyclicly dis-
4183 * tributed matrix. This vector stores the information required to
4184 * establish the mapping between a matrix entry and its corresponding
4185 * process and memory location.
4186 *
4187 * In the following comments, the character _ should be read as
4188 * "of the distributed matrix". Let A be a generic term for any 2D
4189 * block cyclicly distributed matrix. Its description vector is DESCA:
4190 *
4191 * NOTATION STORED IN EXPLANATION
4192 * ---------------- --------------- ------------------------------------
4193 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4194 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4195 * the NPROW x NPCOL BLACS process grid
4196 * A is distributed over. The context
4197 * itself is global, but the handle
4198 * (the integer value) may vary.
4199 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4200 * ted matrix A, M_A >= 0.
4201 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4202 * buted matrix A, N_A >= 0.
4203 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4204 * block of the matrix A, IMB_A > 0.
4205 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4206 * left block of the matrix A,
4207 * INB_A > 0.
4208 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4209 * bute the last M_A-IMB_A rows of A,
4210 * MB_A > 0.
4211 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4212 * bute the last N_A-INB_A columns of
4213 * A, NB_A > 0.
4214 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4215 * row of the matrix A is distributed,
4216 * NPROW > RSRC_A >= 0.
4217 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4218 * first column of A is distributed.
4219 * NPCOL > CSRC_A >= 0.
4220 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4221 * array storing the local blocks of
4222 * the distributed matrix A,
4223 * IF( Lc( 1, N_A ) > 0 )
4224 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4225 * ELSE
4226 * LLD_A >= 1.
4227 *
4228 * Let K be the number of rows of a matrix A starting at the global in-
4229 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4230 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4231 * receive if these K rows were distributed over NPROW processes. If K
4232 * is the number of columns of a matrix A starting at the global index
4233 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4234 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4235 * these K columns were distributed over NPCOL processes.
4236 *
4237 * The values of Lr() and Lc() may be determined via a call to the func-
4238 * tion PB_NUMROC:
4239 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4240 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4241 *
4242 * Arguments
4243 * =========
4244 *
4245 * ICTXT (local input) INTEGER
4246 * On entry, ICTXT specifies the BLACS context handle, indica-
4247 * ting the global context of the operation. The context itself
4248 * is global, but the value of ICTXT is local.
4249 *
4250 * TRANS (global input) CHARACTER*1
4251 * On entry, TRANS specifies which matrix-vector product is to
4252 * be computed as follows:
4253 * If TRANS = 'N',
4254 * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ),
4255 * otherwise
4256 * sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ).
4257 *
4258 * M (global input) INTEGER
4259 * On entry, M specifies the number of rows of the submatrix
4260 * operand matrix A. M must be at least zero.
4261 *
4262 * N (global input) INTEGER
4263 * On entry, N specifies the number of columns of the subma-
4264 * trix operand matrix A. N must be at least zero.
4265 *
4266 * ALPHA (global input) REAL
4267 * On entry, ALPHA specifies the scalar alpha.
4268 *
4269 * A (local input) REAL array
4270 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4271 * array contains a local copy of the initial entire matrix PA.
4272 *
4273 * IA (global input) INTEGER
4274 * On entry, IA specifies A's global row index, which points to
4275 * the beginning of the submatrix sub( A ).
4276 *
4277 * JA (global input) INTEGER
4278 * On entry, JA specifies A's global column index, which points
4279 * to the beginning of the submatrix sub( A ).
4280 *
4281 * DESCA (global and local input) INTEGER array
4282 * On entry, DESCA is an integer array of dimension DLEN_. This
4283 * is the array descriptor for the matrix A.
4284 *
4285 * X (local input) REAL array
4286 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4287 * array contains a local copy of the initial entire matrix PX.
4288 *
4289 * IX (global input) INTEGER
4290 * On entry, IX specifies X's global row index, which points to
4291 * the beginning of the submatrix sub( X ).
4292 *
4293 * JX (global input) INTEGER
4294 * On entry, JX specifies X's global column index, which points
4295 * to the beginning of the submatrix sub( X ).
4296 *
4297 * DESCX (global and local input) INTEGER array
4298 * On entry, DESCX is an integer array of dimension DLEN_. This
4299 * is the array descriptor for the matrix X.
4300 *
4301 * INCX (global input) INTEGER
4302 * On entry, INCX specifies the global increment for the
4303 * elements of X. Only two values of INCX are supported in
4304 * this version, namely 1 and M_X. INCX must not be zero.
4305 *
4306 * BETA (global input) REAL
4307 * On entry, BETA specifies the scalar beta.
4308 *
4309 * Y (local input/local output) REAL array
4310 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4311 * array contains a local copy of the initial entire matrix PY.
4312 *
4313 * PY (local input) REAL array
4314 * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4315 * array contains the local entries of the matrix PY.
4316 *
4317 * IY (global input) INTEGER
4318 * On entry, IY specifies Y's global row index, which points to
4319 * the beginning of the submatrix sub( Y ).
4320 *
4321 * JY (global input) INTEGER
4322 * On entry, JY specifies Y's global column index, which points
4323 * to the beginning of the submatrix sub( Y ).
4324 *
4325 * DESCY (global and local input) INTEGER array
4326 * On entry, DESCY is an integer array of dimension DLEN_. This
4327 * is the array descriptor for the matrix Y.
4328 *
4329 * INCY (global input) INTEGER
4330 * On entry, INCY specifies the global increment for the
4331 * elements of Y. Only two values of INCY are supported in
4332 * this version, namely 1 and M_Y. INCY must not be zero.
4333 *
4334 * G (workspace) REAL array
4335 * On entry, G is an array of dimension at least MAX( M, N ). G
4336 * is used to compute the gauges.
4337 *
4338 * ERR (global output) REAL
4339 * On exit, ERR specifies the largest error in absolute value.
4340 *
4341 * INFO (global output) INTEGER
4342 * On exit, if INFO <> 0, the result is less than half accurate.
4343 *
4344 * -- Written on April 1, 1998 by
4345 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4346 *
4347 * =====================================================================
4348 *
4349 * .. Parameters ..
4350  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4351  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4352  $ RSRC_
4353  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4354  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4355  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4356  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4357  REAL ZERO, ONE
4358  parameter( zero = 0.0e+0, one = 1.0e+0 )
4359 * ..
4360 * .. Local Scalars ..
4361  LOGICAL COLREP, ROWREP, TRAN
4362  INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4363  $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA,
4364  $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL,
4365  $ nprow
4366  REAL EPS, ERRI, GTMP, TBETA, YTMP
4367 * ..
4368 * .. External Subroutines ..
4369  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4370 * ..
4371 * .. External Functions ..
4372  LOGICAL LSAME
4373  REAL PSLAMCH
4374  EXTERNAL lsame, pslamch
4375 * ..
4376 * .. Intrinsic Functions ..
4377  INTRINSIC abs, max, min, mod, sqrt
4378 * ..
4379 * .. Executable Statements ..
4380 *
4381  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4382 *
4383  eps = pslamch( ictxt, 'eps' )
4384 *
4385  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4386  tbeta = one
4387  ELSE
4388  tbeta = beta
4389  END IF
4390 *
4391  tran = lsame( trans, 'T' ).OR.lsame( trans, 'C' )
4392  IF( tran ) THEN
4393  ml = n
4394  nl = m
4395  ELSE
4396  ml = m
4397  nl = n
4398  END IF
4399 *
4400  lda = max( 1, desca( m_ ) )
4401  ldx = max( 1, descx( m_ ) )
4402  ldy = max( 1, descy( m_ ) )
4403 *
4404 * Compute expected result in Y using data in A, X and Y.
4405 * Compute gauges in G. This part of the computation is performed
4406 * by every process in the grid.
4407 *
4408  ioffy = iy + ( jy - 1 ) * ldy
4409  DO 30 i = 1, ml
4410  ytmp = zero
4411  gtmp = zero
4412  ioffx = ix + ( jx - 1 ) * ldx
4413  IF( tran )THEN
4414  ioffa = ia + ( ja + i - 2 ) * lda
4415  DO 10 j = 1, nl
4416  ytmp = ytmp + a( ioffa ) * x( ioffx )
4417  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4418  ioffa = ioffa + 1
4419  ioffx = ioffx + incx
4420  10 CONTINUE
4421  ELSE
4422  ioffa = ia + i - 1 + ( ja - 1 ) * lda
4423  DO 20 j = 1, nl
4424  ytmp = ytmp + a( ioffa ) * x( ioffx )
4425  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4426  ioffa = ioffa + lda
4427  ioffx = ioffx + incx
4428  20 CONTINUE
4429  END IF
4430  g( i ) = abs( alpha ) * gtmp + abs( tbeta * y( ioffy ) )
4431  y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4432  ioffy = ioffy + incy
4433  30 CONTINUE
4434 *
4435 * Compute the error ratio for this result.
4436 *
4437  err = zero
4438  info = 0
4439  ldpy = descy( lld_ )
4440  ioffy = iy + ( jy - 1 ) * ldy
4441  CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4442  $ jjy, iyrow, iycol )
4443  icurrow = iyrow
4444  icurcol = iycol
4445  rowrep = ( iyrow.EQ.-1 )
4446  colrep = ( iycol.EQ.-1 )
4447 *
4448  IF( incy.EQ.descy( m_ ) ) THEN
4449 *
4450 * sub( Y ) is a row vector
4451 *
4452  jb = descy( inb_ ) - jy + 1
4453  IF( jb.LE.0 )
4454  $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4455  jb = min( jb, ml )
4456  jn = jy + jb - 1
4457 *
4458  DO 50 j = jy, jn
4459 *
4460  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4461  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4462  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4463  IF( g( j-jy+1 ).NE.zero )
4464  $ erri = erri / g( j-jy+1 )
4465  err = max( err, erri )
4466  IF( err*sqrt( eps ).GE.one )
4467  $ info = 1
4468  jjy = jjy + 1
4469  END IF
4470 *
4471  ioffy = ioffy + incy
4472 *
4473  50 CONTINUE
4474 *
4475  icurcol = mod( icurcol+1, npcol )
4476 *
4477  DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4478  jb = min( jy+ml-j, descy( nb_ ) )
4479 *
4480  DO 60 kk = 0, jb-1
4481 *
4482  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4483  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4484  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4485  IF( g( j+kk-jy+1 ).NE.zero )
4486  $ erri = erri / g( j+kk-jy+1 )
4487  err = max( err, erri )
4488  IF( err*sqrt( eps ).GE.one )
4489  $ info = 1
4490  jjy = jjy + 1
4491  END IF
4492 *
4493  ioffy = ioffy + incy
4494 *
4495  60 CONTINUE
4496 *
4497  icurcol = mod( icurcol+1, npcol )
4498 *
4499  70 CONTINUE
4500 *
4501  ELSE
4502 *
4503 * sub( Y ) is a column vector
4504 *
4505  ib = descy( imb_ ) - iy + 1
4506  IF( ib.LE.0 )
4507  $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4508  ib = min( ib, ml )
4509  in = iy + ib - 1
4510 *
4511  DO 80 i = iy, in
4512 *
4513  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4514  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4515  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4516  IF( g( i-iy+1 ).NE.zero )
4517  $ erri = erri / g( i-iy+1 )
4518  err = max( err, erri )
4519  IF( err*sqrt( eps ).GE.one )
4520  $ info = 1
4521  iiy = iiy + 1
4522  END IF
4523 *
4524  ioffy = ioffy + incy
4525 *
4526  80 CONTINUE
4527 *
4528  icurrow = mod( icurrow+1, nprow )
4529 *
4530  DO 100 i = in+1, iy+ml-1, descy( mb_ )
4531  ib = min( iy+ml-i, descy( mb_ ) )
4532 *
4533  DO 90 kk = 0, ib-1
4534 *
4535  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4536  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4537  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4538  IF( g( i+kk-iy+1 ).NE.zero )
4539  $ erri = erri / g( i+kk-iy+1 )
4540  err = max( err, erri )
4541  IF( err*sqrt( eps ).GE.one )
4542  $ info = 1
4543  iiy = iiy + 1
4544  END IF
4545 *
4546  ioffy = ioffy + incy
4547 *
4548  90 CONTINUE
4549 *
4550  icurrow = mod( icurrow+1, nprow )
4551 *
4552  100 CONTINUE
4553 *
4554  END IF
4555 *
4556 * If INFO = 0, all results are at least half accurate.
4557 *
4558  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4559  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4560  $ mycol )
4561 *
4562  RETURN
4563 *
4564 * End of PSMVCH
4565 *
4566  END
4567  SUBROUTINE psvmch( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4568  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA,
4569  $ DESCA, G, ERR, INFO )
4571 * -- PBLAS test routine (version 2.0) --
4572 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4573 * and University of California, Berkeley.
4574 * April 1, 1998
4575 *
4576 * .. Scalar Arguments ..
4577  CHARACTER*1 UPLO
4578  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4579  $ JY, M, N
4580  REAL ALPHA, ERR
4581 * ..
4582 * .. Array Arguments ..
4583  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4584  REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4585 * ..
4586 *
4587 * Purpose
4588 * =======
4589 *
4590 * PSVMCH checks the results of the computational tests.
4591 *
4592 * Notes
4593 * =====
4594 *
4595 * A description vector is associated with each 2D block-cyclicly dis-
4596 * tributed matrix. This vector stores the information required to
4597 * establish the mapping between a matrix entry and its corresponding
4598 * process and memory location.
4599 *
4600 * In the following comments, the character _ should be read as
4601 * "of the distributed matrix". Let A be a generic term for any 2D
4602 * block cyclicly distributed matrix. Its description vector is DESCA:
4603 *
4604 * NOTATION STORED IN EXPLANATION
4605 * ---------------- --------------- ------------------------------------
4606 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4607 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4608 * the NPROW x NPCOL BLACS process grid
4609 * A is distributed over. The context
4610 * itself is global, but the handle
4611 * (the integer value) may vary.
4612 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4613 * ted matrix A, M_A >= 0.
4614 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4615 * buted matrix A, N_A >= 0.
4616 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4617 * block of the matrix A, IMB_A > 0.
4618 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4619 * left block of the matrix A,
4620 * INB_A > 0.
4621 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4622 * bute the last M_A-IMB_A rows of A,
4623 * MB_A > 0.
4624 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4625 * bute the last N_A-INB_A columns of
4626 * A, NB_A > 0.
4627 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4628 * row of the matrix A is distributed,
4629 * NPROW > RSRC_A >= 0.
4630 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4631 * first column of A is distributed.
4632 * NPCOL > CSRC_A >= 0.
4633 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4634 * array storing the local blocks of
4635 * the distributed matrix A,
4636 * IF( Lc( 1, N_A ) > 0 )
4637 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4638 * ELSE
4639 * LLD_A >= 1.
4640 *
4641 * Let K be the number of rows of a matrix A starting at the global in-
4642 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4643 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4644 * receive if these K rows were distributed over NPROW processes. If K
4645 * is the number of columns of a matrix A starting at the global index
4646 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4647 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4648 * these K columns were distributed over NPCOL processes.
4649 *
4650 * The values of Lr() and Lc() may be determined via a call to the func-
4651 * tion PB_NUMROC:
4652 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4653 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4654 *
4655 * Arguments
4656 * =========
4657 *
4658 * ICTXT (local input) INTEGER
4659 * On entry, ICTXT specifies the BLACS context handle, indica-
4660 * ting the global context of the operation. The context itself
4661 * is global, but the value of ICTXT is local.
4662 *
4663 * UPLO (global input) CHARACTER*1
4664 * On entry, UPLO specifies which part of the submatrix sub( A )
4665 * is to be referenced as follows:
4666 * If UPLO = 'L', only the lower triangular part,
4667 * If UPLO = 'U', only the upper triangular part,
4668 * else the entire matrix is to be referenced.
4669 *
4670 * M (global input) INTEGER
4671 * On entry, M specifies the number of rows of the submatrix
4672 * operand matrix A. M must be at least zero.
4673 *
4674 * N (global input) INTEGER
4675 * On entry, N specifies the number of columns of the subma-
4676 * trix operand matrix A. N must be at least zero.
4677 *
4678 * ALPHA (global input) REAL
4679 * On entry, ALPHA specifies the scalar alpha.
4680 *
4681 * X (local input) REAL array
4682 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4683 * array contains a local copy of the initial entire matrix PX.
4684 *
4685 * IX (global input) INTEGER
4686 * On entry, IX specifies X's global row index, which points to
4687 * the beginning of the submatrix sub( X ).
4688 *
4689 * JX (global input) INTEGER
4690 * On entry, JX specifies X's global column index, which points
4691 * to the beginning of the submatrix sub( X ).
4692 *
4693 * DESCX (global and local input) INTEGER array
4694 * On entry, DESCX is an integer array of dimension DLEN_. This
4695 * is the array descriptor for the matrix X.
4696 *
4697 * INCX (global input) INTEGER
4698 * On entry, INCX specifies the global increment for the
4699 * elements of X. Only two values of INCX are supported in
4700 * this version, namely 1 and M_X. INCX must not be zero.
4701 *
4702 * Y (local input) REAL array
4703 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4704 * array contains a local copy of the initial entire matrix PY.
4705 *
4706 * IY (global input) INTEGER
4707 * On entry, IY specifies Y's global row index, which points to
4708 * the beginning of the submatrix sub( Y ).
4709 *
4710 * JY (global input) INTEGER
4711 * On entry, JY specifies Y's global column index, which points
4712 * to the beginning of the submatrix sub( Y ).
4713 *
4714 * DESCY (global and local input) INTEGER array
4715 * On entry, DESCY is an integer array of dimension DLEN_. This
4716 * is the array descriptor for the matrix Y.
4717 *
4718 * INCY (global input) INTEGER
4719 * On entry, INCY specifies the global increment for the
4720 * elements of Y. Only two values of INCY are supported in
4721 * this version, namely 1 and M_Y. INCY must not be zero.
4722 *
4723 * A (local input/local output) REAL array
4724 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4725 * array contains a local copy of the initial entire matrix PA.
4726 *
4727 * PA (local input) REAL array
4728 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4729 * array contains the local entries of the matrix PA.
4730 *
4731 * IA (global input) INTEGER
4732 * On entry, IA specifies A's global row index, which points to
4733 * the beginning of the submatrix sub( A ).
4734 *
4735 * JA (global input) INTEGER
4736 * On entry, JA specifies A's global column index, which points
4737 * to the beginning of the submatrix sub( A ).
4738 *
4739 * DESCA (global and local input) INTEGER array
4740 * On entry, DESCA is an integer array of dimension DLEN_. This
4741 * is the array descriptor for the matrix A.
4742 *
4743 * G (workspace) REAL array
4744 * On entry, G is an array of dimension at least MAX( M, N ). G
4745 * is used to compute the gauges.
4746 *
4747 * ERR (global output) REAL
4748 * On exit, ERR specifies the largest error in absolute value.
4749 *
4750 * INFO (global output) INTEGER
4751 * On exit, if INFO <> 0, the result is less than half accurate.
4752 *
4753 * -- Written on April 1, 1998 by
4754 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4755 *
4756 * =====================================================================
4757 *
4758 * .. Parameters ..
4759  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4760  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4761  $ RSRC_
4762  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4763  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4764  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4765  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4766  REAL ZERO, ONE
4767  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
4768 * ..
4769 * .. Local Scalars ..
4770  LOGICAL COLREP, LOWER, ROWREP, UPPER
4771  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4772  $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4773  $ ldx, ldy, mycol, myrow, npcol, nprow
4774  REAL ATMP, EPS, ERRI, GTMP
4775 * ..
4776 * .. External Subroutines ..
4777  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4778 * ..
4779 * .. External Functions ..
4780  LOGICAL LSAME
4781  REAL PSLAMCH
4782  EXTERNAL LSAME, PSLAMCH
4783 * ..
4784 * .. Intrinsic Functions ..
4785  INTRINSIC abs, max, min, mod, sqrt
4786 * ..
4787 * .. Executable Statements ..
4788 *
4789  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4790 *
4791  eps = pslamch( ictxt, 'eps' )
4792 *
4793  upper = lsame( uplo, 'U' )
4794  lower = lsame( uplo, 'L' )
4795 *
4796  lda = max( 1, desca( m_ ) )
4797  ldx = max( 1, descx( m_ ) )
4798  ldy = max( 1, descy( m_ ) )
4799 *
4800 * Compute expected result in A using data in A, X and Y.
4801 * Compute gauges in G. This part of the computation is performed
4802 * by every process in the grid.
4803 *
4804  DO 70 j = 1, n
4805 *
4806  ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4807 *
4808  IF( lower ) THEN
4809  ibeg = j
4810  iend = m
4811  DO 10 i = 1, j-1
4812  g( i ) = zero
4813  10 CONTINUE
4814  ELSE IF( upper ) THEN
4815  ibeg = 1
4816  iend = j
4817  DO 20 i = j+1, m
4818  g( i ) = zero
4819  20 CONTINUE
4820  ELSE
4821  ibeg = 1
4822  iend = m
4823  END IF
4824 *
4825  DO 30 i = ibeg, iend
4826 *
4827  ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4828  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4829  atmp = x( ioffx ) * y( ioffy )
4830  gtmp = abs( x( ioffx ) * y( ioffy ) )
4831  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
4832  a( ioffa ) = alpha * atmp + a( ioffa )
4833 *
4834  30 CONTINUE
4835 *
4836 * Compute the error ratio for this result.
4837 *
4838  info = 0
4839  err = zero
4840  ldpa = desca( lld_ )
4841  ioffa = ia + ( ja + j - 2 ) * lda
4842  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4843  $ iia, jja, iarow, iacol )
4844  rowrep = ( iarow.EQ.-1 )
4845  colrep = ( iacol.EQ.-1 )
4846 *
4847  IF( mycol.EQ.iacol .OR. colrep ) THEN
4848 *
4849  icurrow = iarow
4850  ib = desca( imb_ ) - ia + 1
4851  IF( ib.LE.0 )
4852  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4853  ib = min( ib, m )
4854  in = ia + ib - 1
4855 *
4856  DO 40 i = ia, in
4857 *
4858  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4859  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4860  IF( g( i-ia+1 ).NE.zero )
4861  $ erri = erri / g( i-ia+1 )
4862  err = max( err, erri )
4863  IF( err*sqrt( eps ).GE.one )
4864  $ info = 1
4865  iia = iia + 1
4866  END IF
4867 *
4868  ioffa = ioffa + 1
4869 *
4870  40 CONTINUE
4871 *
4872  icurrow = mod( icurrow+1, nprow )
4873 *
4874  DO 60 i = in+1, ia+m-1, desca( mb_ )
4875  ib = min( ia+m-i, desca( mb_ ) )
4876 *
4877  DO 50 kk = 0, ib-1
4878 *
4879  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4880  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4881  IF( g( i+kk-ia+1 ).NE.zero )
4882  $ erri = erri / g( i+kk-ia+1 )
4883  err = max( err, erri )
4884  IF( err*sqrt( eps ).GE.one )
4885  $ info = 1
4886  iia = iia + 1
4887  END IF
4888 *
4889  ioffa = ioffa + 1
4890 *
4891  50 CONTINUE
4892 *
4893  icurrow = mod( icurrow+1, nprow )
4894 *
4895  60 CONTINUE
4896 *
4897  END IF
4898 *
4899 * If INFO = 0, all results are at least half accurate.
4900 *
4901  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4902  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4903  $ mycol )
4904  IF( info.NE.0 )
4905  $ GO TO 80
4906 *
4907  70 CONTINUE
4908 *
4909  80 CONTINUE
4910 *
4911  RETURN
4912 *
4913 * End of PSVMCH
4914 *
4915  END
4916  SUBROUTINE psvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4917  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4918  $ JA, DESCA, G, ERR, INFO )
4920 * -- PBLAS test routine (version 2.0) --
4921 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4922 * and University of California, Berkeley.
4923 * April 1, 1998
4924 *
4925 * .. Scalar Arguments ..
4926  CHARACTER*1 UPLO
4927  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4928  $ jy, m, n
4929  REAL ALPHA, ERR
4930 * ..
4931 * .. Array Arguments ..
4932  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4933  REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4934 * ..
4935 *
4936 * Purpose
4937 * =======
4938 *
4939 * PSVMCH2 checks the results of the computational tests.
4940 *
4941 * Notes
4942 * =====
4943 *
4944 * A description vector is associated with each 2D block-cyclicly dis-
4945 * tributed matrix. This vector stores the information required to
4946 * establish the mapping between a matrix entry and its corresponding
4947 * process and memory location.
4948 *
4949 * In the following comments, the character _ should be read as
4950 * "of the distributed matrix". Let A be a generic term for any 2D
4951 * block cyclicly distributed matrix. Its description vector is DESCA:
4952 *
4953 * NOTATION STORED IN EXPLANATION
4954 * ---------------- --------------- ------------------------------------
4955 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4956 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4957 * the NPROW x NPCOL BLACS process grid
4958 * A is distributed over. The context
4959 * itself is global, but the handle
4960 * (the integer value) may vary.
4961 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4962 * ted matrix A, M_A >= 0.
4963 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4964 * buted matrix A, N_A >= 0.
4965 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4966 * block of the matrix A, IMB_A > 0.
4967 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4968 * left block of the matrix A,
4969 * INB_A > 0.
4970 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4971 * bute the last M_A-IMB_A rows of A,
4972 * MB_A > 0.
4973 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4974 * bute the last N_A-INB_A columns of
4975 * A, NB_A > 0.
4976 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4977 * row of the matrix A is distributed,
4978 * NPROW > RSRC_A >= 0.
4979 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4980 * first column of A is distributed.
4981 * NPCOL > CSRC_A >= 0.
4982 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4983 * array storing the local blocks of
4984 * the distributed matrix A,
4985 * IF( Lc( 1, N_A ) > 0 )
4986 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4987 * ELSE
4988 * LLD_A >= 1.
4989 *
4990 * Let K be the number of rows of a matrix A starting at the global in-
4991 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4992 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4993 * receive if these K rows were distributed over NPROW processes. If K
4994 * is the number of columns of a matrix A starting at the global index
4995 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4996 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4997 * these K columns were distributed over NPCOL processes.
4998 *
4999 * The values of Lr() and Lc() may be determined via a call to the func-
5000 * tion PB_NUMROC:
5001 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5002 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5003 *
5004 * Arguments
5005 * =========
5006 *
5007 * ICTXT (local input) INTEGER
5008 * On entry, ICTXT specifies the BLACS context handle, indica-
5009 * ting the global context of the operation. The context itself
5010 * is global, but the value of ICTXT is local.
5011 *
5012 * UPLO (global input) CHARACTER*1
5013 * On entry, UPLO specifies which part of the submatrix sub( A )
5014 * is to be referenced as follows:
5015 * If UPLO = 'L', only the lower triangular part,
5016 * If UPLO = 'U', only the upper triangular part,
5017 * else the entire matrix is to be referenced.
5018 *
5019 * M (global input) INTEGER
5020 * On entry, M specifies the number of rows of the submatrix
5021 * operand matrix A. M must be at least zero.
5022 *
5023 * N (global input) INTEGER
5024 * On entry, N specifies the number of columns of the subma-
5025 * trix operand matrix A. N must be at least zero.
5026 *
5027 * ALPHA (global input) REAL
5028 * On entry, ALPHA specifies the scalar alpha.
5029 *
5030 * X (local input) REAL array
5031 * On entry, X is an array of dimension (DESCX( M_ ),*). This
5032 * array contains a local copy of the initial entire matrix PX.
5033 *
5034 * IX (global input) INTEGER
5035 * On entry, IX specifies X's global row index, which points to
5036 * the beginning of the submatrix sub( X ).
5037 *
5038 * JX (global input) INTEGER
5039 * On entry, JX specifies X's global column index, which points
5040 * to the beginning of the submatrix sub( X ).
5041 *
5042 * DESCX (global and local input) INTEGER array
5043 * On entry, DESCX is an integer array of dimension DLEN_. This
5044 * is the array descriptor for the matrix X.
5045 *
5046 * INCX (global input) INTEGER
5047 * On entry, INCX specifies the global increment for the
5048 * elements of X. Only two values of INCX are supported in
5049 * this version, namely 1 and M_X. INCX must not be zero.
5050 *
5051 * Y (local input) REAL array
5052 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
5053 * array contains a local copy of the initial entire matrix PY.
5054 *
5055 * IY (global input) INTEGER
5056 * On entry, IY specifies Y's global row index, which points to
5057 * the beginning of the submatrix sub( Y ).
5058 *
5059 * JY (global input) INTEGER
5060 * On entry, JY specifies Y's global column index, which points
5061 * to the beginning of the submatrix sub( Y ).
5062 *
5063 * DESCY (global and local input) INTEGER array
5064 * On entry, DESCY is an integer array of dimension DLEN_. This
5065 * is the array descriptor for the matrix Y.
5066 *
5067 * INCY (global input) INTEGER
5068 * On entry, INCY specifies the global increment for the
5069 * elements of Y. Only two values of INCY are supported in
5070 * this version, namely 1 and M_Y. INCY must not be zero.
5071 *
5072 * A (local input/local output) REAL array
5073 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5074 * array contains a local copy of the initial entire matrix PA.
5075 *
5076 * PA (local input) REAL array
5077 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5078 * array contains the local entries of the matrix PA.
5079 *
5080 * IA (global input) INTEGER
5081 * On entry, IA specifies A's global row index, which points to
5082 * the beginning of the submatrix sub( A ).
5083 *
5084 * JA (global input) INTEGER
5085 * On entry, JA specifies A's global column index, which points
5086 * to the beginning of the submatrix sub( A ).
5087 *
5088 * DESCA (global and local input) INTEGER array
5089 * On entry, DESCA is an integer array of dimension DLEN_. This
5090 * is the array descriptor for the matrix A.
5091 *
5092 * G (workspace) REAL array
5093 * On entry, G is an array of dimension at least MAX( M, N ). G
5094 * is used to compute the gauges.
5095 *
5096 * ERR (global output) REAL
5097 * On exit, ERR specifies the largest error in absolute value.
5098 *
5099 * INFO (global output) INTEGER
5100 * On exit, if INFO <> 0, the result is less than half accurate.
5101 *
5102 * -- Written on April 1, 1998 by
5103 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5104 *
5105 * =====================================================================
5106 *
5107 * .. Parameters ..
5108  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5109  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5110  $ RSRC_
5111  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5112  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5113  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5114  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5115  REAL ZERO, ONE
5116  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5117 * ..
5118 * .. Local Scalars ..
5119  LOGICAL COLREP, LOWER, ROWREP, UPPER
5120  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5121  $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5122  $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5123  $ npcol, nprow
5124  REAL EPS, ERRI, GTMP, ATMP
5125 * ..
5126 * .. External Subroutines ..
5127  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5128 * ..
5129 * .. External Functions ..
5130  LOGICAL LSAME
5131  REAL PSLAMCH
5132  EXTERNAL lsame, pslamch
5133 * ..
5134 * .. Intrinsic Functions ..
5135  INTRINSIC abs, max, min, mod, sqrt
5136 * ..
5137 * .. Executable Statements ..
5138 *
5139  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5140 *
5141  eps = pslamch( ictxt, 'eps' )
5142 *
5143  upper = lsame( uplo, 'U' )
5144  lower = lsame( uplo, 'L' )
5145 *
5146  lda = max( 1, desca( m_ ) )
5147  ldx = max( 1, descx( m_ ) )
5148  ldy = max( 1, descy( m_ ) )
5149 *
5150 * Compute expected result in A using data in A, X and Y.
5151 * Compute gauges in G. This part of the computation is performed
5152 * by every process in the grid.
5153 *
5154  DO 70 j = 1, n
5155 *
5156  ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5157  ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5158 *
5159  IF( lower ) THEN
5160  ibeg = j
5161  iend = m
5162  DO 10 i = 1, j-1
5163  g( i ) = zero
5164  10 CONTINUE
5165  ELSE IF( upper ) THEN
5166  ibeg = 1
5167  iend = j
5168  DO 20 i = j+1, m
5169  g( i ) = zero
5170  20 CONTINUE
5171  ELSE
5172  ibeg = 1
5173  iend = m
5174  END IF
5175 *
5176  DO 30 i = ibeg, iend
5177  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5178  ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5179  ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5180  atmp = x( ioffxi ) * y( ioffyj )
5181  atmp = atmp + y( ioffyi ) * x( ioffxj )
5182  gtmp = abs( x( ioffxi ) * y( ioffyj ) )
5183  gtmp = gtmp + abs( y( ioffyi ) * x( ioffxj ) )
5184  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
5185  a( ioffa ) = alpha*atmp + a( ioffa )
5186 *
5187  30 CONTINUE
5188 *
5189 * Compute the error ratio for this result.
5190 *
5191  info = 0
5192  err = zero
5193  ldpa = desca( lld_ )
5194  ioffa = ia + ( ja + j - 2 ) * lda
5195  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5196  $ iia, jja, iarow, iacol )
5197  rowrep = ( iarow.EQ.-1 )
5198  colrep = ( iacol.EQ.-1 )
5199 *
5200  IF( mycol.EQ.iacol .OR. colrep ) THEN
5201 *
5202  icurrow = iarow
5203  ib = desca( imb_ ) - ia + 1
5204  IF( ib.LE.0 )
5205  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5206  ib = min( ib, m )
5207  in = ia + ib - 1
5208 *
5209  DO 40 i = ia, in
5210 *
5211  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5212  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5213  IF( g( i-ia+1 ).NE.zero )
5214  $ erri = erri / g( i-ia+1 )
5215  err = max( err, erri )
5216  IF( err*sqrt( eps ).GE.one )
5217  $ info = 1
5218  iia = iia + 1
5219  END IF
5220 *
5221  ioffa = ioffa + 1
5222 *
5223  40 CONTINUE
5224 *
5225  icurrow = mod( icurrow+1, nprow )
5226 *
5227  DO 60 i = in+1, ia+m-1, desca( mb_ )
5228  ib = min( ia+m-i, desca( mb_ ) )
5229 *
5230  DO 50 kk = 0, ib-1
5231 *
5232  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5233  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5234  IF( g( i+kk-ia+1 ).NE.zero )
5235  $ erri = erri / g( i+kk-ia+1 )
5236  err = max( err, erri )
5237  IF( err*sqrt( eps ).GE.one )
5238  $ info = 1
5239  iia = iia + 1
5240  END IF
5241 *
5242  ioffa = ioffa + 1
5243 *
5244  50 CONTINUE
5245 *
5246  icurrow = mod( icurrow+1, nprow )
5247 *
5248  60 CONTINUE
5249 *
5250  END IF
5251 *
5252 * If INFO = 0, all results are at least half accurate.
5253 *
5254  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5255  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5256  $ mycol )
5257  IF( info.NE.0 )
5258  $ GO TO 80
5259 *
5260  70 CONTINUE
5261 *
5262  80 CONTINUE
5263 *
5264  RETURN
5265 *
5266 * End of PSVMCH2
5267 *
5268  END
5269  SUBROUTINE psmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5270  $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5271  $ JC, DESCC, CT, G, ERR, INFO )
5273 * -- PBLAS test routine (version 2.0) --
5274 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5275 * and University of California, Berkeley.
5276 * April 1, 1998
5277 *
5278 * .. Scalar Arguments ..
5279  CHARACTER*1 TRANSA, TRANSB
5280  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5281  REAL ALPHA, BETA, ERR
5282 * ..
5283 * .. Array Arguments ..
5284  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5285  REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
5286  $ PC( * )
5287 * ..
5288 *
5289 * Purpose
5290 * =======
5291 *
5292 * PSMMCH checks the results of the computational tests.
5293 *
5294 * Notes
5295 * =====
5296 *
5297 * A description vector is associated with each 2D block-cyclicly dis-
5298 * tributed matrix. This vector stores the information required to
5299 * establish the mapping between a matrix entry and its corresponding
5300 * process and memory location.
5301 *
5302 * In the following comments, the character _ should be read as
5303 * "of the distributed matrix". Let A be a generic term for any 2D
5304 * block cyclicly distributed matrix. Its description vector is DESCA:
5305 *
5306 * NOTATION STORED IN EXPLANATION
5307 * ---------------- --------------- ------------------------------------
5308 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5309 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5310 * the NPROW x NPCOL BLACS process grid
5311 * A is distributed over. The context
5312 * itself is global, but the handle
5313 * (the integer value) may vary.
5314 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5315 * ted matrix A, M_A >= 0.
5316 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5317 * buted matrix A, N_A >= 0.
5318 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5319 * block of the matrix A, IMB_A > 0.
5320 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5321 * left block of the matrix A,
5322 * INB_A > 0.
5323 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5324 * bute the last M_A-IMB_A rows of A,
5325 * MB_A > 0.
5326 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5327 * bute the last N_A-INB_A columns of
5328 * A, NB_A > 0.
5329 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5330 * row of the matrix A is distributed,
5331 * NPROW > RSRC_A >= 0.
5332 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5333 * first column of A is distributed.
5334 * NPCOL > CSRC_A >= 0.
5335 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5336 * array storing the local blocks of
5337 * the distributed matrix A,
5338 * IF( Lc( 1, N_A ) > 0 )
5339 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5340 * ELSE
5341 * LLD_A >= 1.
5342 *
5343 * Let K be the number of rows of a matrix A starting at the global in-
5344 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5345 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5346 * receive if these K rows were distributed over NPROW processes. If K
5347 * is the number of columns of a matrix A starting at the global index
5348 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5349 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5350 * these K columns were distributed over NPCOL processes.
5351 *
5352 * The values of Lr() and Lc() may be determined via a call to the func-
5353 * tion PB_NUMROC:
5354 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5355 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5356 *
5357 * Arguments
5358 * =========
5359 *
5360 * ICTXT (local input) INTEGER
5361 * On entry, ICTXT specifies the BLACS context handle, indica-
5362 * ting the global context of the operation. The context itself
5363 * is global, but the value of ICTXT is local.
5364 *
5365 * TRANSA (global input) CHARACTER*1
5366 * On entry, TRANSA specifies if the matrix operand A is to be
5367 * transposed.
5368 *
5369 * TRANSB (global input) CHARACTER*1
5370 * On entry, TRANSB specifies if the matrix operand B is to be
5371 * transposed.
5372 *
5373 * M (global input) INTEGER
5374 * On entry, M specifies the number of rows of C.
5375 *
5376 * N (global input) INTEGER
5377 * On entry, N specifies the number of columns of C.
5378 *
5379 * K (global input) INTEGER
5380 * On entry, K specifies the number of columns (resp. rows) of A
5381 * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5382 * PxSYR2K, PxHERK and PxHER2K.
5383 *
5384 * ALPHA (global input) REAL
5385 * On entry, ALPHA specifies the scalar alpha.
5386 *
5387 * A (local input) REAL array
5388 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5389 * array contains a local copy of the initial entire matrix PA.
5390 *
5391 * IA (global input) INTEGER
5392 * On entry, IA specifies A's global row index, which points to
5393 * the beginning of the submatrix sub( A ).
5394 *
5395 * JA (global input) INTEGER
5396 * On entry, JA specifies A's global column index, which points
5397 * to the beginning of the submatrix sub( A ).
5398 *
5399 * DESCA (global and local input) INTEGER array
5400 * On entry, DESCA is an integer array of dimension DLEN_. This
5401 * is the array descriptor for the matrix A.
5402 *
5403 * B (local input) REAL array
5404 * On entry, B is an array of dimension (DESCB( M_ ),*). This
5405 * array contains a local copy of the initial entire matrix PB.
5406 *
5407 * IB (global input) INTEGER
5408 * On entry, IB specifies B's global row index, which points to
5409 * the beginning of the submatrix sub( B ).
5410 *
5411 * JB (global input) INTEGER
5412 * On entry, JB specifies B's global column index, which points
5413 * to the beginning of the submatrix sub( B ).
5414 *
5415 * DESCB (global and local input) INTEGER array
5416 * On entry, DESCB is an integer array of dimension DLEN_. This
5417 * is the array descriptor for the matrix B.
5418 *
5419 * BETA (global input) REAL
5420 * On entry, BETA specifies the scalar beta.
5421 *
5422 * C (local input/local output) REAL array
5423 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5424 * array contains a local copy of the initial entire matrix PC.
5425 *
5426 * PC (local input) REAL array
5427 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5428 * array contains the local pieces of the matrix PC.
5429 *
5430 * IC (global input) INTEGER
5431 * On entry, IC specifies C's global row index, which points to
5432 * the beginning of the submatrix sub( C ).
5433 *
5434 * JC (global input) INTEGER
5435 * On entry, JC specifies C's global column index, which points
5436 * to the beginning of the submatrix sub( C ).
5437 *
5438 * DESCC (global and local input) INTEGER array
5439 * On entry, DESCC is an integer array of dimension DLEN_. This
5440 * is the array descriptor for the matrix C.
5441 *
5442 * CT (workspace) REAL array
5443 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5444 * holds a copy of the current column of C.
5445 *
5446 * G (workspace) REAL array
5447 * On entry, G is an array of dimension at least MAX(M,N,K). G
5448 * is used to compute the gauges.
5449 *
5450 * ERR (global output) REAL
5451 * On exit, ERR specifies the largest error in absolute value.
5452 *
5453 * INFO (global output) INTEGER
5454 * On exit, if INFO <> 0, the result is less than half accurate.
5455 *
5456 * -- Written on April 1, 1998 by
5457 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5458 *
5459 * =====================================================================
5460 *
5461 * .. Parameters ..
5462  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5463  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5464  $ RSRC_
5465  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5466  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5467  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5468  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5469  REAL ZERO, ONE
5470  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5471 * ..
5472 * .. Local Scalars ..
5473  LOGICAL COLREP, ROWREP, TRANA, TRANB
5474  INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5475  $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5476  $ mycol, myrow, npcol, nprow
5477  REAL EPS, ERRI
5478 * ..
5479 * .. External Subroutines ..
5480  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5481 * ..
5482 * .. External Functions ..
5483  LOGICAL LSAME
5484  REAL PSLAMCH
5485  EXTERNAL LSAME, PSLAMCH
5486 * ..
5487 * .. Intrinsic Functions ..
5488  INTRINSIC abs, max, min, mod, sqrt
5489 * ..
5490 * .. Executable Statements ..
5491 *
5492  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5493 *
5494  eps = pslamch( ictxt, 'eps' )
5495 *
5496  trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5497  tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5498 *
5499  lda = max( 1, desca( m_ ) )
5500  ldb = max( 1, descb( m_ ) )
5501  ldc = max( 1, descc( m_ ) )
5502 *
5503 * Compute expected result in C using data in A, B and C.
5504 * Compute gauges in G. This part of the computation is performed
5505 * by every process in the grid.
5506 *
5507  DO 240 j = 1, n
5508 *
5509  ioffc = ic + ( jc + j - 2 ) * ldc
5510  DO 10 i = 1, m
5511  ct( i ) = zero
5512  g( i ) = zero
5513  10 CONTINUE
5514 *
5515  IF( .NOT.trana .AND. .NOT.tranb ) THEN
5516  DO 30 kk = 1, k
5517  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5518  DO 20 i = 1, m
5519  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5520  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5521  g( i ) = g( i ) + abs( a( ioffa ) ) *
5522  $ abs( b( ioffb ) )
5523  20 CONTINUE
5524  30 CONTINUE
5525  ELSE IF( trana .AND. .NOT.tranb ) THEN
5526  DO 50 kk = 1, k
5527  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5528  DO 40 i = 1, m
5529  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5530  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5531  g( i ) = g( i ) + abs( a( ioffa ) ) *
5532  $ abs( b( ioffb ) )
5533  40 CONTINUE
5534  50 CONTINUE
5535  ELSE IF( .NOT.trana .AND. tranb ) THEN
5536  DO 70 kk = 1, k
5537  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5538  DO 60 i = 1, m
5539  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5540  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5541  g( i ) = g( i ) + abs( a( ioffa ) ) *
5542  $ abs( b( ioffb ) )
5543  60 CONTINUE
5544  70 CONTINUE
5545  ELSE IF( trana .AND. tranb ) THEN
5546  DO 90 kk = 1, k
5547  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5548  DO 80 i = 1, m
5549  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5550  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5551  g( i ) = g( i ) + abs( a( ioffa ) ) *
5552  $ abs( b( ioffb ) )
5553  80 CONTINUE
5554  90 CONTINUE
5555  END IF
5556 *
5557  DO 200 i = 1, m
5558  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5559  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5560  c( ioffc ) = ct( i )
5561  ioffc = ioffc + 1
5562  200 CONTINUE
5563 *
5564 * Compute the error ratio for this result.
5565 *
5566  err = zero
5567  info = 0
5568  ldpc = descc( lld_ )
5569  ioffc = ic + ( jc + j - 2 ) * ldc
5570  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5571  $ iic, jjc, icrow, iccol )
5572  icurrow = icrow
5573  rowrep = ( icrow.EQ.-1 )
5574  colrep = ( iccol.EQ.-1 )
5575 *
5576  IF( mycol.EQ.iccol .OR. colrep ) THEN
5577 *
5578  ibb = descc( imb_ ) - ic + 1
5579  IF( ibb.LE.0 )
5580  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5581  ibb = min( ibb, m )
5582  in = ic + ibb - 1
5583 *
5584  DO 210 i = ic, in
5585 *
5586  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5587  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5588  $ c( ioffc ) ) / eps
5589  IF( g( i-ic+1 ).NE.zero )
5590  $ erri = erri / g( i-ic+1 )
5591  err = max( err, erri )
5592  IF( err*sqrt( eps ).GE.one )
5593  $ info = 1
5594  iic = iic + 1
5595  END IF
5596 *
5597  ioffc = ioffc + 1
5598 *
5599  210 CONTINUE
5600 *
5601  icurrow = mod( icurrow+1, nprow )
5602 *
5603  DO 230 i = in+1, ic+m-1, descc( mb_ )
5604  ibb = min( ic+m-i, descc( mb_ ) )
5605 *
5606  DO 220 kk = 0, ibb-1
5607 *
5608  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5609  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5610  $ c( ioffc ) )/eps
5611  IF( g( i+kk-ic+1 ).NE.zero )
5612  $ erri = erri / g( i+kk-ic+1 )
5613  err = max( err, erri )
5614  IF( err*sqrt( eps ).GE.one )
5615  $ info = 1
5616  iic = iic + 1
5617  END IF
5618 *
5619  ioffc = ioffc + 1
5620 *
5621  220 CONTINUE
5622 *
5623  icurrow = mod( icurrow+1, nprow )
5624 *
5625  230 CONTINUE
5626 *
5627  END IF
5628 *
5629 * If INFO = 0, all results are at least half accurate.
5630 *
5631  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5632  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5633  $ mycol )
5634  IF( info.NE.0 )
5635  $ GO TO 250
5636 *
5637  240 CONTINUE
5638 *
5639  250 CONTINUE
5640 *
5641  RETURN
5642 *
5643 * End of PSMMCH
5644 *
5645  END
5646  SUBROUTINE psmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5647  $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5648  $ ERR, INFO )
5650 * -- PBLAS test routine (version 2.0) --
5651 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652 * and University of California, Berkeley.
5653 * April 1, 1998
5654 *
5655 * .. Scalar Arguments ..
5656  CHARACTER*1 TRANS, UPLO
5657  INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658  REAL ALPHA, BETA, ERR
5659 * ..
5660 * .. Array Arguments ..
5661  INTEGER DESCA( * ), DESCC( * )
5662  REAL A( * ), C( * ), CT( * ), G( * ), PC( * )
5663 * ..
5664 *
5665 * Purpose
5666 * =======
5667 *
5668 * PSMMCH1 checks the results of the computational tests.
5669 *
5670 * Notes
5671 * =====
5672 *
5673 * A description vector is associated with each 2D block-cyclicly dis-
5674 * tributed matrix. This vector stores the information required to
5675 * establish the mapping between a matrix entry and its corresponding
5676 * process and memory location.
5677 *
5678 * In the following comments, the character _ should be read as
5679 * "of the distributed matrix". Let A be a generic term for any 2D
5680 * block cyclicly distributed matrix. Its description vector is DESCA:
5681 *
5682 * NOTATION STORED IN EXPLANATION
5683 * ---------------- --------------- ------------------------------------
5684 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686 * the NPROW x NPCOL BLACS process grid
5687 * A is distributed over. The context
5688 * itself is global, but the handle
5689 * (the integer value) may vary.
5690 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691 * ted matrix A, M_A >= 0.
5692 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5693 * buted matrix A, N_A >= 0.
5694 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695 * block of the matrix A, IMB_A > 0.
5696 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697 * left block of the matrix A,
5698 * INB_A > 0.
5699 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700 * bute the last M_A-IMB_A rows of A,
5701 * MB_A > 0.
5702 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703 * bute the last N_A-INB_A columns of
5704 * A, NB_A > 0.
5705 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706 * row of the matrix A is distributed,
5707 * NPROW > RSRC_A >= 0.
5708 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709 * first column of A is distributed.
5710 * NPCOL > CSRC_A >= 0.
5711 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712 * array storing the local blocks of
5713 * the distributed matrix A,
5714 * IF( Lc( 1, N_A ) > 0 )
5715 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716 * ELSE
5717 * LLD_A >= 1.
5718 *
5719 * Let K be the number of rows of a matrix A starting at the global in-
5720 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722 * receive if these K rows were distributed over NPROW processes. If K
5723 * is the number of columns of a matrix A starting at the global index
5724 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726 * these K columns were distributed over NPCOL processes.
5727 *
5728 * The values of Lr() and Lc() may be determined via a call to the func-
5729 * tion PB_NUMROC:
5730 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732 *
5733 * Arguments
5734 * =========
5735 *
5736 * ICTXT (local input) INTEGER
5737 * On entry, ICTXT specifies the BLACS context handle, indica-
5738 * ting the global context of the operation. The context itself
5739 * is global, but the value of ICTXT is local.
5740 *
5741 * UPLO (global input) CHARACTER*1
5742 * On entry, UPLO specifies which part of C should contain the
5743 * result.
5744 *
5745 * TRANS (global input) CHARACTER*1
5746 * On entry, TRANS specifies whether the matrix A has to be
5747 * transposed or not before computing the matrix-matrix product.
5748 *
5749 * N (global input) INTEGER
5750 * On entry, N specifies the order the submatrix operand C. N
5751 * must be at least zero.
5752 *
5753 * K (global input) INTEGER
5754 * On entry, K specifies the number of columns (resp. rows) of A
5755 * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756 * zero.
5757 *
5758 * ALPHA (global input) REAL
5759 * On entry, ALPHA specifies the scalar alpha.
5760 *
5761 * A (local input) REAL array
5762 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5763 * array contains a local copy of the initial entire matrix PA.
5764 *
5765 * IA (global input) INTEGER
5766 * On entry, IA specifies A's global row index, which points to
5767 * the beginning of the submatrix sub( A ).
5768 *
5769 * JA (global input) INTEGER
5770 * On entry, JA specifies A's global column index, which points
5771 * to the beginning of the submatrix sub( A ).
5772 *
5773 * DESCA (global and local input) INTEGER array
5774 * On entry, DESCA is an integer array of dimension DLEN_. This
5775 * is the array descriptor for the matrix A.
5776 *
5777 * BETA (global input) REAL
5778 * On entry, BETA specifies the scalar beta.
5779 *
5780 * C (local input/local output) REAL array
5781 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5782 * array contains a local copy of the initial entire matrix PC.
5783 *
5784 * PC (local input) REAL array
5785 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786 * array contains the local pieces of the matrix PC.
5787 *
5788 * IC (global input) INTEGER
5789 * On entry, IC specifies C's global row index, which points to
5790 * the beginning of the submatrix sub( C ).
5791 *
5792 * JC (global input) INTEGER
5793 * On entry, JC specifies C's global column index, which points
5794 * to the beginning of the submatrix sub( C ).
5795 *
5796 * DESCC (global and local input) INTEGER array
5797 * On entry, DESCC is an integer array of dimension DLEN_. This
5798 * is the array descriptor for the matrix C.
5799 *
5800 * CT (workspace) REAL array
5801 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802 * holds a copy of the current column of C.
5803 *
5804 * G (workspace) REAL array
5805 * On entry, G is an array of dimension at least MAX(M,N,K). G
5806 * is used to compute the gauges.
5807 *
5808 * ERR (global output) REAL
5809 * On exit, ERR specifies the largest error in absolute value.
5810 *
5811 * INFO (global output) INTEGER
5812 * On exit, if INFO <> 0, the result is less than half accurate.
5813 *
5814 * -- Written on April 1, 1998 by
5815 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816 *
5817 * =====================================================================
5818 *
5819 * .. Parameters ..
5820  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822  $ RSRC_
5823  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5824  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827  REAL ZERO, ONE
5828  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5829 * ..
5830 * .. Local Scalars ..
5831  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833  $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834  $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835  REAL EPS, ERRI
5836 * ..
5837 * .. External Subroutines ..
5838  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5839 * ..
5840 * .. External Functions ..
5841  LOGICAL LSAME
5842  REAL PSLAMCH
5843  EXTERNAL lsame, pslamch
5844 * ..
5845 * .. Intrinsic Functions ..
5846  INTRINSIC abs, max, min, mod, sqrt
5847 * ..
5848 * .. Executable Statements ..
5849 *
5850  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851 *
5852  eps = pslamch( ictxt, 'eps' )
5853 *
5854  upper = lsame( uplo, 'U' )
5855  notran = lsame( trans, 'N' )
5856  tran = lsame( trans, 'T' )
5857 *
5858  lda = max( 1, desca( m_ ) )
5859  ldc = max( 1, descc( m_ ) )
5860 *
5861 * Compute expected result in C using data in A, B and C.
5862 * Compute gauges in G. This part of the computation is performed
5863 * by every process in the grid.
5864 *
5865  DO 140 j = 1, n
5866 *
5867  IF( upper ) THEN
5868  ibeg = 1
5869  iend = j
5870  ELSE
5871  ibeg = j
5872  iend = n
5873  END IF
5874 *
5875  DO 10 i = 1, n
5876  ct( i ) = zero
5877  g( i ) = zero
5878  10 CONTINUE
5879 *
5880  IF( notran ) THEN
5881  DO 30 kk = 1, k
5882  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883  DO 20 i = ibeg, iend
5884  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886  g( i ) = g( i ) + abs( a( ioffak ) ) *
5887  $ abs( a( ioffan ) )
5888  20 CONTINUE
5889  30 CONTINUE
5890  ELSE IF( tran ) THEN
5891  DO 50 kk = 1, k
5892  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893  DO 40 i = ibeg, iend
5894  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896  g( i ) = g( i ) + abs( a( ioffak ) ) *
5897  $ abs( a( ioffan ) )
5898  40 CONTINUE
5899  50 CONTINUE
5900  END IF
5901 *
5902  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903 *
5904  DO 100 i = ibeg, iend
5905  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907  c( ioffc ) = ct( i )
5908  ioffc = ioffc + 1
5909  100 CONTINUE
5910 *
5911 * Compute the error ratio for this result.
5912 *
5913  err = zero
5914  info = 0
5915  ldpc = descc( lld_ )
5916  ioffc = ic + ( jc + j - 2 ) * ldc
5917  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918  $ iic, jjc, icrow, iccol )
5919  icurrow = icrow
5920  rowrep = ( icrow.EQ.-1 )
5921  colrep = ( iccol.EQ.-1 )
5922 *
5923  IF( mycol.EQ.iccol .OR. colrep ) THEN
5924 *
5925  ibb = descc( imb_ ) - ic + 1
5926  IF( ibb.LE.0 )
5927  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928  ibb = min( ibb, n )
5929  in = ic + ibb - 1
5930 *
5931  DO 110 i = ic, in
5932 *
5933  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935  $ c( ioffc ) ) / eps
5936  IF( g( i-ic+1 ).NE.zero )
5937  $ erri = erri / g( i-ic+1 )
5938  err = max( err, erri )
5939  IF( err*sqrt( eps ).GE.one )
5940  $ info = 1
5941  iic = iic + 1
5942  END IF
5943 *
5944  ioffc = ioffc + 1
5945 *
5946  110 CONTINUE
5947 *
5948  icurrow = mod( icurrow+1, nprow )
5949 *
5950  DO 130 i = in+1, ic+n-1, descc( mb_ )
5951  ibb = min( ic+n-i, descc( mb_ ) )
5952 *
5953  DO 120 kk = 0, ibb-1
5954 *
5955  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957  $ c( ioffc ) )/eps
5958  IF( g( i+kk-ic+1 ).NE.zero )
5959  $ erri = erri / g( i+kk-ic+1 )
5960  err = max( err, erri )
5961  IF( err*sqrt( eps ).GE.one )
5962  $ info = 1
5963  iic = iic + 1
5964  END IF
5965 *
5966  ioffc = ioffc + 1
5967 *
5968  120 CONTINUE
5969 *
5970  icurrow = mod( icurrow+1, nprow )
5971 *
5972  130 CONTINUE
5973 *
5974  END IF
5975 *
5976 * If INFO = 0, all results are at least half accurate.
5977 *
5978  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980  $ mycol )
5981  IF( info.NE.0 )
5982  $ GO TO 150
5983 *
5984  140 CONTINUE
5985 *
5986  150 CONTINUE
5987 *
5988  RETURN
5989 *
5990 * End of PSMMCH1
5991 *
5992  END
5993  SUBROUTINE psmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5994  $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5995  $ JC, DESCC, CT, G, ERR, INFO )
5997 * -- PBLAS test routine (version 2.0) --
5998 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999 * and University of California, Berkeley.
6000 * April 1, 1998
6001 *
6002 * .. Scalar Arguments ..
6003  CHARACTER*1 TRANS, UPLO
6004  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005  REAL ALPHA, BETA, ERR
6006 * ..
6007 * .. Array Arguments ..
6008  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009  REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
6010  $ pc( * )
6011 * ..
6012 *
6013 * Purpose
6014 * =======
6015 *
6016 * PSMMCH2 checks the results of the computational tests.
6017 *
6018 * Notes
6019 * =====
6020 *
6021 * A description vector is associated with each 2D block-cyclicly dis-
6022 * tributed matrix. This vector stores the information required to
6023 * establish the mapping between a matrix entry and its corresponding
6024 * process and memory location.
6025 *
6026 * In the following comments, the character _ should be read as
6027 * "of the distributed matrix". Let A be a generic term for any 2D
6028 * block cyclicly distributed matrix. Its description vector is DESCA:
6029 *
6030 * NOTATION STORED IN EXPLANATION
6031 * ---------------- --------------- ------------------------------------
6032 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034 * the NPROW x NPCOL BLACS process grid
6035 * A is distributed over. The context
6036 * itself is global, but the handle
6037 * (the integer value) may vary.
6038 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039 * ted matrix A, M_A >= 0.
6040 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6041 * buted matrix A, N_A >= 0.
6042 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043 * block of the matrix A, IMB_A > 0.
6044 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045 * left block of the matrix A,
6046 * INB_A > 0.
6047 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048 * bute the last M_A-IMB_A rows of A,
6049 * MB_A > 0.
6050 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051 * bute the last N_A-INB_A columns of
6052 * A, NB_A > 0.
6053 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054 * row of the matrix A is distributed,
6055 * NPROW > RSRC_A >= 0.
6056 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057 * first column of A is distributed.
6058 * NPCOL > CSRC_A >= 0.
6059 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060 * array storing the local blocks of
6061 * the distributed matrix A,
6062 * IF( Lc( 1, N_A ) > 0 )
6063 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064 * ELSE
6065 * LLD_A >= 1.
6066 *
6067 * Let K be the number of rows of a matrix A starting at the global in-
6068 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070 * receive if these K rows were distributed over NPROW processes. If K
6071 * is the number of columns of a matrix A starting at the global index
6072 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074 * these K columns were distributed over NPCOL processes.
6075 *
6076 * The values of Lr() and Lc() may be determined via a call to the func-
6077 * tion PB_NUMROC:
6078 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080 *
6081 * Arguments
6082 * =========
6083 *
6084 * ICTXT (local input) INTEGER
6085 * On entry, ICTXT specifies the BLACS context handle, indica-
6086 * ting the global context of the operation. The context itself
6087 * is global, but the value of ICTXT is local.
6088 *
6089 * UPLO (global input) CHARACTER*1
6090 * On entry, UPLO specifies which part of C should contain the
6091 * result.
6092 *
6093 * TRANS (global input) CHARACTER*1
6094 * On entry, TRANS specifies whether the matrices A and B have
6095 * to be transposed or not before computing the matrix-matrix
6096 * product.
6097 *
6098 * N (global input) INTEGER
6099 * On entry, N specifies the order the submatrix operand C. N
6100 * must be at least zero.
6101 *
6102 * K (global input) INTEGER
6103 * On entry, K specifies the number of columns (resp. rows) of A
6104 * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105 * least zero.
6106 *
6107 * ALPHA (global input) REAL
6108 * On entry, ALPHA specifies the scalar alpha.
6109 *
6110 * A (local input) REAL array
6111 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6112 * array contains a local copy of the initial entire matrix PA.
6113 *
6114 * IA (global input) INTEGER
6115 * On entry, IA specifies A's global row index, which points to
6116 * the beginning of the submatrix sub( A ).
6117 *
6118 * JA (global input) INTEGER
6119 * On entry, JA specifies A's global column index, which points
6120 * to the beginning of the submatrix sub( A ).
6121 *
6122 * DESCA (global and local input) INTEGER array
6123 * On entry, DESCA is an integer array of dimension DLEN_. This
6124 * is the array descriptor for the matrix A.
6125 *
6126 * B (local input) REAL array
6127 * On entry, B is an array of dimension (DESCB( M_ ),*). This
6128 * array contains a local copy of the initial entire matrix PB.
6129 *
6130 * IB (global input) INTEGER
6131 * On entry, IB specifies B's global row index, which points to
6132 * the beginning of the submatrix sub( B ).
6133 *
6134 * JB (global input) INTEGER
6135 * On entry, JB specifies B's global column index, which points
6136 * to the beginning of the submatrix sub( B ).
6137 *
6138 * DESCB (global and local input) INTEGER array
6139 * On entry, DESCB is an integer array of dimension DLEN_. This
6140 * is the array descriptor for the matrix B.
6141 *
6142 * BETA (global input) REAL
6143 * On entry, BETA specifies the scalar beta.
6144 *
6145 * C (local input/local output) REAL array
6146 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6147 * array contains a local copy of the initial entire matrix PC.
6148 *
6149 * PC (local input) REAL array
6150 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151 * array contains the local pieces of the matrix PC.
6152 *
6153 * IC (global input) INTEGER
6154 * On entry, IC specifies C's global row index, which points to
6155 * the beginning of the submatrix sub( C ).
6156 *
6157 * JC (global input) INTEGER
6158 * On entry, JC specifies C's global column index, which points
6159 * to the beginning of the submatrix sub( C ).
6160 *
6161 * DESCC (global and local input) INTEGER array
6162 * On entry, DESCC is an integer array of dimension DLEN_. This
6163 * is the array descriptor for the matrix C.
6164 *
6165 * CT (workspace) REAL array
6166 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167 * holds a copy of the current column of C.
6168 *
6169 * G (workspace) REAL array
6170 * On entry, G is an array of dimension at least MAX(M,N,K). G
6171 * is used to compute the gauges.
6172 *
6173 * ERR (global output) REAL
6174 * On exit, ERR specifies the largest error in absolute value.
6175 *
6176 * INFO (global output) INTEGER
6177 * On exit, if INFO <> 0, the result is less than half accurate.
6178 *
6179 * -- Written on April 1, 1998 by
6180 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181 *
6182 * =====================================================================
6183 *
6184 * .. Parameters ..
6185  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187  $ RSRC_
6188  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6189  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192  REAL ZERO, ONE
6193  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
6194 * ..
6195 * .. Local Scalars ..
6196  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198  $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199  $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200  $ NPCOL, NPROW
6201  REAL EPS, ERRI
6202 * ..
6203 * .. External Subroutines ..
6204  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
6205 * ..
6206 * .. External Functions ..
6207  LOGICAL LSAME
6208  REAL PSLAMCH
6209  EXTERNAL LSAME, PSLAMCH
6210 * ..
6211 * .. Intrinsic Functions ..
6212  INTRINSIC abs, max, min, mod, sqrt
6213 * ..
6214 * .. Executable Statements ..
6215 *
6216  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217 *
6218  eps = pslamch( ictxt, 'eps' )
6219 *
6220  upper = lsame( uplo, 'U' )
6221  notran = lsame( trans, 'N' )
6222  tran = lsame( trans, 'T' )
6223 *
6224  lda = max( 1, desca( m_ ) )
6225  ldb = max( 1, descb( m_ ) )
6226  ldc = max( 1, descc( m_ ) )
6227 *
6228 * Compute expected result in C using data in A, B and C.
6229 * Compute gauges in G. This part of the computation is performed
6230 * by every process in the grid.
6231 *
6232  DO 140 j = 1, n
6233 *
6234  IF( upper ) THEN
6235  ibeg = 1
6236  iend = j
6237  ELSE
6238  ibeg = j
6239  iend = n
6240  END IF
6241 *
6242  DO 10 i = 1, n
6243  ct( i ) = zero
6244  g( i ) = zero
6245  10 CONTINUE
6246 *
6247  IF( notran ) THEN
6248  DO 30 kk = 1, k
6249  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250  ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251  DO 20 i = ibeg, iend
6252  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253  ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254  ct( i ) = ct( i ) + alpha * (
6255  $ a( ioffan ) * b( ioffbk ) +
6256  $ b( ioffbn ) * a( ioffak ) )
6257  g( i ) = g( i ) + abs( alpha ) * (
6258  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260  20 CONTINUE
6261  30 CONTINUE
6262  ELSE IF( tran ) THEN
6263  DO 50 kk = 1, k
6264  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265  ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266  DO 40 i = ibeg, iend
6267  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268  ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269  ct( i ) = ct( i ) + alpha * (
6270  $ a( ioffan ) * b( ioffbk ) +
6271  $ b( ioffbn ) * a( ioffak ) )
6272  g( i ) = g( i ) + abs( alpha ) * (
6273  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275  40 CONTINUE
6276  50 CONTINUE
6277  END IF
6278 *
6279  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280 *
6281  DO 100 i = ibeg, iend
6282  ct( i ) = ct( i ) + beta * c( ioffc )
6283  g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284  c( ioffc ) = ct( i )
6285  ioffc = ioffc + 1
6286  100 CONTINUE
6287 *
6288 * Compute the error ratio for this result.
6289 *
6290  err = zero
6291  info = 0
6292  ldpc = descc( lld_ )
6293  ioffc = ic + ( jc + j - 2 ) * ldc
6294  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295  $ iic, jjc, icrow, iccol )
6296  icurrow = icrow
6297  rowrep = ( icrow.EQ.-1 )
6298  colrep = ( iccol.EQ.-1 )
6299 *
6300  IF( mycol.EQ.iccol .OR. colrep ) THEN
6301 *
6302  ibb = descc( imb_ ) - ic + 1
6303  IF( ibb.LE.0 )
6304  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305  ibb = min( ibb, n )
6306  in = ic + ibb - 1
6307 *
6308  DO 110 i = ic, in
6309 *
6310  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312  $ c( ioffc ) ) / eps
6313  IF( g( i-ic+1 ).NE.zero )
6314  $ erri = erri / g( i-ic+1 )
6315  err = max( err, erri )
6316  IF( err*sqrt( eps ).GE.one )
6317  $ info = 1
6318  iic = iic + 1
6319  END IF
6320 *
6321  ioffc = ioffc + 1
6322 *
6323  110 CONTINUE
6324 *
6325  icurrow = mod( icurrow+1, nprow )
6326 *
6327  DO 130 i = in+1, ic+n-1, descc( mb_ )
6328  ibb = min( ic+n-i, descc( mb_ ) )
6329 *
6330  DO 120 kk = 0, ibb-1
6331 *
6332  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334  $ c( ioffc ) )/eps
6335  IF( g( i+kk-ic+1 ).NE.zero )
6336  $ erri = erri / g( i+kk-ic+1 )
6337  err = max( err, erri )
6338  IF( err*sqrt( eps ).GE.one )
6339  $ info = 1
6340  iic = iic + 1
6341  END IF
6342 *
6343  ioffc = ioffc + 1
6344 *
6345  120 CONTINUE
6346 *
6347  icurrow = mod( icurrow+1, nprow )
6348 *
6349  130 CONTINUE
6350 *
6351  END IF
6352 *
6353 * If INFO = 0, all results are at least half accurate.
6354 *
6355  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357  $ mycol )
6358  IF( info.NE.0 )
6359  $ GO TO 150
6360 *
6361  140 CONTINUE
6362 *
6363  150 CONTINUE
6364 *
6365  RETURN
6366 *
6367 * End of PSMMCH2
6368 *
6369  END
6370  SUBROUTINE psmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6371  $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6373 * -- PBLAS test routine (version 2.0) --
6374 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6375 * and University of California, Berkeley.
6376 * April 1, 1998
6377 *
6378 * .. Scalar Arguments ..
6379  CHARACTER*1 TRANS, UPLO
6380  INTEGER IA, IC, INFO, JA, JC, M, N
6381  REAL ALPHA, BETA, ERR
6382 * ..
6383 * .. Array Arguments ..
6384  INTEGER DESCA( * ), DESCC( * )
6385  REAL A( * ), C( * ), PC( * )
6386 * ..
6387 *
6388 * Purpose
6389 * =======
6390 *
6391 * PSMMCH3 checks the results of the computational tests.
6392 *
6393 * Notes
6394 * =====
6395 *
6396 * A description vector is associated with each 2D block-cyclicly dis-
6397 * tributed matrix. This vector stores the information required to
6398 * establish the mapping between a matrix entry and its corresponding
6399 * process and memory location.
6400 *
6401 * In the following comments, the character _ should be read as
6402 * "of the distributed matrix". Let A be a generic term for any 2D
6403 * block cyclicly distributed matrix. Its description vector is DESCA:
6404 *
6405 * NOTATION STORED IN EXPLANATION
6406 * ---------------- --------------- ------------------------------------
6407 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6408 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6409 * the NPROW x NPCOL BLACS process grid
6410 * A is distributed over. The context
6411 * itself is global, but the handle
6412 * (the integer value) may vary.
6413 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6414 * ted matrix A, M_A >= 0.
6415 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6416 * buted matrix A, N_A >= 0.
6417 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6418 * block of the matrix A, IMB_A > 0.
6419 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6420 * left block of the matrix A,
6421 * INB_A > 0.
6422 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6423 * bute the last M_A-IMB_A rows of A,
6424 * MB_A > 0.
6425 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6426 * bute the last N_A-INB_A columns of
6427 * A, NB_A > 0.
6428 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6429 * row of the matrix A is distributed,
6430 * NPROW > RSRC_A >= 0.
6431 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6432 * first column of A is distributed.
6433 * NPCOL > CSRC_A >= 0.
6434 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6435 * array storing the local blocks of
6436 * the distributed matrix A,
6437 * IF( Lc( 1, N_A ) > 0 )
6438 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6439 * ELSE
6440 * LLD_A >= 1.
6441 *
6442 * Let K be the number of rows of a matrix A starting at the global in-
6443 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6444 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6445 * receive if these K rows were distributed over NPROW processes. If K
6446 * is the number of columns of a matrix A starting at the global index
6447 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6448 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6449 * these K columns were distributed over NPCOL processes.
6450 *
6451 * The values of Lr() and Lc() may be determined via a call to the func-
6452 * tion PB_NUMROC:
6453 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6454 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6455 *
6456 * Arguments
6457 * =========
6458 *
6459 * UPLO (global input) CHARACTER*1
6460 * On entry, UPLO specifies which part of C should contain the
6461 * result.
6462 *
6463 * TRANS (global input) CHARACTER*1
6464 * On entry, TRANS specifies whether the matrix A has to be
6465 * transposed or not before computing the matrix-matrix addi-
6466 * tion.
6467 *
6468 * M (global input) INTEGER
6469 * On entry, M specifies the number of rows of C.
6470 *
6471 * N (global input) INTEGER
6472 * On entry, N specifies the number of columns of C.
6473 *
6474 * ALPHA (global input) REAL
6475 * On entry, ALPHA specifies the scalar alpha.
6476 *
6477 * A (local input) REAL array
6478 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6479 * array contains a local copy of the initial entire matrix PA.
6480 *
6481 * IA (global input) INTEGER
6482 * On entry, IA specifies A's global row index, which points to
6483 * the beginning of the submatrix sub( A ).
6484 *
6485 * JA (global input) INTEGER
6486 * On entry, JA specifies A's global column index, which points
6487 * to the beginning of the submatrix sub( A ).
6488 *
6489 * DESCA (global and local input) INTEGER array
6490 * On entry, DESCA is an integer array of dimension DLEN_. This
6491 * is the array descriptor for the matrix A.
6492 *
6493 * BETA (global input) REAL
6494 * On entry, BETA specifies the scalar beta.
6495 *
6496 * C (local input/local output) REAL array
6497 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6498 * array contains a local copy of the initial entire matrix PC.
6499 *
6500 * PC (local input) REAL array
6501 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6502 * array contains the local pieces of the matrix PC.
6503 *
6504 * IC (global input) INTEGER
6505 * On entry, IC specifies C's global row index, which points to
6506 * the beginning of the submatrix sub( C ).
6507 *
6508 * JC (global input) INTEGER
6509 * On entry, JC specifies C's global column index, which points
6510 * to the beginning of the submatrix sub( C ).
6511 *
6512 * DESCC (global and local input) INTEGER array
6513 * On entry, DESCC is an integer array of dimension DLEN_. This
6514 * is the array descriptor for the matrix C.
6515 *
6516 * ERR (global output) REAL
6517 * On exit, ERR specifies the largest error in absolute value.
6518 *
6519 * INFO (global output) INTEGER
6520 * On exit, if INFO <> 0, the result is less than half accurate.
6521 *
6522 * -- Written on April 1, 1998 by
6523 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6524 *
6525 * =====================================================================
6526 *
6527 * .. Parameters ..
6528  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6529  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6530  $ RSRC_
6531  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6532  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6533  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6534  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6535  REAL ZERO
6536  PARAMETER ( ZERO = 0.0e+0 )
6537 * ..
6538 * .. Local Scalars ..
6539  LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER
6540  INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6541  $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6542  $ NPROW
6543  REAL ERR0, ERRI, PREC
6544 * ..
6545 * .. External Subroutines ..
6546  EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L,
6547  $ pserraxpby, sgamx2d
6548 * ..
6549 * .. External Functions ..
6550  LOGICAL LSAME
6551  REAL PSLAMCH
6552  EXTERNAL LSAME, PSLAMCH
6553 * ..
6554 * .. Intrinsic Functions ..
6555  INTRINSIC abs, max
6556 * ..
6557 * .. Executable Statements ..
6558 *
6559  ictxt = descc( ctxt_ )
6560  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6561 *
6562  prec = pslamch( ictxt, 'eps' )
6563 *
6564  upper = lsame( uplo, 'U' )
6565  lower = lsame( uplo, 'L' )
6566  notran = lsame( trans, 'N' )
6567 *
6568 * Compute expected result in C using data in A and C. This part of
6569 * the computation is performed by every process in the grid.
6570 *
6571  info = 0
6572  err = zero
6573 *
6574  lda = max( 1, desca( m_ ) )
6575  ldc = max( 1, descc( m_ ) )
6576  ldpc = max( 1, descc( lld_ ) )
6577  rowrep = ( descc( rsrc_ ).EQ.-1 )
6578  colrep = ( descc( csrc_ ).EQ.-1 )
6579 *
6580  IF( notran ) THEN
6581 *
6582  DO 20 j = jc, jc + n - 1
6583 *
6584  ioffc = ic + ( j - 1 ) * ldc
6585  ioffa = ia + ( ja - 1 + j - jc ) * lda
6586 *
6587  DO 10 i = ic, ic + m - 1
6588 *
6589  IF( upper ) THEN
6590  IF( ( j - jc ).GE.( i - ic ) ) THEN
6591  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6592  $ c( ioffc ), prec )
6593  ELSE
6594  erri = zero
6595  END IF
6596  ELSE IF( lower ) THEN
6597  IF( ( j - jc ).LE.( i - ic ) ) THEN
6598  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6599  $ c( ioffc ), prec )
6600  ELSE
6601  erri = zero
6602  END IF
6603  ELSE
6604  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6605  $ c( ioffc ), prec )
6606  END IF
6607 *
6608  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6609  $ iic, jjc, icrow, iccol )
6610  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6611  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6612  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6613  IF( err0.GT.erri )
6614  $ info = 1
6615  err = max( err, err0 )
6616  END IF
6617 *
6618  ioffa = ioffa + 1
6619  ioffc = ioffc + 1
6620 *
6621  10 CONTINUE
6622 *
6623  20 CONTINUE
6624 *
6625  ELSE
6626 *
6627  DO 40 j = jc, jc + n - 1
6628 *
6629  ioffc = ic + ( j - 1 ) * ldc
6630  ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6631 *
6632  DO 30 i = ic, ic + m - 1
6633 *
6634  IF( upper ) THEN
6635  IF( ( j - jc ).GE.( i - ic ) ) THEN
6636  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6637  $ c( ioffc ), prec )
6638  ELSE
6639  erri = zero
6640  END IF
6641  ELSE IF( lower ) THEN
6642  IF( ( j - jc ).LE.( i - ic ) ) THEN
6643  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6644  $ c( ioffc ), prec )
6645  ELSE
6646  erri = zero
6647  END IF
6648  ELSE
6649  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6650  $ c( ioffc ), prec )
6651  END IF
6652 *
6653  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6654  $ iic, jjc, icrow, iccol )
6655  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6656  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6657  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6658  IF( err0.GT.erri )
6659  $ info = 1
6660  err = max( err, err0 )
6661  END IF
6662 *
6663  ioffc = ioffc + 1
6664  ioffa = ioffa + lda
6665 *
6666  30 CONTINUE
6667 *
6668  40 CONTINUE
6669 *
6670  END IF
6671 *
6672 * If INFO = 0, all results are at least half accurate.
6673 *
6674  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6675  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6676  $ mycol )
6677 *
6678  RETURN
6679 *
6680 * End of PSMMCH3
6681 *
6682  END
6683  SUBROUTINE pserraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6685 * -- PBLAS test routine (version 2.0) --
6686 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6687 * and University of California, Berkeley.
6688 * April 1, 1998
6689 *
6690 * .. Scalar Arguments ..
6691  REAL ALPHA, BETA, ERRBND, PREC, X, Y
6692 * ..
6693 *
6694 * Purpose
6695 * =======
6696 *
6697 * PSERRAXPBY serially computes y := beta*y + alpha * x and returns a
6698 * scaled relative acceptable error bound on the result.
6699 *
6700 * Arguments
6701 * =========
6702 *
6703 * ERRBND (global output) REAL
6704 * On exit, ERRBND specifies the scaled relative acceptable er-
6705 * ror bound.
6706 *
6707 * ALPHA (global input) REAL
6708 * On entry, ALPHA specifies the scalar alpha.
6709 *
6710 * X (global input) REAL
6711 * On entry, X specifies the scalar x to be scaled.
6712 *
6713 * BETA (global input) REAL
6714 * On entry, BETA specifies the scalar beta.
6715 *
6716 * Y (global input/global output) REAL
6717 * On entry, Y specifies the scalar y to be added. On exit, Y
6718 * contains the resulting scalar y.
6719 *
6720 * PREC (global input) REAL
6721 * On entry, PREC specifies the machine precision.
6722 *
6723 * -- Written on April 1, 1998 by
6724 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6725 *
6726 * =====================================================================
6727 *
6728 * .. Parameters ..
6729  REAL ONE, TWO, ZERO
6730  PARAMETER ( ONE = 1.0e+0, two = 2.0e+0,
6731  $ zero = 0.0e+0 )
6732 * ..
6733 * .. Local Scalars ..
6734  REAL ADDBND, FACT, SUMPOS, SUMNEG, TMP
6735 * ..
6736 * .. Intrinsic Functions ..
6737 * ..
6738 * .. Executable Statements ..
6739 *
6740  SUMPOS = zero
6741  sumneg = zero
6742  fact = one + two * prec
6743  addbnd = two * two * two * prec
6744 *
6745  tmp = alpha * x
6746  IF( tmp.GE.zero ) THEN
6747  sumpos = sumpos + tmp * fact
6748  ELSE
6749  sumneg = sumneg - tmp * fact
6750  END IF
6751 *
6752  tmp = beta * y
6753  IF( tmp.GE.zero ) THEN
6754  sumpos = sumpos + tmp * fact
6755  ELSE
6756  sumneg = sumneg - tmp * fact
6757  END IF
6758 *
6759  y = ( beta * y ) + ( alpha * x )
6760 *
6761  errbnd = addbnd * max( sumpos, sumneg )
6762 *
6763  RETURN
6764 *
6765 * End of PSERRAXPBY
6766 *
6767  END
6768  REAL FUNCTION PSLAMCH( ICTXT, CMACH )
6770 * -- PBLAS test routine (version 2.0) --
6771 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6772 * and University of California, Berkeley.
6773 * April 1, 1998
6774 *
6775 * .. Scalar Arguments ..
6776  CHARACTER*1 cmach
6777  INTEGER ictxt
6778 * ..
6779 *
6780 * Purpose
6781 * =======
6782 *
6783 * PSLAMCH determines single precision machine parameters.
6784 *
6785 * Arguments
6786 * =========
6787 *
6788 * ICTXT (local input) INTEGER
6789 * On entry, ICTXT specifies the BLACS context handle, indica-
6790 * ting the global context of the operation. The context itself
6791 * is global, but the value of ICTXT is local.