ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psblastst.f
Go to the documentation of this file.
1  SUBROUTINE psoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2 *
3 * -- PBLAS test routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * .. Scalar Arguments ..
9  INTEGER ICTXT, NOUT, SCODE
10 * ..
11 * .. Array Arguments ..
12  CHARACTER*(*) SNAME
13 * ..
14 * .. Subroutine Arguments ..
15  EXTERNAL subptr
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSOPTEE tests whether the PBLAS respond correctly to a bad option
22 * argument.
23 *
24 * Notes
25 * =====
26 *
27 * A description vector is associated with each 2D block-cyclicly dis-
28 * tributed matrix. This vector stores the information required to
29 * establish the mapping between a matrix entry and its corresponding
30 * process and memory location.
31 *
32 * In the following comments, the character _ should be read as
33 * "of the distributed matrix". Let A be a generic term for any 2D
34 * block cyclicly distributed matrix. Its description vector is DESCA:
35 *
36 * NOTATION STORED IN EXPLANATION
37 * ---------------- --------------- ------------------------------------
38 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40 * the NPROW x NPCOL BLACS process grid
41 * A is distributed over. The context
42 * itself is global, but the handle
43 * (the integer value) may vary.
44 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
45 * ted matrix A, M_A >= 0.
46 * N_A (global) DESCA( N_ ) The number of columns in the distri-
47 * buted matrix A, N_A >= 0.
48 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49 * block of the matrix A, IMB_A > 0.
50 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
51 * left block of the matrix A,
52 * INB_A > 0.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54 * bute the last M_A-IMB_A rows of A,
55 * MB_A > 0.
56 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57 * bute the last N_A-INB_A columns of
58 * A, NB_A > 0.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the matrix A is distributed,
61 * NPROW > RSRC_A >= 0.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of A is distributed.
64 * NPCOL > CSRC_A >= 0.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array storing the local blocks of
67 * the distributed matrix A,
68 * IF( Lc( 1, N_A ) > 0 )
69 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
70 * ELSE
71 * LLD_A >= 1.
72 *
73 * Let K be the number of rows of a matrix A starting at the global in-
74 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76 * receive if these K rows were distributed over NPROW processes. If K
77 * is the number of columns of a matrix A starting at the global index
78 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80 * these K columns were distributed over NPCOL processes.
81 *
82 * The values of Lr() and Lc() may be determined via a call to the func-
83 * tion PB_NUMROC:
84 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86 *
87 * Arguments
88 * =========
89 *
90 * ICTXT (local input) INTEGER
91 * On entry, ICTXT specifies the BLACS context handle, indica-
92 * ting the global context of the operation. The context itself
93 * is global, but the value of ICTXT is local.
94 *
95 * NOUT (global input) INTEGER
96 * On entry, NOUT specifies the unit number for the output file.
97 * When NOUT is 6, output to screen, when NOUT is 0, output to
98 * stderr. NOUT is only defined for process 0.
99 *
100 * SUBPTR (global input) SUBROUTINE
101 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
102 * EXTERNAL in the calling subroutine.
103 *
104 * SCODE (global input) INTEGER
105 * On entry, SCODE specifies the calling sequence code.
106 *
107 * SNAME (global input) CHARACTER*(*)
108 * On entry, SNAME specifies the subroutine name calling this
109 * subprogram.
110 *
111 * Calling sequence encodings
112 * ==========================
113 *
114 * code Formal argument list Examples
115 *
116 * 11 (n, v1,v2) _SWAP, _COPY
117 * 12 (n,s1, v1 ) _SCAL, _SCAL
118 * 13 (n,s1, v1,v2) _AXPY, _DOT_
119 * 14 (n,s1,i1,v1 ) _AMAX
120 * 15 (n,u1, v1 ) _ASUM, _NRM2
121 *
122 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125 * 24 ( m,n,s1,v1,v2,m1) _GER_
126 * 25 (uplo, n,s1,v1, m1) _SYR
127 * 26 (uplo, n,u1,v1, m1) _HER
128 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129 *
130 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
137 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140 *
141 * -- Written on April 1, 1998 by
142 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143 *
144 * =====================================================================
145 *
146 * .. Local Scalars ..
147  INTEGER APOS
148 * ..
149 * .. External Subroutines ..
150  EXTERNAL pschkopt
151 * ..
152 * .. Executable Statements ..
153 *
154 * Level 2 PBLAS
155 *
156  IF( scode.EQ.21 ) THEN
157 *
158 * Check 1st (and only) option
159 *
160  apos = 1
161  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162 *
163  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164  $ scode.EQ.27 ) THEN
165 *
166 * Check 1st (and only) option
167 *
168  apos = 1
169  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170 *
171  ELSE IF( scode.EQ.23 ) THEN
172 *
173 * Check 1st option
174 *
175  apos = 1
176  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177 *
178 * Check 2nd option
179 *
180  apos = 2
181  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182 *
183 * Check 3rd option
184 *
185  apos = 3
186  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187 *
188 * Level 3 PBLAS
189 *
190  ELSE IF( scode.EQ.31 ) THEN
191 *
192 * Check 1st option
193 *
194  apos = 1
195  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196 *
197 * Check 2'nd option
198 *
199  apos = 2
200  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201 *
202  ELSE IF( scode.EQ.32 ) THEN
203 *
204 * Check 1st option
205 *
206  apos = 1
207  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208 *
209 * Check 2nd option
210 *
211  apos = 2
212  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213 *
214  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215  $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216 *
217 * Check 1st option
218 *
219  apos = 1
220  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221 *
222 * Check 2'nd option
223 *
224  apos = 2
225  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226 *
227  ELSE IF( scode.EQ.38 ) THEN
228 *
229 * Check 1st option
230 *
231  apos = 1
232  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233 *
234 * Check 2nd option
235 *
236  apos = 2
237  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238 *
239 * Check 3rd option
240 *
241  apos = 3
242  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243 *
244 * Check 4th option
245 *
246  apos = 4
247  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248 *
249 *
250  ELSE IF( scode.EQ.39 ) THEN
251 *
252 * Check 1st option
253 *
254  apos = 1
255  CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256 *
257  END IF
258 *
259  RETURN
260 *
261 * End of PSOPTEE
262 *
263  END
264  SUBROUTINE pschkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265  $ ARGPOS )
266 *
267 * -- PBLAS test routine (version 2.0) --
268 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269 * and University of California, Berkeley.
270 * April 1, 1998
271 *
272 * .. Scalar Arguments ..
273  CHARACTER*1 ARGNAM
274  INTEGER ARGPOS, ICTXT, NOUT, SCODE
275 * ..
276 * .. Array Arguments ..
277  CHARACTER*(*) SNAME
278 * ..
279 * .. Subroutine Arguments ..
280  EXTERNAL subptr
281 * ..
282 *
283 * Purpose
284 * =======
285 *
286 * PSCHKOPT tests the option ARGNAM in any PBLAS routine.
287 *
288 * Notes
289 * =====
290 *
291 * A description vector is associated with each 2D block-cyclicly dis-
292 * tributed matrix. This vector stores the information required to
293 * establish the mapping between a matrix entry and its corresponding
294 * process and memory location.
295 *
296 * In the following comments, the character _ should be read as
297 * "of the distributed matrix". Let A be a generic term for any 2D
298 * block cyclicly distributed matrix. Its description vector is DESCA:
299 *
300 * NOTATION STORED IN EXPLANATION
301 * ---------------- --------------- ------------------------------------
302 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304 * the NPROW x NPCOL BLACS process grid
305 * A is distributed over. The context
306 * itself is global, but the handle
307 * (the integer value) may vary.
308 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
309 * ted matrix A, M_A >= 0.
310 * N_A (global) DESCA( N_ ) The number of columns in the distri-
311 * buted matrix A, N_A >= 0.
312 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313 * block of the matrix A, IMB_A > 0.
314 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
315 * left block of the matrix A,
316 * INB_A > 0.
317 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318 * bute the last M_A-IMB_A rows of A,
319 * MB_A > 0.
320 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321 * bute the last N_A-INB_A columns of
322 * A, NB_A > 0.
323 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324 * row of the matrix A is distributed,
325 * NPROW > RSRC_A >= 0.
326 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327 * first column of A is distributed.
328 * NPCOL > CSRC_A >= 0.
329 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330 * array storing the local blocks of
331 * the distributed matrix A,
332 * IF( Lc( 1, N_A ) > 0 )
333 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
334 * ELSE
335 * LLD_A >= 1.
336 *
337 * Let K be the number of rows of a matrix A starting at the global in-
338 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340 * receive if these K rows were distributed over NPROW processes. If K
341 * is the number of columns of a matrix A starting at the global index
342 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344 * these K columns were distributed over NPCOL processes.
345 *
346 * The values of Lr() and Lc() may be determined via a call to the func-
347 * tion PB_NUMROC:
348 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350 *
351 * Arguments
352 * =========
353 *
354 * ICTXT (local input) INTEGER
355 * On entry, ICTXT specifies the BLACS context handle, indica-
356 * ting the global context of the operation. The context itself
357 * is global, but the value of ICTXT is local.
358 *
359 * NOUT (global input) INTEGER
360 * On entry, NOUT specifies the unit number for the output file.
361 * When NOUT is 6, output to screen, when NOUT is 0, output to
362 * stderr. NOUT is only defined for process 0.
363 *
364 * SUBPTR (global input) SUBROUTINE
365 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
366 * EXTERNAL in the calling subroutine.
367 *
368 * SCODE (global input) INTEGER
369 * On entry, SCODE specifies the calling sequence code.
370 *
371 * SNAME (global input) CHARACTER*(*)
372 * On entry, SNAME specifies the subroutine name calling this
373 * subprogram.
374 *
375 * ARGNAM (global input) CHARACTER*(*)
376 * On entry, ARGNAM specifies the name of the option to be
377 * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378 *
379 * ARGPOS (global input) INTEGER
380 * On entry, ARGPOS indicates the position of the option ARGNAM
381 * to be tested.
382 *
383 * -- Written on April 1, 1998 by
384 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385 *
386 * =====================================================================
387 *
388 * .. Local Scalars ..
389  INTEGER INFOT
390 * ..
391 * .. External Subroutines ..
392  EXTERNAL pchkpbe, pscallsub, pssetpblas
393 * ..
394 * .. External Functions ..
395  LOGICAL LSAME
396  EXTERNAL lsame
397 * ..
398 * .. Common Blocks ..
399  CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400  COMMON /pblasc/diag, side, transa, transb, uplo
401 * ..
402 * .. Executable Statements ..
403 *
404 * Reiniatilize the dummy arguments to correct values
405 *
406  CALL pssetpblas( ictxt )
407 *
408  IF( lsame( argnam, 'D' ) ) THEN
409 *
410 * Generate bad DIAG option
411 *
412  diag = '/'
413 *
414  ELSE IF( lsame( argnam, 'S' ) ) THEN
415 *
416 * Generate bad SIDE option
417 *
418  side = '/'
419 *
420  ELSE IF( lsame( argnam, 'A' ) ) THEN
421 *
422 * Generate bad TRANSA option
423 *
424  transa = '/'
425 *
426  ELSE IF( lsame( argnam, 'B' ) ) THEN
427 *
428 * Generate bad TRANSB option
429 *
430  transb = '/'
431 *
432  ELSE IF( lsame( argnam, 'U' ) ) THEN
433 *
434 * Generate bad UPLO option
435 *
436  uplo = '/'
437 *
438  END IF
439 *
440 * Set INFOT to the position of the bad dimension argument
441 *
442  infot = argpos
443 *
444 * Call the PBLAS routine
445 *
446  CALL pscallsub( subptr, scode )
447  CALL pchkpbe( ictxt, nout, sname, infot )
448 *
449  RETURN
450 *
451 * End of PSCHKOPT
452 *
453  END
454  SUBROUTINE psdimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455 *
456 * -- PBLAS test routine (version 2.0) --
457 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458 * and University of California, Berkeley.
459 * April 1, 1998
460 *
461 * .. Scalar Arguments ..
462  INTEGER ICTXT, NOUT, SCODE
463 * ..
464 * .. Array Arguments ..
465  CHARACTER*(*) SNAME
466 * ..
467 * .. Subroutine Arguments ..
468  EXTERNAL subptr
469 * ..
470 *
471 * Purpose
472 * =======
473 *
474 * PSDIMEE tests whether the PBLAS respond correctly to a bad dimension
475 * argument.
476 *
477 * Notes
478 * =====
479 *
480 * A description vector is associated with each 2D block-cyclicly dis-
481 * tributed matrix. This vector stores the information required to
482 * establish the mapping between a matrix entry and its corresponding
483 * process and memory location.
484 *
485 * In the following comments, the character _ should be read as
486 * "of the distributed matrix". Let A be a generic term for any 2D
487 * block cyclicly distributed matrix. Its description vector is DESCA:
488 *
489 * NOTATION STORED IN EXPLANATION
490 * ---------------- --------------- ------------------------------------
491 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493 * the NPROW x NPCOL BLACS process grid
494 * A is distributed over. The context
495 * itself is global, but the handle
496 * (the integer value) may vary.
497 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
498 * ted matrix A, M_A >= 0.
499 * N_A (global) DESCA( N_ ) The number of columns in the distri-
500 * buted matrix A, N_A >= 0.
501 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502 * block of the matrix A, IMB_A > 0.
503 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
504 * left block of the matrix A,
505 * INB_A > 0.
506 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507 * bute the last M_A-IMB_A rows of A,
508 * MB_A > 0.
509 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510 * bute the last N_A-INB_A columns of
511 * A, NB_A > 0.
512 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513 * row of the matrix A is distributed,
514 * NPROW > RSRC_A >= 0.
515 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516 * first column of A is distributed.
517 * NPCOL > CSRC_A >= 0.
518 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519 * array storing the local blocks of
520 * the distributed matrix A,
521 * IF( Lc( 1, N_A ) > 0 )
522 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
523 * ELSE
524 * LLD_A >= 1.
525 *
526 * Let K be the number of rows of a matrix A starting at the global in-
527 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529 * receive if these K rows were distributed over NPROW processes. If K
530 * is the number of columns of a matrix A starting at the global index
531 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533 * these K columns were distributed over NPCOL processes.
534 *
535 * The values of Lr() and Lc() may be determined via a call to the func-
536 * tion PB_NUMROC:
537 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539 *
540 * Arguments
541 * =========
542 *
543 * ICTXT (local input) INTEGER
544 * On entry, ICTXT specifies the BLACS context handle, indica-
545 * ting the global context of the operation. The context itself
546 * is global, but the value of ICTXT is local.
547 *
548 * NOUT (global input) INTEGER
549 * On entry, NOUT specifies the unit number for the output file.
550 * When NOUT is 6, output to screen, when NOUT is 0, output to
551 * stderr. NOUT is only defined for process 0.
552 *
553 * SUBPTR (global input) SUBROUTINE
554 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
555 * EXTERNAL in the calling subroutine.
556 *
557 * SCODE (global input) INTEGER
558 * On entry, SCODE specifies the calling sequence code.
559 *
560 * SNAME (global input) CHARACTER*(*)
561 * On entry, SNAME specifies the subroutine name calling this
562 * subprogram.
563 *
564 * Calling sequence encodings
565 * ==========================
566 *
567 * code Formal argument list Examples
568 *
569 * 11 (n, v1,v2) _SWAP, _COPY
570 * 12 (n,s1, v1 ) _SCAL, _SCAL
571 * 13 (n,s1, v1,v2) _AXPY, _DOT_
572 * 14 (n,s1,i1,v1 ) _AMAX
573 * 15 (n,u1, v1 ) _ASUM, _NRM2
574 *
575 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578 * 24 ( m,n,s1,v1,v2,m1) _GER_
579 * 25 (uplo, n,s1,v1, m1) _SYR
580 * 26 (uplo, n,u1,v1, m1) _HER
581 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582 *
583 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
590 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593 *
594 * -- Written on April 1, 1998 by
595 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596 *
597 * =====================================================================
598 *
599 * .. Local Scalars ..
600  INTEGER APOS
601 * ..
602 * .. External Subroutines ..
603  EXTERNAL pschkdim
604 * ..
605 * .. Executable Statements ..
606 *
607 * Level 1 PBLAS
608 *
609  IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610  $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611 *
612 * Check 1st (and only) dimension
613 *
614  apos = 1
615  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616 *
617 * Level 2 PBLAS
618 *
619  ELSE IF( scode.EQ.21 ) THEN
620 *
621 * Check 1st dimension
622 *
623  apos = 2
624  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625 *
626 * Check 2nd dimension
627 *
628  apos = 3
629  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630 *
631  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632  $ scode.EQ.27 ) THEN
633 *
634 * Check 1st (and only) dimension
635 *
636  apos = 2
637  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638 *
639  ELSE IF( scode.EQ.23 ) THEN
640 *
641 * Check 1st (and only) dimension
642 *
643  apos = 4
644  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645 *
646  ELSE IF( scode.EQ.24 ) THEN
647 *
648 * Check 1st dimension
649 *
650  apos = 1
651  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652 *
653 * Check 2nd dimension
654 *
655  apos = 2
656  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657 *
658 * Level 3 PBLAS
659 *
660  ELSE IF( scode.EQ.31 ) THEN
661 *
662 * Check 1st dimension
663 *
664  apos = 3
665  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666 *
667 * Check 2nd dimension
668 *
669  apos = 4
670  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671 *
672 * Check 3rd dimension
673 *
674  apos = 5
675  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676 *
677  ELSE IF( scode.EQ.32 ) THEN
678 *
679 * Check 1st dimension
680 *
681  apos = 3
682  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683 *
684 * Check 2nd dimension
685 *
686  apos = 4
687  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688 *
689  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690  $ scode.EQ.36 ) THEN
691 *
692 * Check 1st dimension
693 *
694  apos = 3
695  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696 *
697 * Check 2nd dimension
698 *
699  apos = 4
700  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701 *
702  ELSE IF( scode.EQ.37 ) THEN
703 *
704 * Check 1st dimension
705 *
706  apos = 1
707  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708 *
709 * Check 2nd dimension
710 *
711  apos = 2
712  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713 *
714  ELSE IF( scode.EQ.38 ) THEN
715 *
716 * Check 1st dimension
717 *
718  apos = 5
719  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720 *
721 * Check 2nd dimension
722 *
723  apos = 6
724  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725 *
726  ELSE IF( scode.EQ.39 ) THEN
727 *
728 * Check 1st dimension
729 *
730  apos = 2
731  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732 *
733 * Check 2nd dimension
734 *
735  apos = 3
736  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737 *
738  ELSE IF( scode.EQ.40 ) THEN
739 *
740 * Check 1st dimension
741 *
742  apos = 3
743  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744 *
745 * Check 2nd dimension
746 *
747  apos = 4
748  CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749 *
750  END IF
751 *
752  RETURN
753 *
754 * End of PSDIMEE
755 *
756  END
757  SUBROUTINE pschkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758  $ ARGPOS )
759 *
760 * -- PBLAS test routine (version 2.0) --
761 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762 * and University of California, Berkeley.
763 * April 1, 1998
764 *
765 * .. Scalar Arguments ..
766  CHARACTER*1 ARGNAM
767  INTEGER ARGPOS, ICTXT, NOUT, SCODE
768 * ..
769 * .. Array Arguments ..
770  CHARACTER*(*) SNAME
771 * ..
772 * .. Subroutine Arguments ..
773  EXTERNAL subptr
774 * ..
775 *
776 * Purpose
777 * =======
778 *
779 * PSCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780 *
781 * Notes
782 * =====
783 *
784 * A description vector is associated with each 2D block-cyclicly dis-
785 * tributed matrix. This vector stores the information required to
786 * establish the mapping between a matrix entry and its corresponding
787 * process and memory location.
788 *
789 * In the following comments, the character _ should be read as
790 * "of the distributed matrix". Let A be a generic term for any 2D
791 * block cyclicly distributed matrix. Its description vector is DESCA:
792 *
793 * NOTATION STORED IN EXPLANATION
794 * ---------------- --------------- ------------------------------------
795 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797 * the NPROW x NPCOL BLACS process grid
798 * A is distributed over. The context
799 * itself is global, but the handle
800 * (the integer value) may vary.
801 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
802 * ted matrix A, M_A >= 0.
803 * N_A (global) DESCA( N_ ) The number of columns in the distri-
804 * buted matrix A, N_A >= 0.
805 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806 * block of the matrix A, IMB_A > 0.
807 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
808 * left block of the matrix A,
809 * INB_A > 0.
810 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811 * bute the last M_A-IMB_A rows of A,
812 * MB_A > 0.
813 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814 * bute the last N_A-INB_A columns of
815 * A, NB_A > 0.
816 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817 * row of the matrix A is distributed,
818 * NPROW > RSRC_A >= 0.
819 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820 * first column of A is distributed.
821 * NPCOL > CSRC_A >= 0.
822 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823 * array storing the local blocks of
824 * the distributed matrix A,
825 * IF( Lc( 1, N_A ) > 0 )
826 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
827 * ELSE
828 * LLD_A >= 1.
829 *
830 * Let K be the number of rows of a matrix A starting at the global in-
831 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833 * receive if these K rows were distributed over NPROW processes. If K
834 * is the number of columns of a matrix A starting at the global index
835 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837 * these K columns were distributed over NPCOL processes.
838 *
839 * The values of Lr() and Lc() may be determined via a call to the func-
840 * tion PB_NUMROC:
841 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843 *
844 * Arguments
845 * =========
846 *
847 * ICTXT (local input) INTEGER
848 * On entry, ICTXT specifies the BLACS context handle, indica-
849 * ting the global context of the operation. The context itself
850 * is global, but the value of ICTXT is local.
851 *
852 * NOUT (global input) INTEGER
853 * On entry, NOUT specifies the unit number for the output file.
854 * When NOUT is 6, output to screen, when NOUT is 0, output to
855 * stderr. NOUT is only defined for process 0.
856 *
857 * SUBPTR (global input) SUBROUTINE
858 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
859 * EXTERNAL in the calling subroutine.
860 *
861 * SCODE (global input) INTEGER
862 * On entry, SCODE specifies the calling sequence code.
863 *
864 * SNAME (global input) CHARACTER*(*)
865 * On entry, SNAME specifies the subroutine name calling this
866 * subprogram.
867 *
868 * ARGNAM (global input) CHARACTER*(*)
869 * On entry, ARGNAM specifies the name of the dimension to be
870 * checked. ARGNAM can either be 'M', 'N' or 'K'.
871 *
872 * ARGPOS (global input) INTEGER
873 * On entry, ARGPOS indicates the position of the option ARGNAM
874 * to be tested.
875 *
876 * -- Written on April 1, 1998 by
877 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878 *
879 * =====================================================================
880 *
881 * .. Local Scalars ..
882  INTEGER INFOT
883 * ..
884 * .. External Subroutines ..
885  EXTERNAL pchkpbe, pscallsub, pssetpblas
886 * ..
887 * .. External Functions ..
888  LOGICAL LSAME
889  EXTERNAL LSAME
890 * ..
891 * .. Common Blocks ..
892  INTEGER KDIM, MDIM, NDIM
893  COMMON /PBLASN/KDIM, MDIM, NDIM
894 * ..
895 * .. Executable Statements ..
896 *
897 * Reiniatilize the dummy arguments to correct values
898 *
899  CALL pssetpblas( ictxt )
900 *
901  IF( lsame( argnam, 'M' ) ) THEN
902 *
903 * Generate bad MDIM
904 *
905  mdim = -1
906 *
907  ELSE IF( lsame( argnam, 'N' ) ) THEN
908 *
909 * Generate bad NDIM
910 *
911  ndim = -1
912 *
913  ELSE
914 *
915 * Generate bad KDIM
916 *
917  kdim = -1
918 *
919  END IF
920 *
921 * Set INFOT to the position of the bad dimension argument
922 *
923  infot = argpos
924 *
925 * Call the PBLAS routine
926 *
927  CALL pscallsub( subptr, scode )
928  CALL pchkpbe( ictxt, nout, sname, infot )
929 *
930  RETURN
931 *
932 * End of PSCHKDIM
933 *
934  END
935  SUBROUTINE psvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936 *
937 * -- PBLAS test routine (version 2.0) --
938 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939 * and University of California, Berkeley.
940 * April 1, 1998
941 *
942 * .. Scalar Arguments ..
943  INTEGER ICTXT, NOUT, SCODE
944 * ..
945 * .. Array Arguments ..
946  CHARACTER*7 SNAME
947 * ..
948 * .. Subroutine Arguments ..
949  EXTERNAL subptr
950 * ..
951 *
952 * Purpose
953 * =======
954 *
955 * PSVECEE tests whether the PBLAS respond correctly to a bad vector
956 * argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957 * DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958 * DESC<vec>, and INC<vec> can be tested.
959 *
960 * Notes
961 * =====
962 *
963 * A description vector is associated with each 2D block-cyclicly dis-
964 * tributed matrix. This vector stores the information required to
965 * establish the mapping between a matrix entry and its corresponding
966 * process and memory location.
967 *
968 * In the following comments, the character _ should be read as
969 * "of the distributed matrix". Let A be a generic term for any 2D
970 * block cyclicly distributed matrix. Its description vector is DESCA:
971 *
972 * NOTATION STORED IN EXPLANATION
973 * ---------------- --------------- ------------------------------------
974 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976 * the NPROW x NPCOL BLACS process grid
977 * A is distributed over. The context
978 * itself is global, but the handle
979 * (the integer value) may vary.
980 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
981 * ted matrix A, M_A >= 0.
982 * N_A (global) DESCA( N_ ) The number of columns in the distri-
983 * buted matrix A, N_A >= 0.
984 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985 * block of the matrix A, IMB_A > 0.
986 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
987 * left block of the matrix A,
988 * INB_A > 0.
989 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990 * bute the last M_A-IMB_A rows of A,
991 * MB_A > 0.
992 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993 * bute the last N_A-INB_A columns of
994 * A, NB_A > 0.
995 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996 * row of the matrix A is distributed,
997 * NPROW > RSRC_A >= 0.
998 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999 * first column of A is distributed.
1000 * NPCOL > CSRC_A >= 0.
1001 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002 * array storing the local blocks of
1003 * the distributed matrix A,
1004 * IF( Lc( 1, N_A ) > 0 )
1005 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006 * ELSE
1007 * LLD_A >= 1.
1008 *
1009 * Let K be the number of rows of a matrix A starting at the global in-
1010 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012 * receive if these K rows were distributed over NPROW processes. If K
1013 * is the number of columns of a matrix A starting at the global index
1014 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016 * these K columns were distributed over NPCOL processes.
1017 *
1018 * The values of Lr() and Lc() may be determined via a call to the func-
1019 * tion PB_NUMROC:
1020 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022 *
1023 * Arguments
1024 * =========
1025 *
1026 * ICTXT (local input) INTEGER
1027 * On entry, ICTXT specifies the BLACS context handle, indica-
1028 * ting the global context of the operation. The context itself
1029 * is global, but the value of ICTXT is local.
1030 *
1031 * NOUT (global input) INTEGER
1032 * On entry, NOUT specifies the unit number for the output file.
1033 * When NOUT is 6, output to screen, when NOUT is 0, output to
1034 * stderr. NOUT is only defined for process 0.
1035 *
1036 * SUBPTR (global input) SUBROUTINE
1037 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038 * EXTERNAL in the calling subroutine.
1039 *
1040 * SCODE (global input) INTEGER
1041 * On entry, SCODE specifies the calling sequence code.
1042 *
1043 * SNAME (global input) CHARACTER*(*)
1044 * On entry, SNAME specifies the subroutine name calling this
1045 * subprogram.
1046 *
1047 * Calling sequence encodings
1048 * ==========================
1049 *
1050 * code Formal argument list Examples
1051 *
1052 * 11 (n, v1,v2) _SWAP, _COPY
1053 * 12 (n,s1, v1 ) _SCAL, _SCAL
1054 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1055 * 14 (n,s1,i1,v1 ) _AMAX
1056 * 15 (n,u1, v1 ) _ASUM, _NRM2
1057 *
1058 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061 * 24 ( m,n,s1,v1,v2,m1) _GER_
1062 * 25 (uplo, n,s1,v1, m1) _SYR
1063 * 26 (uplo, n,u1,v1, m1) _HER
1064 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065 *
1066 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076 *
1077 * -- Written on April 1, 1998 by
1078 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079 *
1080 * =====================================================================
1081 *
1082 * .. Local Scalars ..
1083  INTEGER APOS
1084 * ..
1085 * .. External Subroutines ..
1086  EXTERNAL pschkmat
1087 * ..
1088 * .. Executable Statements ..
1089 *
1090 * Level 1 PBLAS
1091 *
1092  IF( scode.EQ.11 ) THEN
1093 *
1094 * Check 1st vector
1095 *
1096  apos = 2
1097  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098 *
1099 * Check 2nd vector
1100 *
1101  apos = 7
1102  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103 *
1104  ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105 *
1106 * Check 1st (and only) vector
1107 *
1108  apos = 3
1109  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110 *
1111  ELSE IF( scode.EQ.13 ) THEN
1112 *
1113 * Check 1st vector
1114 *
1115  apos = 3
1116  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117 *
1118 * Check 2nd vector
1119 *
1120  apos = 8
1121  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122 *
1123  ELSE IF( scode.EQ.14 ) THEN
1124 *
1125 * Check 1st (and only) vector
1126 *
1127  apos = 4
1128  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129 *
1130 * Level 2 PBLAS
1131 *
1132  ELSE IF( scode.EQ.21 ) THEN
1133 *
1134 * Check 1st vector
1135 *
1136  apos = 9
1137  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138 *
1139 * Check 2nd vector
1140 *
1141  apos = 15
1142  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143 *
1144  ELSE IF( scode.EQ.22 ) THEN
1145 *
1146 * Check 1st vector
1147 *
1148  apos = 8
1149  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150 *
1151 * Check 2nd vector
1152 *
1153  apos = 14
1154  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155 *
1156  ELSE IF( scode.EQ.23 ) THEN
1157 *
1158 * Check 1st (and only) vector
1159 *
1160  apos = 9
1161  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162 *
1163  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164 *
1165 * Check 1st vector
1166 *
1167  apos = 4
1168  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169 *
1170 * Check 2nd vector
1171 *
1172  apos = 9
1173  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174 *
1175  ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176 *
1177 * Check 1'st (and only) vector
1178 *
1179  apos = 4
1180  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181 *
1182  END IF
1183 *
1184  RETURN
1185 *
1186 * End of PSVECEE
1187 *
1188  END
1189  SUBROUTINE psmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1191 * -- PBLAS test routine (version 2.0) --
1192 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193 * and University of California, Berkeley.
1194 * April 1, 1998
1195 *
1196 * .. Scalar Arguments ..
1197  INTEGER ICTXT, NOUT, SCODE
1198 * ..
1199 * .. Array Arguments ..
1200  CHARACTER*7 SNAME
1201 * ..
1202 * .. Subroutine Arguments ..
1203  EXTERNAL subptr
1204 * ..
1205 *
1206 * Purpose
1207 * =======
1208 *
1209 * PSMATEE tests whether the PBLAS respond correctly to a bad matrix
1210 * argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211 * and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212 * can be tested.
1213 *
1214 * Notes
1215 * =====
1216 *
1217 * A description vector is associated with each 2D block-cyclicly dis-
1218 * tributed matrix. This vector stores the information required to
1219 * establish the mapping between a matrix entry and its corresponding
1220 * process and memory location.
1221 *
1222 * In the following comments, the character _ should be read as
1223 * "of the distributed matrix". Let A be a generic term for any 2D
1224 * block cyclicly distributed matrix. Its description vector is DESCA:
1225 *
1226 * NOTATION STORED IN EXPLANATION
1227 * ---------------- --------------- ------------------------------------
1228 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230 * the NPROW x NPCOL BLACS process grid
1231 * A is distributed over. The context
1232 * itself is global, but the handle
1233 * (the integer value) may vary.
1234 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235 * ted matrix A, M_A >= 0.
1236 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1237 * buted matrix A, N_A >= 0.
1238 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239 * block of the matrix A, IMB_A > 0.
1240 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241 * left block of the matrix A,
1242 * INB_A > 0.
1243 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244 * bute the last M_A-IMB_A rows of A,
1245 * MB_A > 0.
1246 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247 * bute the last N_A-INB_A columns of
1248 * A, NB_A > 0.
1249 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250 * row of the matrix A is distributed,
1251 * NPROW > RSRC_A >= 0.
1252 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253 * first column of A is distributed.
1254 * NPCOL > CSRC_A >= 0.
1255 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256 * array storing the local blocks of
1257 * the distributed matrix A,
1258 * IF( Lc( 1, N_A ) > 0 )
1259 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260 * ELSE
1261 * LLD_A >= 1.
1262 *
1263 * Let K be the number of rows of a matrix A starting at the global in-
1264 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266 * receive if these K rows were distributed over NPROW processes. If K
1267 * is the number of columns of a matrix A starting at the global index
1268 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270 * these K columns were distributed over NPCOL processes.
1271 *
1272 * The values of Lr() and Lc() may be determined via a call to the func-
1273 * tion PB_NUMROC:
1274 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276 *
1277 * Arguments
1278 * =========
1279 *
1280 * ICTXT (local input) INTEGER
1281 * On entry, ICTXT specifies the BLACS context handle, indica-
1282 * ting the global context of the operation. The context itself
1283 * is global, but the value of ICTXT is local.
1284 *
1285 * NOUT (global input) INTEGER
1286 * On entry, NOUT specifies the unit number for the output file.
1287 * When NOUT is 6, output to screen, when NOUT is 0, output to
1288 * stderr. NOUT is only defined for process 0.
1289 *
1290 * SUBPTR (global input) SUBROUTINE
1291 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292 * EXTERNAL in the calling subroutine.
1293 *
1294 * SCODE (global input) INTEGER
1295 * On entry, SCODE specifies the calling sequence code.
1296 *
1297 * SNAME (global input) CHARACTER*(*)
1298 * On entry, SNAME specifies the subroutine name calling this
1299 * subprogram.
1300 *
1301 * Calling sequence encodings
1302 * ==========================
1303 *
1304 * code Formal argument list Examples
1305 *
1306 * 11 (n, v1,v2) _SWAP, _COPY
1307 * 12 (n,s1, v1 ) _SCAL, _SCAL
1308 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1309 * 14 (n,s1,i1,v1 ) _AMAX
1310 * 15 (n,u1, v1 ) _ASUM, _NRM2
1311 *
1312 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315 * 24 ( m,n,s1,v1,v2,m1) _GER_
1316 * 25 (uplo, n,s1,v1, m1) _SYR
1317 * 26 (uplo, n,u1,v1, m1) _HER
1318 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319 *
1320 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330 *
1331 * -- Written on April 1, 1998 by
1332 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333 *
1334 * =====================================================================
1335 *
1336 * .. Local Scalars ..
1337  INTEGER APOS
1338 * ..
1339 * .. External Subroutines ..
1340  EXTERNAL pschkmat
1341 * ..
1342 * .. Executable Statements ..
1343 *
1344 * Level 2 PBLAS
1345 *
1346  IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347 *
1348 * Check 1st (and only) matrix
1349 *
1350  apos = 5
1351  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352 *
1353  ELSE IF( scode.EQ.22 ) THEN
1354 *
1355 * Check 1st (and only) matrix
1356 *
1357  apos = 4
1358  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359 *
1360  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361 *
1362 * Check 1st (and only) matrix
1363 *
1364  apos = 14
1365  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366 *
1367  ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368 *
1369 * Check 1st (and only) matrix
1370 *
1371  apos = 9
1372  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373 *
1374 * Level 3 PBLAS
1375 *
1376  ELSE IF( scode.EQ.31 ) THEN
1377 *
1378 * Check 1st matrix
1379 *
1380  apos = 7
1381  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382 *
1383 * Check 2nd matrix
1384 *
1385  apos = 11
1386  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387 *
1388 * Check 3nd matrix
1389 *
1390  apos = 16
1391  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392 *
1393  ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394 *
1395 * Check 1st matrix
1396 *
1397  apos = 6
1398  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399 *
1400 * Check 2nd matrix
1401 *
1402  apos = 10
1403  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404 *
1405 * Check 3nd matrix
1406 *
1407  apos = 15
1408  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409 *
1410  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411 *
1412 * Check 1st matrix
1413 *
1414  apos = 6
1415  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416 *
1417 * Check 2nd matrix
1418 *
1419  apos = 11
1420  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421 *
1422  ELSE IF( scode.EQ.37 ) THEN
1423 *
1424 * Check 1st matrix
1425 *
1426  apos = 4
1427  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428 *
1429 * Check 2nd matrix
1430 *
1431  apos = 9
1432  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433 *
1434  ELSE IF( scode.EQ.38 ) THEN
1435 *
1436 * Check 1st matrix
1437 *
1438  apos = 8
1439  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440 *
1441 * Check 2nd matrix
1442 *
1443  apos = 12
1444  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445 *
1446  ELSE IF( scode.EQ.39 ) THEN
1447 *
1448 * Check 1st matrix
1449 *
1450  apos = 5
1451  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452 *
1453 * Check 2nd matrix
1454 *
1455  apos = 10
1456  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457 *
1458  ELSE IF( scode.EQ.40 ) THEN
1459 *
1460 * Check 1st matrix
1461 *
1462  apos = 6
1463  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464 *
1465 * Check 2nd matrix
1466 *
1467  apos = 11
1468  CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469 *
1470  END IF
1471 *
1472  RETURN
1473 *
1474 * End of PSMATEE
1475 *
1476  END
1477  SUBROUTINE pssetpblas( ICTXT )
1479 * -- PBLAS test routine (version 2.0) --
1480 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481 * and University of California, Berkeley.
1482 * April 1, 1998
1483 *
1484 * .. Scalar Arguments ..
1485  INTEGER ICTXT
1486 * ..
1487 *
1488 * Purpose
1489 * =======
1490 *
1491 * PSSETPBLAS initializes *all* the dummy arguments to correct values.
1492 *
1493 * Notes
1494 * =====
1495 *
1496 * A description vector is associated with each 2D block-cyclicly dis-
1497 * tributed matrix. This vector stores the information required to
1498 * establish the mapping between a matrix entry and its corresponding
1499 * process and memory location.
1500 *
1501 * In the following comments, the character _ should be read as
1502 * "of the distributed matrix". Let A be a generic term for any 2D
1503 * block cyclicly distributed matrix. Its description vector is DESCA:
1504 *
1505 * NOTATION STORED IN EXPLANATION
1506 * ---------------- --------------- ------------------------------------
1507 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509 * the NPROW x NPCOL BLACS process grid
1510 * A is distributed over. The context
1511 * itself is global, but the handle
1512 * (the integer value) may vary.
1513 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514 * ted matrix A, M_A >= 0.
1515 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1516 * buted matrix A, N_A >= 0.
1517 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518 * block of the matrix A, IMB_A > 0.
1519 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520 * left block of the matrix A,
1521 * INB_A > 0.
1522 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523 * bute the last M_A-IMB_A rows of A,
1524 * MB_A > 0.
1525 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526 * bute the last N_A-INB_A columns of
1527 * A, NB_A > 0.
1528 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529 * row of the matrix A is distributed,
1530 * NPROW > RSRC_A >= 0.
1531 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532 * first column of A is distributed.
1533 * NPCOL > CSRC_A >= 0.
1534 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535 * array storing the local blocks of
1536 * the distributed matrix A,
1537 * IF( Lc( 1, N_A ) > 0 )
1538 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539 * ELSE
1540 * LLD_A >= 1.
1541 *
1542 * Let K be the number of rows of a matrix A starting at the global in-
1543 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545 * receive if these K rows were distributed over NPROW processes. If K
1546 * is the number of columns of a matrix A starting at the global index
1547 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549 * these K columns were distributed over NPCOL processes.
1550 *
1551 * The values of Lr() and Lc() may be determined via a call to the func-
1552 * tion PB_NUMROC:
1553 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555 *
1556 * Arguments
1557 * =========
1558 *
1559 * ICTXT (local input) INTEGER
1560 * On entry, ICTXT specifies the BLACS context handle, indica-
1561 * ting the global context of the operation. The context itself
1562 * is global, but the value of ICTXT is local.
1563 *
1564 * -- Written on April 1, 1998 by
1565 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566 *
1567 * =====================================================================
1568 *
1569 * .. Parameters ..
1570  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572  $ rsrc_
1573  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577  REAL ONE
1578  PARAMETER ( ONE = 1.0e+0 )
1579 * ..
1580 * .. External Subroutines ..
1581  EXTERNAL pb_descset2
1582 * ..
1583 * .. Common Blocks ..
1584  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1585  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1586  $ jc, jx, jy, kdim, mdim, ndim
1587  REAL USCLR, SCLR
1588  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1589  $ descx( dlen_ ), descy( dlen_ )
1590  REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1591  COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1592  COMMON /pblasd/desca, descb, descc, descx, descy
1593  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1594  $ ja, jb, jc, jx, jy
1595  COMMON /pblasm/a, b, c
1596  COMMON /pblasn/kdim, mdim, ndim
1597  COMMON /pblass/sclr, usclr
1598  COMMON /pblasv/x, y
1599 * ..
1600 * .. Executable Statements ..
1601 *
1602 * Set default values for options
1603 *
1604  diag = 'N'
1605  side = 'L'
1606  transa = 'N'
1607  transb = 'N'
1608  uplo = 'U'
1609 *
1610 * Set default values for scalars
1611 *
1612  kdim = 1
1613  mdim = 1
1614  ndim = 1
1615  isclr = 1
1616  sclr = one
1617  usclr = one
1618 *
1619 * Set default values for distributed matrix A
1620 *
1621  a( 1, 1 ) = one
1622  a( 2, 1 ) = one
1623  a( 1, 2 ) = one
1624  a( 2, 2 ) = one
1625  ia = 1
1626  ja = 1
1627  CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1628 *
1629 * Set default values for distributed matrix B
1630 *
1631  b( 1, 1 ) = one
1632  b( 2, 1 ) = one
1633  b( 1, 2 ) = one
1634  b( 2, 2 ) = one
1635  ib = 1
1636  jb = 1
1637  CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1638 *
1639 * Set default values for distributed matrix C
1640 *
1641  c( 1, 1 ) = one
1642  c( 2, 1 ) = one
1643  c( 1, 2 ) = one
1644  c( 2, 2 ) = one
1645  ic = 1
1646  jc = 1
1647  CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1648 *
1649 * Set default values for distributed matrix X
1650 *
1651  x( 1 ) = one
1652  x( 2 ) = one
1653  ix = 1
1654  jx = 1
1655  CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1656  incx = 1
1657 *
1658 * Set default values for distributed matrix Y
1659 *
1660  y( 1 ) = one
1661  y( 2 ) = one
1662  iy = 1
1663  jy = 1
1664  CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1665  incy = 1
1666 *
1667  RETURN
1668 *
1669 * End of PSSETPBLAS
1670 *
1671  END
1672  SUBROUTINE pschkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1673  $ ARGPOS )
1675 * -- PBLAS test routine (version 2.0) --
1676 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1677 * and University of California, Berkeley.
1678 * April 1, 1998
1679 *
1680 * .. Scalar Arguments ..
1681  CHARACTER*1 ARGNAM
1682  INTEGER ARGPOS, ICTXT, NOUT, SCODE
1683 * ..
1684 * .. Array Arguments ..
1685  CHARACTER*(*) SNAME
1686 * ..
1687 * .. Subroutine Arguments ..
1688  EXTERNAL subptr
1689 * ..
1690 *
1691 * Purpose
1692 * =======
1693 *
1694 * PSCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1695 *
1696 * Notes
1697 * =====
1698 *
1699 * A description vector is associated with each 2D block-cyclicly dis-
1700 * tributed matrix. This vector stores the information required to
1701 * establish the mapping between a matrix entry and its corresponding
1702 * process and memory location.
1703 *
1704 * In the following comments, the character _ should be read as
1705 * "of the distributed matrix". Let A be a generic term for any 2D
1706 * block cyclicly distributed matrix. Its description vector is DESCA:
1707 *
1708 * NOTATION STORED IN EXPLANATION
1709 * ---------------- --------------- ------------------------------------
1710 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1711 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1712 * the NPROW x NPCOL BLACS process grid
1713 * A is distributed over. The context
1714 * itself is global, but the handle
1715 * (the integer value) may vary.
1716 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1717 * ted matrix A, M_A >= 0.
1718 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1719 * buted matrix A, N_A >= 0.
1720 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1721 * block of the matrix A, IMB_A > 0.
1722 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1723 * left block of the matrix A,
1724 * INB_A > 0.
1725 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1726 * bute the last M_A-IMB_A rows of A,
1727 * MB_A > 0.
1728 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1729 * bute the last N_A-INB_A columns of
1730 * A, NB_A > 0.
1731 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1732 * row of the matrix A is distributed,
1733 * NPROW > RSRC_A >= 0.
1734 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1735 * first column of A is distributed.
1736 * NPCOL > CSRC_A >= 0.
1737 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1738 * array storing the local blocks of
1739 * the distributed matrix A,
1740 * IF( Lc( 1, N_A ) > 0 )
1741 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1742 * ELSE
1743 * LLD_A >= 1.
1744 *
1745 * Let K be the number of rows of a matrix A starting at the global in-
1746 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1747 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1748 * receive if these K rows were distributed over NPROW processes. If K
1749 * is the number of columns of a matrix A starting at the global index
1750 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1751 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1752 * these K columns were distributed over NPCOL processes.
1753 *
1754 * The values of Lr() and Lc() may be determined via a call to the func-
1755 * tion PB_NUMROC:
1756 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1757 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1758 *
1759 * Arguments
1760 * =========
1761 *
1762 * ICTXT (local input) INTEGER
1763 * On entry, ICTXT specifies the BLACS context handle, indica-
1764 * ting the global context of the operation. The context itself
1765 * is global, but the value of ICTXT is local.
1766 *
1767 * NOUT (global input) INTEGER
1768 * On entry, NOUT specifies the unit number for the output file.
1769 * When NOUT is 6, output to screen, when NOUT is 0, output to
1770 * stderr. NOUT is only defined for process 0.
1771 *
1772 * SUBPTR (global input) SUBROUTINE
1773 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1774 * EXTERNAL in the calling subroutine.
1775 *
1776 * SCODE (global input) INTEGER
1777 * On entry, SCODE specifies the calling sequence code.
1778 *
1779 * SNAME (global input) CHARACTER*(*)
1780 * On entry, SNAME specifies the subroutine name calling this
1781 * subprogram.
1782 *
1783 * ARGNAM (global input) CHARACTER*(*)
1784 * On entry, ARGNAM specifies the name of the matrix or vector
1785 * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1786 * wants to check a matrix, and 'X' or 'Y' for a vector.
1787 *
1788 * ARGPOS (global input) INTEGER
1789 * On entry, ARGPOS indicates the position of the first argument
1790 * of the matrix (or vector) ARGNAM.
1791 *
1792 * -- Written on April 1, 1998 by
1793 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1794 *
1795 * =====================================================================
1796 *
1797 * .. Parameters ..
1798  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1799  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1800  $ RSRC_
1801  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1802  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1803  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1804  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1805  INTEGER DESCMULT
1806  PARAMETER ( DESCMULT = 100 )
1807 * ..
1808 * .. Local Scalars ..
1809  INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1810 * ..
1811 * .. External Subroutines ..
1812  EXTERNAL blacs_gridinfo, pchkpbe, pscallsub, pssetpblas
1813 * ..
1814 * .. External Functions ..
1815  LOGICAL LSAME
1816  EXTERNAL LSAME
1817 * ..
1818 * .. Common Blocks ..
1819  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1820  $ JC, JX, JY
1821  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1822  $ descx( dlen_ ), descy( dlen_ )
1823  COMMON /pblasd/desca, descb, descc, descx, descy
1824  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1825  $ ja, jb, jc, jx, jy
1826 * ..
1827 * .. Executable Statements ..
1828 *
1829  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1830 *
1831  IF( lsame( argnam, 'A' ) ) THEN
1832 *
1833 * Check IA. Set all other OK, bad IA
1834 *
1835  CALL pssetpblas( ictxt )
1836  ia = -1
1837  infot = argpos + 1
1838  CALL pscallsub( subptr, scode )
1839  CALL pchkpbe( ictxt, nout, sname, infot )
1840 *
1841 * Check JA. Set all other OK, bad JA
1842 *
1843  CALL pssetpblas( ictxt )
1844  ja = -1
1845  infot = argpos + 2
1846  CALL pscallsub( subptr, scode )
1847  CALL pchkpbe( ictxt, nout, sname, infot )
1848 *
1849 * Check DESCA. Set all other OK, bad DESCA
1850 *
1851  DO 10 i = 1, dlen_
1852 *
1853 * Set I'th entry of DESCA to incorrect value, rest ok.
1854 *
1855  CALL pssetpblas( ictxt )
1856  desca( i ) = -2
1857  infot = ( ( argpos + 3 ) * descmult ) + i
1858  CALL pscallsub( subptr, scode )
1859  CALL pchkpbe( ictxt, nout, sname, infot )
1860 *
1861 * Extra tests for RSRCA, CSRCA, LDA
1862 *
1863  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1864  $ ( i.EQ.lld_ ) ) THEN
1865 *
1866  CALL pssetpblas( ictxt )
1867 *
1868 * Test RSRCA >= NPROW
1869 *
1870  IF( i.EQ.rsrc_ )
1871  $ desca( i ) = nprow
1872 *
1873 * Test CSRCA >= NPCOL
1874 *
1875  IF( i.EQ.csrc_ )
1876  $ desca( i ) = npcol
1877 *
1878 * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1879 *
1880  IF( i.EQ.lld_ ) THEN
1881  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1882  desca( i ) = 1
1883  ELSE
1884  desca( i ) = 0
1885  END IF
1886  END IF
1887 *
1888  infot = ( ( argpos + 3 ) * descmult ) + i
1889  CALL pscallsub( subptr, scode )
1890  CALL pchkpbe( ictxt, nout, sname, infot )
1891 *
1892  END IF
1893 *
1894  10 CONTINUE
1895 *
1896  ELSE IF( lsame( argnam, 'B' ) ) THEN
1897 *
1898 * Check IB. Set all other OK, bad IB
1899 *
1900  CALL pssetpblas( ictxt )
1901  ib = -1
1902  infot = argpos + 1
1903  CALL pscallsub( subptr, scode )
1904  CALL pchkpbe( ictxt, nout, sname, infot )
1905 *
1906 * Check JB. Set all other OK, bad JB
1907 *
1908  CALL pssetpblas( ictxt )
1909  jb = -1
1910  infot = argpos + 2
1911  CALL pscallsub( subptr, scode )
1912  CALL pchkpbe( ictxt, nout, sname, infot )
1913 *
1914 * Check DESCB. Set all other OK, bad DESCB
1915 *
1916  DO 20 i = 1, dlen_
1917 *
1918 * Set I'th entry of DESCB to incorrect value, rest ok.
1919 *
1920  CALL pssetpblas( ictxt )
1921  descb( i ) = -2
1922  infot = ( ( argpos + 3 ) * descmult ) + i
1923  CALL pscallsub( subptr, scode )
1924  CALL pchkpbe( ictxt, nout, sname, infot )
1925 *
1926 * Extra tests for RSRCB, CSRCB, LDB
1927 *
1928  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1929  $ ( i.EQ.lld_ ) ) THEN
1930 *
1931  CALL pssetpblas( ictxt )
1932 *
1933 * Test RSRCB >= NPROW
1934 *
1935  IF( i.EQ.rsrc_ )
1936  $ descb( i ) = nprow
1937 *
1938 * Test CSRCB >= NPCOL
1939 *
1940  IF( i.EQ.csrc_ )
1941  $ descb( i ) = npcol
1942 *
1943 * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1944 *
1945  IF( i.EQ.lld_ ) THEN
1946  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1947  descb( i ) = 1
1948  ELSE
1949  descb( i ) = 0
1950  END IF
1951  END IF
1952 *
1953  infot = ( ( argpos + 3 ) * descmult ) + i
1954  CALL pscallsub( subptr, scode )
1955  CALL pchkpbe( ictxt, nout, sname, infot )
1956 *
1957  END IF
1958 *
1959  20 CONTINUE
1960 *
1961  ELSE IF( lsame( argnam, 'C' ) ) THEN
1962 *
1963 * Check IC. Set all other OK, bad IC
1964 *
1965  CALL pssetpblas( ictxt )
1966  ic = -1
1967  infot = argpos + 1
1968  CALL pscallsub( subptr, scode )
1969  CALL pchkpbe( ictxt, nout, sname, infot )
1970 *
1971 * Check JC. Set all other OK, bad JC
1972 *
1973  CALL pssetpblas( ictxt )
1974  jc = -1
1975  infot = argpos + 2
1976  CALL pscallsub( subptr, scode )
1977  CALL pchkpbe( ictxt, nout, sname, infot )
1978 *
1979 * Check DESCC. Set all other OK, bad DESCC
1980 *
1981  DO 30 i = 1, dlen_
1982 *
1983 * Set I'th entry of DESCC to incorrect value, rest ok.
1984 *
1985  CALL pssetpblas( ictxt )
1986  descc( i ) = -2
1987  infot = ( ( argpos + 3 ) * descmult ) + i
1988  CALL pscallsub( subptr, scode )
1989  CALL pchkpbe( ictxt, nout, sname, infot )
1990 *
1991 * Extra tests for RSRCC, CSRCC, LDC
1992 *
1993  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1994  $ ( i.EQ.lld_ ) ) THEN
1995 *
1996  CALL pssetpblas( ictxt )
1997 *
1998 * Test RSRCC >= NPROW
1999 *
2000  IF( i.EQ.rsrc_ )
2001  $ descc( i ) = nprow
2002 *
2003 * Test CSRCC >= NPCOL
2004 *
2005  IF( i.EQ.csrc_ )
2006  $ descc( i ) = npcol
2007 *
2008 * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2009 *
2010  IF( i.EQ.lld_ ) THEN
2011  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2012  descc( i ) = 1
2013  ELSE
2014  descc( i ) = 0
2015  END IF
2016  END IF
2017 *
2018  infot = ( ( argpos + 3 ) * descmult ) + i
2019  CALL pscallsub( subptr, scode )
2020  CALL pchkpbe( ictxt, nout, sname, infot )
2021 *
2022  END IF
2023 *
2024  30 CONTINUE
2025 *
2026  ELSE IF( lsame( argnam, 'X' ) ) THEN
2027 *
2028 * Check IX. Set all other OK, bad IX
2029 *
2030  CALL pssetpblas( ictxt )
2031  ix = -1
2032  infot = argpos + 1
2033  CALL pscallsub( subptr, scode )
2034  CALL pchkpbe( ictxt, nout, sname, infot )
2035 *
2036 * Check JX. Set all other OK, bad JX
2037 *
2038  CALL pssetpblas( ictxt )
2039  jx = -1
2040  infot = argpos + 2
2041  CALL pscallsub( subptr, scode )
2042  CALL pchkpbe( ictxt, nout, sname, infot )
2043 *
2044 * Check DESCX. Set all other OK, bad DESCX
2045 *
2046  DO 40 i = 1, dlen_
2047 *
2048 * Set I'th entry of DESCX to incorrect value, rest ok.
2049 *
2050  CALL pssetpblas( ictxt )
2051  descx( i ) = -2
2052  infot = ( ( argpos + 3 ) * descmult ) + i
2053  CALL pscallsub( subptr, scode )
2054  CALL pchkpbe( ictxt, nout, sname, infot )
2055 *
2056 * Extra tests for RSRCX, CSRCX, LDX
2057 *
2058  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2059  $ ( i.EQ.lld_ ) ) THEN
2060 *
2061  CALL pssetpblas( ictxt )
2062 *
2063 * Test RSRCX >= NPROW
2064 *
2065  IF( i.EQ.rsrc_ )
2066  $ descx( i ) = nprow
2067 *
2068 * Test CSRCX >= NPCOL
2069 *
2070  IF( i.EQ.csrc_ )
2071  $ descx( i ) = npcol
2072 *
2073 * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2074 *
2075  IF( i.EQ.lld_ ) THEN
2076  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2077  descx( i ) = 1
2078  ELSE
2079  descx( i ) = 0
2080  END IF
2081  END IF
2082 *
2083  infot = ( ( argpos + 3 ) * descmult ) + i
2084  CALL pscallsub( subptr, scode )
2085  CALL pchkpbe( ictxt, nout, sname, infot )
2086 *
2087  END IF
2088 *
2089  40 CONTINUE
2090 *
2091 * Check INCX. Set all other OK, bad INCX
2092 *
2093  CALL pssetpblas( ictxt )
2094  incx = -1
2095  infot = argpos + 4
2096  CALL pscallsub( subptr, scode )
2097  CALL pchkpbe( ictxt, nout, sname, infot )
2098 *
2099  ELSE
2100 *
2101 * Check IY. Set all other OK, bad IY
2102 *
2103  CALL pssetpblas( ictxt )
2104  iy = -1
2105  infot = argpos + 1
2106  CALL pscallsub( subptr, scode )
2107  CALL pchkpbe( ictxt, nout, sname, infot )
2108 *
2109 * Check JY. Set all other OK, bad JY
2110 *
2111  CALL pssetpblas( ictxt )
2112  jy = -1
2113  infot = argpos + 2
2114  CALL pscallsub( subptr, scode )
2115  CALL pchkpbe( ictxt, nout, sname, infot )
2116 *
2117 * Check DESCY. Set all other OK, bad DESCY
2118 *
2119  DO 50 i = 1, dlen_
2120 *
2121 * Set I'th entry of DESCY to incorrect value, rest ok.
2122 *
2123  CALL pssetpblas( ictxt )
2124  descy( i ) = -2
2125  infot = ( ( argpos + 3 ) * descmult ) + i
2126  CALL pscallsub( subptr, scode )
2127  CALL pchkpbe( ictxt, nout, sname, infot )
2128 *
2129 * Extra tests for RSRCY, CSRCY, LDY
2130 *
2131  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2132  $ ( i.EQ.lld_ ) ) THEN
2133 *
2134  CALL pssetpblas( ictxt )
2135 *
2136 * Test RSRCY >= NPROW
2137 *
2138  IF( i.EQ.rsrc_ )
2139  $ descy( i ) = nprow
2140 *
2141 * Test CSRCY >= NPCOL
2142 *
2143  IF( i.EQ.csrc_ )
2144  $ descy( i ) = npcol
2145 *
2146 * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2147 *
2148  IF( i.EQ.lld_ ) THEN
2149  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2150  descy( i ) = 1
2151  ELSE
2152  descy( i ) = 0
2153  END IF
2154  END IF
2155 *
2156  infot = ( ( argpos + 3 ) * descmult ) + i
2157  CALL pscallsub( subptr, scode )
2158  CALL pchkpbe( ictxt, nout, sname, infot )
2159 *
2160  END IF
2161 *
2162  50 CONTINUE
2163 *
2164 * Check INCY. Set all other OK, bad INCY
2165 *
2166  CALL pssetpblas( ictxt )
2167  incy = -1
2168  infot = argpos + 4
2169  CALL pscallsub( subptr, scode )
2170  CALL pchkpbe( ictxt, nout, sname, infot )
2171 *
2172  END IF
2173 *
2174  RETURN
2175 *
2176 * End of PSCHKMAT
2177 *
2178  END
2179  SUBROUTINE pscallsub( SUBPTR, SCODE )
2181 * -- PBLAS test routine (version 2.0) --
2182 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2183 * and University of California, Berkeley.
2184 * April 1, 1998
2185 *
2186 * .. Scalar Arguments ..
2187  INTEGER SCODE
2188 * ..
2189 * .. Subroutine Arguments ..
2190  EXTERNAL subptr
2191 * ..
2192 *
2193 * Purpose
2194 * =======
2195 *
2196 * PSCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2197 * tified by SCODE.
2198 *
2199 * Notes
2200 * =====
2201 *
2202 * A description vector is associated with each 2D block-cyclicly dis-
2203 * tributed matrix. This vector stores the information required to
2204 * establish the mapping between a matrix entry and its corresponding
2205 * process and memory location.
2206 *
2207 * In the following comments, the character _ should be read as
2208 * "of the distributed matrix". Let A be a generic term for any 2D
2209 * block cyclicly distributed matrix. Its description vector is DESCA:
2210 *
2211 * NOTATION STORED IN EXPLANATION
2212 * ---------------- --------------- ------------------------------------
2213 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2214 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2215 * the NPROW x NPCOL BLACS process grid
2216 * A is distributed over. The context
2217 * itself is global, but the handle
2218 * (the integer value) may vary.
2219 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2220 * ted matrix A, M_A >= 0.
2221 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2222 * buted matrix A, N_A >= 0.
2223 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2224 * block of the matrix A, IMB_A > 0.
2225 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2226 * left block of the matrix A,
2227 * INB_A > 0.
2228 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2229 * bute the last M_A-IMB_A rows of A,
2230 * MB_A > 0.
2231 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2232 * bute the last N_A-INB_A columns of
2233 * A, NB_A > 0.
2234 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2235 * row of the matrix A is distributed,
2236 * NPROW > RSRC_A >= 0.
2237 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2238 * first column of A is distributed.
2239 * NPCOL > CSRC_A >= 0.
2240 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2241 * array storing the local blocks of
2242 * the distributed matrix A,
2243 * IF( Lc( 1, N_A ) > 0 )
2244 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2245 * ELSE
2246 * LLD_A >= 1.
2247 *
2248 * Let K be the number of rows of a matrix A starting at the global in-
2249 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2250 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2251 * receive if these K rows were distributed over NPROW processes. If K
2252 * is the number of columns of a matrix A starting at the global index
2253 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2254 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2255 * these K columns were distributed over NPCOL processes.
2256 *
2257 * The values of Lr() and Lc() may be determined via a call to the func-
2258 * tion PB_NUMROC:
2259 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2260 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2261 *
2262 * Arguments
2263 * =========
2264 *
2265 * SUBPTR (global input) SUBROUTINE
2266 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
2267 * EXTERNAL in the calling subroutine.
2268 *
2269 * SCODE (global input) INTEGER
2270 * On entry, SCODE specifies the calling sequence code.
2271 *
2272 * Calling sequence encodings
2273 * ==========================
2274 *
2275 * code Formal argument list Examples
2276 *
2277 * 11 (n, v1,v2) _SWAP, _COPY
2278 * 12 (n,s1, v1 ) _SCAL, _SCAL
2279 * 13 (n,s1, v1,v2) _AXPY, _DOT_
2280 * 14 (n,s1,i1,v1 ) _AMAX
2281 * 15 (n,u1, v1 ) _ASUM, _NRM2
2282 *
2283 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2284 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2285 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2286 * 24 ( m,n,s1,v1,v2,m1) _GER_
2287 * 25 (uplo, n,s1,v1, m1) _SYR
2288 * 26 (uplo, n,u1,v1, m1) _HER
2289 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2290 *
2291 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2292 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2293 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2294 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2295 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2296 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2297 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
2298 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2299 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2300 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2301 *
2302 * -- Written on April 1, 1998 by
2303 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2304 *
2305 * =====================================================================
2306 *
2307 * .. Parameters ..
2308  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2309  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2310  $ RSRC_
2311  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2312  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2313  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2314  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2315 * ..
2316 * .. Common Blocks ..
2317  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2318  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2319  $ JC, JX, JY, KDIM, MDIM, NDIM
2320  REAL USCLR, SCLR
2321  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2322  $ DESCX( DLEN_ ), DESCY( DLEN_ )
2323  REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2324  COMMON /pblasc/diag, side, transa, transb, uplo
2325  COMMON /pblasd/desca, descb, descc, descx, descy
2326  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2327  $ ja, jb, jc, jx, jy
2328  COMMON /pblasm/a, b, c
2329  COMMON /pblasn/kdim, mdim, ndim
2330  COMMON /pblass/sclr, usclr
2331  COMMON /pblasv/x, y
2332 * ..
2333 * .. Executable Statements ..
2334 *
2335 * Level 1 PBLAS
2336 *
2337  IF( scode.EQ.11 ) THEN
2338 *
2339  CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2340  $ incy )
2341 *
2342  ELSE IF( scode.EQ.12 ) THEN
2343 *
2344  CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2345 *
2346  ELSE IF( scode.EQ.13 ) THEN
2347 *
2348  CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2349  $ descy, incy )
2350 *
2351  ELSE IF( scode.EQ.14 ) THEN
2352 *
2353  CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2354 *
2355  ELSE IF( scode.EQ.15 ) THEN
2356 *
2357  CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2358 *
2359 * Level 2 PBLAS
2360 *
2361  ELSE IF( scode.EQ.21 ) THEN
2362 *
2363  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2364  $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2365 *
2366  ELSE IF( scode.EQ.22 ) THEN
2367 *
2368  CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2369  $ descx, incx, sclr, y, iy, jy, descy, incy )
2370 *
2371  ELSE IF( scode.EQ.23 ) THEN
2372 *
2373  CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2374  $ jx, descx, incx )
2375 *
2376  ELSE IF( scode.EQ.24 ) THEN
2377 *
2378  CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2379  $ jy, descy, incy, a, ia, ja, desca )
2380 *
2381  ELSE IF( scode.EQ.25 ) THEN
2382 *
2383  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2384  $ ja, desca )
2385 *
2386  ELSE IF( scode.EQ.26 ) THEN
2387 *
2388  CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2389  $ ja, desca )
2390 *
2391  ELSE IF( scode.EQ.27 ) THEN
2392 *
2393  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2394  $ jy, descy, incy, a, ia, ja, desca )
2395 *
2396 * Level 3 PBLAS
2397 *
2398  ELSE IF( scode.EQ.31 ) THEN
2399 *
2400  CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2401  $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2402 *
2403  ELSE IF( scode.EQ.32 ) THEN
2404 *
2405  CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2406  $ ib, jb, descb, sclr, c, ic, jc, descc )
2407 *
2408  ELSE IF( scode.EQ.33 ) THEN
2409 *
2410  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2411  $ sclr, c, ic, jc, descc )
2412 *
2413  ELSE IF( scode.EQ.34 ) THEN
2414 *
2415  CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2416  $ usclr, c, ic, jc, descc )
2417 *
2418  ELSE IF( scode.EQ.35 ) THEN
2419 *
2420  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2421  $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2422 *
2423  ELSE IF( scode.EQ.36 ) THEN
2424 *
2425  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2426  $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2427 *
2428  ELSE IF( scode.EQ.37 ) THEN
2429 *
2430  CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2431  $ jc, descc )
2432 *
2433  ELSE IF( scode.EQ.38 ) THEN
2434 *
2435  CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2436  $ ja, desca, b, ib, jb, descb )
2437 *
2438  ELSE IF( scode.EQ.39 ) THEN
2439 *
2440  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2441  $ c, ic, jc, descc )
2442 *
2443  ELSE IF( scode.EQ.40 ) THEN
2444 *
2445  CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2446  $ sclr, c, ic, jc, descc )
2447 *
2448  END IF
2449 *
2450  RETURN
2451 *
2452 * End of PSCALLSUB
2453 *
2454  END
2455  SUBROUTINE pserrset( ERR, ERRMAX, XTRUE, X )
2457 * -- PBLAS test routine (version 2.0) --
2458 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2459 * and University of California, Berkeley.
2460 * April 1, 1998
2461 *
2462 * .. Scalar Arguments ..
2463  REAL ERR, ERRMAX, X, XTRUE
2464 * ..
2465 *
2466 * Purpose
2467 * =======
2468 *
2469 * PSERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2470 * pares it with zero. ERRMAX accumulates the absolute error difference.
2471 *
2472 * Notes
2473 * =====
2474 *
2475 * A description vector is associated with each 2D block-cyclicly dis-
2476 * tributed matrix. This vector stores the information required to
2477 * establish the mapping between a matrix entry and its corresponding
2478 * process and memory location.
2479 *
2480 * In the following comments, the character _ should be read as
2481 * "of the distributed matrix". Let A be a generic term for any 2D
2482 * block cyclicly distributed matrix. Its description vector is DESCA:
2483 *
2484 * NOTATION STORED IN EXPLANATION
2485 * ---------------- --------------- ------------------------------------
2486 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2487 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2488 * the NPROW x NPCOL BLACS process grid
2489 * A is distributed over. The context
2490 * itself is global, but the handle
2491 * (the integer value) may vary.
2492 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2493 * ted matrix A, M_A >= 0.
2494 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2495 * buted matrix A, N_A >= 0.
2496 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2497 * block of the matrix A, IMB_A > 0.
2498 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2499 * left block of the matrix A,
2500 * INB_A > 0.
2501 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2502 * bute the last M_A-IMB_A rows of A,
2503 * MB_A > 0.
2504 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2505 * bute the last N_A-INB_A columns of
2506 * A, NB_A > 0.
2507 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2508 * row of the matrix A is distributed,
2509 * NPROW > RSRC_A >= 0.
2510 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2511 * first column of A is distributed.
2512 * NPCOL > CSRC_A >= 0.
2513 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2514 * array storing the local blocks of
2515 * the distributed matrix A,
2516 * IF( Lc( 1, N_A ) > 0 )
2517 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2518 * ELSE
2519 * LLD_A >= 1.
2520 *
2521 * Let K be the number of rows of a matrix A starting at the global in-
2522 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2523 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2524 * receive if these K rows were distributed over NPROW processes. If K
2525 * is the number of columns of a matrix A starting at the global index
2526 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2527 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2528 * these K columns were distributed over NPCOL processes.
2529 *
2530 * The values of Lr() and Lc() may be determined via a call to the func-
2531 * tion PB_NUMROC:
2532 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2533 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2534 *
2535 * Arguments
2536 * =========
2537 *
2538 * ERR (local output) REAL
2539 * On exit, ERR specifies the absolute difference |XTRUE - X|.
2540 *
2541 * ERRMAX (local input/local output) REAL
2542 * On entry, ERRMAX specifies a previously computed error. On
2543 * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2544 *
2545 * XTRUE (local input) REAL
2546 * On entry, XTRUE specifies the true value.
2547 *
2548 * X (local input) REAL
2549 * On entry, X specifies the value to be compared to XTRUE.
2550 *
2551 * -- Written on April 1, 1998 by
2552 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2553 *
2554 * =====================================================================
2555 *
2556 * .. External Functions ..
2557  REAL PSDIFF
2558  EXTERNAL PSDIFF
2559 * ..
2560 * .. Intrinsic Functions ..
2561  INTRINSIC abs, max
2562 * ..
2563 * .. Executable Statements ..
2564 *
2565  err = abs( psdiff( xtrue, x ) )
2566 *
2567  errmax = max( errmax, err )
2568 *
2569  RETURN
2570 *
2571 * End of PSERRSET
2572 *
2573  END
2574  SUBROUTINE pschkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2575  $ INFO )
2577 * -- PBLAS test routine (version 2.0) --
2578 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2579 * and University of California, Berkeley.
2580 * April 1, 1998
2581 *
2582 * .. Scalar Arguments ..
2583  INTEGER INCX, INFO, IX, JX, N
2584  REAL ERRMAX
2585 * ..
2586 * .. Array Arguments ..
2587  INTEGER DESCX( * )
2588  REAL PX( * ), X( * )
2589 * ..
2590 *
2591 * Purpose
2592 * =======
2593 *
2594 * PSCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2595 * local array entries are compared element by element, and their dif-
2596 * ference is tested against 0.0 as well as the epsilon machine. Notice
2597 * that this difference should be numerically exactly the zero machine,
2598 * but because of the possible fluctuation of some of the data we flag-
2599 * ged differently a difference less than twice the epsilon machine. The
2600 * largest error is also returned.
2601 *
2602 * Notes
2603 * =====
2604 *
2605 * A description vector is associated with each 2D block-cyclicly dis-
2606 * tributed matrix. This vector stores the information required to
2607 * establish the mapping between a matrix entry and its corresponding
2608 * process and memory location.
2609 *
2610 * In the following comments, the character _ should be read as
2611 * "of the distributed matrix". Let A be a generic term for any 2D
2612 * block cyclicly distributed matrix. Its description vector is DESCA:
2613 *
2614 * NOTATION STORED IN EXPLANATION
2615 * ---------------- --------------- ------------------------------------
2616 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2617 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2618 * the NPROW x NPCOL BLACS process grid
2619 * A is distributed over. The context
2620 * itself is global, but the handle
2621 * (the integer value) may vary.
2622 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2623 * ted matrix A, M_A >= 0.
2624 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2625 * buted matrix A, N_A >= 0.
2626 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2627 * block of the matrix A, IMB_A > 0.
2628 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2629 * left block of the matrix A,
2630 * INB_A > 0.
2631 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2632 * bute the last M_A-IMB_A rows of A,
2633 * MB_A > 0.
2634 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2635 * bute the last N_A-INB_A columns of
2636 * A, NB_A > 0.
2637 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2638 * row of the matrix A is distributed,
2639 * NPROW > RSRC_A >= 0.
2640 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2641 * first column of A is distributed.
2642 * NPCOL > CSRC_A >= 0.
2643 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2644 * array storing the local blocks of
2645 * the distributed matrix A,
2646 * IF( Lc( 1, N_A ) > 0 )
2647 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2648 * ELSE
2649 * LLD_A >= 1.
2650 *
2651 * Let K be the number of rows of a matrix A starting at the global in-
2652 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2653 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2654 * receive if these K rows were distributed over NPROW processes. If K
2655 * is the number of columns of a matrix A starting at the global index
2656 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2657 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2658 * these K columns were distributed over NPCOL processes.
2659 *
2660 * The values of Lr() and Lc() may be determined via a call to the func-
2661 * tion PB_NUMROC:
2662 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2663 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2664 *
2665 * Arguments
2666 * =========
2667 *
2668 * ERRMAX (global output) REAL
2669 * On exit, ERRMAX specifies the largest absolute element-wise
2670 * difference between sub( X ) and sub( PX ).
2671 *
2672 * N (global input) INTEGER
2673 * On entry, N specifies the length of the subvector operand
2674 * sub( X ). N must be at least zero.
2675 *
2676 * X (local input) REAL array
2677 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2678 * array contains a local copy of the initial entire matrix PX.
2679 *
2680 * PX (local input) REAL array
2681 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2682 * array contains the local entries of the matrix PX.
2683 *
2684 * IX (global input) INTEGER
2685 * On entry, IX specifies X's global row index, which points to
2686 * the beginning of the submatrix sub( X ).
2687 *
2688 * JX (global input) INTEGER
2689 * On entry, JX specifies X's global column index, which points
2690 * to the beginning of the submatrix sub( X ).
2691 *
2692 * DESCX (global and local input) INTEGER array
2693 * On entry, DESCX is an integer array of dimension DLEN_. This
2694 * is the array descriptor for the matrix X.
2695 *
2696 * INCX (global input) INTEGER
2697 * On entry, INCX specifies the global increment for the
2698 * elements of X. Only two values of INCX are supported in
2699 * this version, namely 1 and M_X. INCX must not be zero.
2700 *
2701 * INFO (global output) INTEGER
2702 * On exit, if INFO = 0, no error has been found,
2703 * If INFO > 0, the maximum abolute error found is in (0,eps],
2704 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2705 *
2706 * -- Written on April 1, 1998 by
2707 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2708 *
2709 * =====================================================================
2710 *
2711 * .. Parameters ..
2712  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2713  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2714  $ RSRC_
2715  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2716  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2717  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2718  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2719  REAL ZERO
2720  PARAMETER ( ZERO = 0.0e+0 )
2721 * ..
2722 * .. Local Scalars ..
2723  LOGICAL COLREP, ROWREP
2724  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2725  $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2726  $ MYCOL, MYROW, NPCOL, NPROW
2727  REAL ERR, EPS
2728 * ..
2729 * .. External Subroutines ..
2730  EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
2731 * ..
2732 * .. External Functions ..
2733  REAL PSLAMCH
2734  EXTERNAL pslamch
2735 * ..
2736 * .. Intrinsic Functions ..
2737  INTRINSIC abs, max, min, mod
2738 * ..
2739 * .. Executable Statements ..
2740 *
2741  info = 0
2742  errmax = zero
2743 *
2744 * Quick return if possible
2745 *
2746  IF( n.LE.0 )
2747  $ RETURN
2748 *
2749  ictxt = descx( ctxt_ )
2750  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2751 *
2752  eps = pslamch( ictxt, 'eps' )
2753 *
2754  CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2755  $ jjx, ixrow, ixcol )
2756 *
2757  ldx = descx( m_ )
2758  ldpx = descx( lld_ )
2759  rowrep = ( ixrow.EQ.-1 )
2760  colrep = ( ixcol.EQ.-1 )
2761 *
2762  IF( n.EQ.1 ) THEN
2763 *
2764  IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2765  $ ( mycol.EQ.ixcol .OR. colrep ) )
2766  $ CALL pserrset( err, errmax, x( ix+(jx-1)*ldx ),
2767  $ px( iix+(jjx-1)*ldpx ) )
2768 *
2769  ELSE IF( incx.EQ.descx( m_ ) ) THEN
2770 *
2771 * sub( X ) is a row vector
2772 *
2773  jb = descx( inb_ ) - jx + 1
2774  IF( jb.LE.0 )
2775  $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2776  jb = min( jb, n )
2777  jn = jx + jb - 1
2778 *
2779  IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2780 *
2781  icurcol = ixcol
2782  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2783  DO 10 j = jx, jn
2784  CALL pserrset( err, errmax, x( ix+(j-1)*ldx ),
2785  $ px( iix+(jjx-1)*ldpx ) )
2786  jjx = jjx + 1
2787  10 CONTINUE
2788  END IF
2789  icurcol = mod( icurcol+1, npcol )
2790 *
2791  DO 30 j = jn+1, jx+n-1, descx( nb_ )
2792  jb = min( jx+n-j, descx( nb_ ) )
2793 *
2794  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2795 *
2796  DO 20 kk = 0, jb-1
2797  CALL pserrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2798  $ px( iix+(jjx+kk-1)*ldpx ) )
2799  20 CONTINUE
2800 *
2801  jjx = jjx + jb
2802 *
2803  END IF
2804 *
2805  icurcol = mod( icurcol+1, npcol )
2806 *
2807  30 CONTINUE
2808 *
2809  END IF
2810 *
2811  ELSE
2812 *
2813 * sub( X ) is a column vector
2814 *
2815  ib = descx( imb_ ) - ix + 1
2816  IF( ib.LE.0 )
2817  $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2818  ib = min( ib, n )
2819  in = ix + ib - 1
2820 *
2821  IF( mycol.EQ.ixcol .OR. colrep ) THEN
2822 *
2823  icurrow = ixrow
2824  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2825  DO 40 i = ix, in
2826  CALL pserrset( err, errmax, x( i+(jx-1)*ldx ),
2827  $ px( iix+(jjx-1)*ldpx ) )
2828  iix = iix + 1
2829  40 CONTINUE
2830  END IF
2831  icurrow = mod( icurrow+1, nprow )
2832 *
2833  DO 60 i = in+1, ix+n-1, descx( mb_ )
2834  ib = min( ix+n-i, descx( mb_ ) )
2835 *
2836  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2837 *
2838  DO 50 kk = 0, ib-1
2839  CALL pserrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2840  $ px( iix+kk+(jjx-1)*ldpx ) )
2841  50 CONTINUE
2842 *
2843  iix = iix + ib
2844 *
2845  END IF
2846 *
2847  icurrow = mod( icurrow+1, nprow )
2848 *
2849  60 CONTINUE
2850 *
2851  END IF
2852 *
2853  END IF
2854 *
2855  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2856  $ -1, -1 )
2857 *
2858  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2859  info = 1
2860  ELSE IF( errmax.GT.eps ) THEN
2861  info = -1
2862  END IF
2863 *
2864  RETURN
2865 *
2866 * End of PSCHKVIN
2867 *
2868  END
2869  SUBROUTINE pschkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2871 * -- PBLAS test routine (version 2.0) --
2872 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2873 * and University of California, Berkeley.
2874 * April 1, 1998
2875 *
2876 * .. Scalar Arguments ..
2877  INTEGER INCX, INFO, IX, JX, N
2878 * ..
2879 * .. Array Arguments ..
2880  INTEGER DESCX( * )
2881  REAL PX( * ), X( * )
2882 * ..
2883 *
2884 * Purpose
2885 * =======
2886 *
2887 * PSCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2888 * The local array entries are compared element by element, and their
2889 * difference is tested against 0.0 as well as the epsilon machine. No-
2890 * tice that this difference should be numerically exactly the zero ma-
2891 * chine, but because of the possible movement of some of the data we
2892 * flagged differently a difference less than twice the epsilon machine.
2893 * The largest error is reported.
2894 *
2895 * Notes
2896 * =====
2897 *
2898 * A description vector is associated with each 2D block-cyclicly dis-
2899 * tributed matrix. This vector stores the information required to
2900 * establish the mapping between a matrix entry and its corresponding
2901 * process and memory location.
2902 *
2903 * In the following comments, the character _ should be read as
2904 * "of the distributed matrix". Let A be a generic term for any 2D
2905 * block cyclicly distributed matrix. Its description vector is DESCA:
2906 *
2907 * NOTATION STORED IN EXPLANATION
2908 * ---------------- --------------- ------------------------------------
2909 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2910 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2911 * the NPROW x NPCOL BLACS process grid
2912 * A is distributed over. The context
2913 * itself is global, but the handle
2914 * (the integer value) may vary.
2915 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2916 * ted matrix A, M_A >= 0.
2917 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2918 * buted matrix A, N_A >= 0.
2919 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2920 * block of the matrix A, IMB_A > 0.
2921 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2922 * left block of the matrix A,
2923 * INB_A > 0.
2924 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2925 * bute the last M_A-IMB_A rows of A,
2926 * MB_A > 0.
2927 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2928 * bute the last N_A-INB_A columns of
2929 * A, NB_A > 0.
2930 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2931 * row of the matrix A is distributed,
2932 * NPROW > RSRC_A >= 0.
2933 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2934 * first column of A is distributed.
2935 * NPCOL > CSRC_A >= 0.
2936 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2937 * array storing the local blocks of
2938 * the distributed matrix A,
2939 * IF( Lc( 1, N_A ) > 0 )
2940 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2941 * ELSE
2942 * LLD_A >= 1.
2943 *
2944 * Let K be the number of rows of a matrix A starting at the global in-
2945 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2946 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2947 * receive if these K rows were distributed over NPROW processes. If K
2948 * is the number of columns of a matrix A starting at the global index
2949 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2950 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2951 * these K columns were distributed over NPCOL processes.
2952 *
2953 * The values of Lr() and Lc() may be determined via a call to the func-
2954 * tion PB_NUMROC:
2955 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2956 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2957 *
2958 * Arguments
2959 * =========
2960 *
2961 * N (global input) INTEGER
2962 * On entry, N specifies the length of the subvector operand
2963 * sub( X ). N must be at least zero.
2964 *
2965 * X (local input) REAL array
2966 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2967 * array contains a local copy of the initial entire matrix PX.
2968 *
2969 * PX (local input) REAL array
2970 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2971 * array contains the local entries of the matrix PX.
2972 *
2973 * IX (global input) INTEGER
2974 * On entry, IX specifies X's global row index, which points to
2975 * the beginning of the submatrix sub( X ).
2976 *
2977 * JX (global input) INTEGER
2978 * On entry, JX specifies X's global column index, which points
2979 * to the beginning of the submatrix sub( X ).
2980 *
2981 * DESCX (global and local input) INTEGER array
2982 * On entry, DESCX is an integer array of dimension DLEN_. This
2983 * is the array descriptor for the matrix X.
2984 *
2985 * INCX (global input) INTEGER
2986 * On entry, INCX specifies the global increment for the
2987 * elements of X. Only two values of INCX are supported in
2988 * this version, namely 1 and M_X. INCX must not be zero.
2989 *
2990 * INFO (global output) INTEGER
2991 * On exit, if INFO = 0, no error has been found,
2992 * If INFO > 0, the maximum abolute error found is in (0,eps],
2993 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2994 *
2995 * -- Written on April 1, 1998 by
2996 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2997 *
2998 * =====================================================================
2999 *
3000 * .. Parameters ..
3001  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3002  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3003  $ RSRC_
3004  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3005  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3006  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3007  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3008  REAL ZERO
3009  PARAMETER ( ZERO = 0.0e+0 )
3010 * ..
3011 * .. Local Scalars ..
3012  LOGICAL COLREP, ROWREP
3013  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3014  $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3015  $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3016  $ nprow, nqall
3017  REAL EPS, ERR, ERRMAX
3018 * ..
3019 * .. External Subroutines ..
3020  EXTERNAL BLACS_GRIDINFO, PSERRSET, SGAMX2D
3021 * ..
3022 * .. External Functions ..
3023  INTEGER PB_NUMROC
3024  REAL PSLAMCH
3025  EXTERNAL PSLAMCH, PB_NUMROC
3026 * ..
3027 * .. Intrinsic Functions ..
3028  INTRINSIC abs, max, min, mod
3029 * ..
3030 * .. Executable Statements ..
3031 *
3032  info = 0
3033  errmax = zero
3034 *
3035 * Quick return if possible
3036 *
3037  IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3038  $ RETURN
3039 *
3040 * Start the operations
3041 *
3042  ictxt = descx( ctxt_ )
3043  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3044 *
3045  eps = pslamch( ictxt, 'eps' )
3046 *
3047  mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3048  $ myrow, descx( rsrc_ ), nprow )
3049  nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3050  $ mycol, descx( csrc_ ), npcol )
3051 *
3052  mbx = descx( mb_ )
3053  nbx = descx( nb_ )
3054  ldx = descx( m_ )
3055  ldpx = descx( lld_ )
3056  icurrow = descx( rsrc_ )
3057  icurcol = descx( csrc_ )
3058  rowrep = ( icurrow.EQ.-1 )
3059  colrep = ( icurcol.EQ.-1 )
3060  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3061  imbx = descx( imb_ )
3062  ELSE
3063  imbx = mbx
3064  END IF
3065  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3066  inbx = descx( inb_ )
3067  ELSE
3068  inbx = nbx
3069  END IF
3070  IF( rowrep ) THEN
3071  myrowdist = 0
3072  ELSE
3073  myrowdist = mod( myrow - icurrow + nprow, nprow )
3074  END IF
3075  IF( colrep ) THEN
3076  mycoldist = 0
3077  ELSE
3078  mycoldist = mod( mycol - icurcol + npcol, npcol )
3079  END IF
3080  ii = 1
3081  jj = 1
3082 *
3083  IF( incx.EQ.descx( m_ ) ) THEN
3084 *
3085 * sub( X ) is a row vector
3086 *
3087  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3088 *
3089  i = 1
3090  IF( mycoldist.EQ.0 ) THEN
3091  j = 1
3092  ELSE
3093  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3094  END IF
3095  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3096  ib = min( descx( m_ ), descx( imb_ ) )
3097 *
3098  DO 20 kk = 0, jb-1
3099  DO 10 ll = 0, ib-1
3100  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3101  $ CALL pserrset( err, errmax,
3102  $ x( i+ll+(j+kk-1)*ldx ),
3103  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3104  10 CONTINUE
3105  20 CONTINUE
3106  IF( colrep ) THEN
3107  j = j + inbx
3108  ELSE
3109  j = j + inbx + ( npcol - 1 ) * nbx
3110  END IF
3111 *
3112  DO 50 jj = inbx+1, nqall, nbx
3113  jb = min( nqall-jj+1, nbx )
3114 *
3115  DO 40 kk = 0, jb-1
3116  DO 30 ll = 0, ib-1
3117  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3118  $ j+kk.GT.jx+n-1 )
3119  $ CALL pserrset( err, errmax,
3120  $ x( i+ll+(j+kk-1)*ldx ),
3121  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3122  30 CONTINUE
3123  40 CONTINUE
3124 *
3125  IF( colrep ) THEN
3126  j = j + nbx
3127  ELSE
3128  j = j + npcol * nbx
3129  END IF
3130 *
3131  50 CONTINUE
3132 *
3133  ii = ii + ib
3134 *
3135  END IF
3136 *
3137  icurrow = mod( icurrow + 1, nprow )
3138 *
3139  DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3140  ib = min( descx( m_ ) - i + 1, mbx )
3141 *
3142  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3143 *
3144  IF( mycoldist.EQ.0 ) THEN
3145  j = 1
3146  ELSE
3147  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3148  END IF
3149 *
3150  jj = 1
3151  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3152  DO 70 kk = 0, jb-1
3153  DO 60 ll = 0, ib-1
3154  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3155  $ j+kk.GT.jx+n-1 )
3156  $ CALL pserrset( err, errmax,
3157  $ x( i+ll+(j+kk-1)*ldx ),
3158  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3159  60 CONTINUE
3160  70 CONTINUE
3161  IF( colrep ) THEN
3162  j = j + inbx
3163  ELSE
3164  j = j + inbx + ( npcol - 1 ) * nbx
3165  END IF
3166 *
3167  DO 100 jj = inbx+1, nqall, nbx
3168  jb = min( nqall-jj+1, nbx )
3169 *
3170  DO 90 kk = 0, jb-1
3171  DO 80 ll = 0, ib-1
3172  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3173  $ j+kk.GT.jx+n-1 )
3174  $ CALL pserrset( err, errmax,
3175  $ x( i+ll+(j+kk-1)*ldx ),
3176  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3177  80 CONTINUE
3178  90 CONTINUE
3179 *
3180  IF( colrep ) THEN
3181  j = j + nbx
3182  ELSE
3183  j = j + npcol * nbx
3184  END IF
3185 *
3186  100 CONTINUE
3187 *
3188  ii = ii + ib
3189 *
3190  END IF
3191 *
3192  icurrow = mod( icurrow + 1, nprow )
3193 *
3194  110 CONTINUE
3195 *
3196  ELSE
3197 *
3198 * sub( X ) is a column vector
3199 *
3200  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3201 *
3202  j = 1
3203  IF( myrowdist.EQ.0 ) THEN
3204  i = 1
3205  ELSE
3206  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3207  END IF
3208  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3209  jb = min( descx( n_ ), descx( inb_ ) )
3210 *
3211  DO 130 kk = 0, jb-1
3212  DO 120 ll = 0, ib-1
3213  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3214  $ CALL pserrset( err, errmax,
3215  $ x( i+ll+(j+kk-1)*ldx ),
3216  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3217  120 CONTINUE
3218  130 CONTINUE
3219  IF( rowrep ) THEN
3220  i = i + imbx
3221  ELSE
3222  i = i + imbx + ( nprow - 1 ) * mbx
3223  END IF
3224 *
3225  DO 160 ii = imbx+1, mpall, mbx
3226  ib = min( mpall-ii+1, mbx )
3227 *
3228  DO 150 kk = 0, jb-1
3229  DO 140 ll = 0, ib-1
3230  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3231  $ i+ll.GT.ix+n-1 )
3232  $ CALL pserrset( err, errmax,
3233  $ x( i+ll+(j+kk-1)*ldx ),
3234  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3235  140 CONTINUE
3236  150 CONTINUE
3237 *
3238  IF( rowrep ) THEN
3239  i = i + mbx
3240  ELSE
3241  i = i + nprow * mbx
3242  END IF
3243 *
3244  160 CONTINUE
3245 *
3246  jj = jj + jb
3247 *
3248  END IF
3249 *
3250  icurcol = mod( icurcol + 1, npcol )
3251 *
3252  DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3253  jb = min( descx( n_ ) - j + 1, nbx )
3254 *
3255  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3256 *
3257  IF( myrowdist.EQ.0 ) THEN
3258  i = 1
3259  ELSE
3260  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3261  END IF
3262 *
3263  ii = 1
3264  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3265  DO 180 kk = 0, jb-1
3266  DO 170 ll = 0, ib-1
3267  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3268  $ i+ll.GT.ix+n-1 )
3269  $ CALL pserrset( err, errmax,
3270  $ x( i+ll+(j+kk-1)*ldx ),
3271  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3272  170 CONTINUE
3273  180 CONTINUE
3274  IF( rowrep ) THEN
3275  i = i + imbx
3276  ELSE
3277  i = i + imbx + ( nprow - 1 ) * mbx
3278  END IF
3279 *
3280  DO 210 ii = imbx+1, mpall, mbx
3281  ib = min( mpall-ii+1, mbx )
3282 *
3283  DO 200 kk = 0, jb-1
3284  DO 190 ll = 0, ib-1
3285  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3286  $ i+ll.GT.ix+n-1 )
3287  $ CALL pserrset( err, errmax,
3288  $ x( i+ll+(j+kk-1)*ldx ),
3289  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3290  190 CONTINUE
3291  200 CONTINUE
3292 *
3293  IF( rowrep ) THEN
3294  i = i + mbx
3295  ELSE
3296  i = i + nprow * mbx
3297  END IF
3298 *
3299  210 CONTINUE
3300 *
3301  jj = jj + jb
3302 *
3303  END IF
3304 *
3305  icurcol = mod( icurcol + 1, npcol )
3306 *
3307  220 CONTINUE
3308 *
3309  END IF
3310 *
3311  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3312  $ -1, -1 )
3313 *
3314  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3315  info = 1
3316  ELSE IF( errmax.GT.eps ) THEN
3317  info = -1
3318  END IF
3319 *
3320  RETURN
3321 *
3322 * End of PSCHKVOUT
3323 *
3324  END
3325  SUBROUTINE pschkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3327 * -- PBLAS test routine (version 2.0) --
3328 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3329 * and University of California, Berkeley.
3330 * April 1, 1998
3331 *
3332 * .. Scalar Arguments ..
3333  INTEGER IA, INFO, JA, M, N
3334  REAL ERRMAX
3335 * ..
3336 * .. Array Arguments ..
3337  INTEGER DESCA( * )
3338  REAL PA( * ), A( * )
3339 * ..
3340 *
3341 * Purpose
3342 * =======
3343 *
3344 * PSCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3345 * local array entries are compared element by element, and their dif-
3346 * ference is tested against 0.0 as well as the epsilon machine. Notice
3347 * that this difference should be numerically exactly the zero machine,
3348 * but because of the possible fluctuation of some of the data we flag-
3349 * ged differently a difference less than twice the epsilon machine. The
3350 * largest error is also returned.
3351 *
3352 * Notes
3353 * =====
3354 *
3355 * A description vector is associated with each 2D block-cyclicly dis-
3356 * tributed matrix. This vector stores the information required to
3357 * establish the mapping between a matrix entry and its corresponding
3358 * process and memory location.
3359 *
3360 * In the following comments, the character _ should be read as
3361 * "of the distributed matrix". Let A be a generic term for any 2D
3362 * block cyclicly distributed matrix. Its description vector is DESCA:
3363 *
3364 * NOTATION STORED IN EXPLANATION
3365 * ---------------- --------------- ------------------------------------
3366 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3367 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3368 * the NPROW x NPCOL BLACS process grid
3369 * A is distributed over. The context
3370 * itself is global, but the handle
3371 * (the integer value) may vary.
3372 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3373 * ted matrix A, M_A >= 0.
3374 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3375 * buted matrix A, N_A >= 0.
3376 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3377 * block of the matrix A, IMB_A > 0.
3378 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3379 * left block of the matrix A,
3380 * INB_A > 0.
3381 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3382 * bute the last M_A-IMB_A rows of A,
3383 * MB_A > 0.
3384 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3385 * bute the last N_A-INB_A columns of
3386 * A, NB_A > 0.
3387 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3388 * row of the matrix A is distributed,
3389 * NPROW > RSRC_A >= 0.
3390 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3391 * first column of A is distributed.
3392 * NPCOL > CSRC_A >= 0.
3393 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3394 * array storing the local blocks of
3395 * the distributed matrix A,
3396 * IF( Lc( 1, N_A ) > 0 )
3397 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3398 * ELSE
3399 * LLD_A >= 1.
3400 *
3401 * Let K be the number of rows of a matrix A starting at the global in-
3402 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3403 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3404 * receive if these K rows were distributed over NPROW processes. If K
3405 * is the number of columns of a matrix A starting at the global index
3406 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3407 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3408 * these K columns were distributed over NPCOL processes.
3409 *
3410 * The values of Lr() and Lc() may be determined via a call to the func-
3411 * tion PB_NUMROC:
3412 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3413 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3414 *
3415 * Arguments
3416 * =========
3417 *
3418 * ERRMAX (global output) REAL
3419 * On exit, ERRMAX specifies the largest absolute element-wise
3420 * difference between sub( A ) and sub( PA ).
3421 *
3422 * M (global input) INTEGER
3423 * On entry, M specifies the number of rows of the submatrix
3424 * operand sub( A ). M must be at least zero.
3425 *
3426 * N (global input) INTEGER
3427 * On entry, N specifies the number of columns of the submatrix
3428 * operand sub( A ). N must be at least zero.
3429 *
3430 * A (local input) REAL array
3431 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3432 * array contains a local copy of the initial entire matrix PA.
3433 *
3434 * PA (local input) REAL array
3435 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3436 * array contains the local entries of the matrix PA.
3437 *
3438 * IA (global input) INTEGER
3439 * On entry, IA specifies A's global row index, which points to
3440 * the beginning of the submatrix sub( A ).
3441 *
3442 * JA (global input) INTEGER
3443 * On entry, JA specifies A's global column index, which points
3444 * to the beginning of the submatrix sub( A ).
3445 *
3446 * DESCA (global and local input) INTEGER array
3447 * On entry, DESCA is an integer array of dimension DLEN_. This
3448 * is the array descriptor for the matrix A.
3449 *
3450 * INFO (global output) INTEGER
3451 * On exit, if INFO = 0, no error has been found,
3452 * If INFO > 0, the maximum abolute error found is in (0,eps],
3453 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3454 *
3455 * -- Written on April 1, 1998 by
3456 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3457 *
3458 * =====================================================================
3459 *
3460 * .. Parameters ..
3461  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3462  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3463  $ RSRC_
3464  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3465  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3466  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3467  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3468  REAL ZERO
3469  PARAMETER ( ZERO = 0.0e+0 )
3470 * ..
3471 * .. Local Scalars ..
3472  LOGICAL COLREP, ROWREP
3473  INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3474  $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3475  $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3476  REAL ERR, EPS
3477 * ..
3478 * .. External Subroutines ..
3479  EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
3480 * ..
3481 * .. External Functions ..
3482  REAL PSLAMCH
3483  EXTERNAL pslamch
3484 * ..
3485 * .. Intrinsic Functions ..
3486  INTRINSIC abs, max, min, mod
3487 * ..
3488 * .. Executable Statements ..
3489 *
3490  info = 0
3491  errmax = zero
3492 *
3493 * Quick return if posssible
3494 *
3495  IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3496  $ RETURN
3497 *
3498 * Start the operations
3499 *
3500  ictxt = desca( ctxt_ )
3501  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3502 *
3503  eps = pslamch( ictxt, 'eps' )
3504 *
3505  CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3506  $ jja, iarow, iacol )
3507 *
3508  ii = iia
3509  jj = jja
3510  lda = desca( m_ )
3511  ldpa = desca( lld_ )
3512  icurrow = iarow
3513  icurcol = iacol
3514  rowrep = ( iarow.EQ.-1 )
3515  colrep = ( iacol.EQ.-1 )
3516 *
3517 * Handle the first block of column separately
3518 *
3519  jb = desca( inb_ ) - ja + 1
3520  IF( jb.LE.0 )
3521  $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3522  jb = min( jb, n )
3523  jn = ja + jb - 1
3524 *
3525  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3526 *
3527  DO 40 h = 0, jb-1
3528  ib = desca( imb_ ) - ia + 1
3529  IF( ib.LE.0 )
3530  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3531  ib = min( ib, m )
3532  in = ia + ib - 1
3533  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3534  DO 10 k = 0, ib-1
3535  CALL pserrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3536  $ pa( ii+k+(jj+h-1)*ldpa ) )
3537  10 CONTINUE
3538  ii = ii + ib
3539  END IF
3540  icurrow = mod( icurrow+1, nprow )
3541 *
3542 * Loop over remaining block of rows
3543 *
3544  DO 30 i = in+1, ia+m-1, desca( mb_ )
3545  ib = min( desca( mb_ ), ia+m-i )
3546  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3547  DO 20 k = 0, ib-1
3548  CALL pserrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3549  $ pa( ii+k+(jj+h-1)*ldpa ) )
3550  20 CONTINUE
3551  ii = ii + ib
3552  END IF
3553  icurrow = mod( icurrow+1, nprow )
3554  30 CONTINUE
3555 *
3556  ii = iia
3557  icurrow = iarow
3558  40 CONTINUE
3559 *
3560  jj = jj + jb
3561 *
3562  END IF
3563 *
3564  icurcol = mod( icurcol+1, npcol )
3565 *
3566 * Loop over remaining column blocks
3567 *
3568  DO 90 j = jn+1, ja+n-1, desca( nb_ )
3569  jb = min( desca( nb_ ), ja+n-j )
3570  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3571  DO 80 h = 0, jb-1
3572  ib = desca( imb_ ) - ia + 1
3573  IF( ib.LE.0 )
3574  $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3575  ib = min( ib, m )
3576  in = ia + ib - 1
3577  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3578  DO 50 k = 0, ib-1
3579  CALL pserrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3580  $ pa( ii+k+(jj+h-1)*ldpa ) )
3581  50 CONTINUE
3582  ii = ii + ib
3583  END IF
3584  icurrow = mod( icurrow+1, nprow )
3585 *
3586 * Loop over remaining block of rows
3587 *
3588  DO 70 i = in+1, ia+m-1, desca( mb_ )
3589  ib = min( desca( mb_ ), ia+m-i )
3590  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3591  DO 60 k = 0, ib-1
3592  CALL pserrset( err, errmax,
3593  $ a( i+k+(j+h-1)*lda ),
3594  $ pa( ii+k+(jj+h-1)*ldpa ) )
3595  60 CONTINUE
3596  ii = ii + ib
3597  END IF
3598  icurrow = mod( icurrow+1, nprow )
3599  70 CONTINUE
3600 *
3601  ii = iia
3602  icurrow = iarow
3603  80 CONTINUE
3604 *
3605  jj = jj + jb
3606  END IF
3607 *
3608  icurcol = mod( icurcol+1, npcol )
3609 *
3610  90 CONTINUE
3611 *
3612  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3613  $ -1, -1 )
3614 *
3615  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3616  info = 1
3617  ELSE IF( errmax.GT.eps ) THEN
3618  info = -1
3619  END IF
3620 *
3621  RETURN
3622 *
3623 * End of PSCHKMIN
3624 *
3625  END
3626  SUBROUTINE pschkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3628 * -- PBLAS test routine (version 2.0) --
3629 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3630 * and University of California, Berkeley.
3631 * April 1, 1998
3632 *
3633 * .. Scalar Arguments ..
3634  INTEGER IA, INFO, JA, M, N
3635 * ..
3636 * .. Array Arguments ..
3637  INTEGER DESCA( * )
3638  REAL A( * ), PA( * )
3639 * ..
3640 *
3641 * Purpose
3642 * =======
3643 *
3644 * PSCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3645 * The local array entries are compared element by element, and their
3646 * difference is tested against 0.0 as well as the epsilon machine. No-
3647 * tice that this difference should be numerically exactly the zero ma-
3648 * chine, but because of the possible movement of some of the data we
3649 * flagged differently a difference less than twice the epsilon machine.
3650 * The largest error is reported.
3651 *
3652 * Notes
3653 * =====
3654 *
3655 * A description vector is associated with each 2D block-cyclicly dis-
3656 * tributed matrix. This vector stores the information required to
3657 * establish the mapping between a matrix entry and its corresponding
3658 * process and memory location.
3659 *
3660 * In the following comments, the character _ should be read as
3661 * "of the distributed matrix". Let A be a generic term for any 2D
3662 * block cyclicly distributed matrix. Its description vector is DESCA:
3663 *
3664 * NOTATION STORED IN EXPLANATION
3665 * ---------------- --------------- ------------------------------------
3666 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3667 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3668 * the NPROW x NPCOL BLACS process grid
3669 * A is distributed over. The context
3670 * itself is global, but the handle
3671 * (the integer value) may vary.
3672 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3673 * ted matrix A, M_A >= 0.
3674 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3675 * buted matrix A, N_A >= 0.
3676 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3677 * block of the matrix A, IMB_A > 0.
3678 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3679 * left block of the matrix A,
3680 * INB_A > 0.
3681 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3682 * bute the last M_A-IMB_A rows of A,
3683 * MB_A > 0.
3684 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3685 * bute the last N_A-INB_A columns of
3686 * A, NB_A > 0.
3687 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3688 * row of the matrix A is distributed,
3689 * NPROW > RSRC_A >= 0.
3690 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3691 * first column of A is distributed.
3692 * NPCOL > CSRC_A >= 0.
3693 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3694 * array storing the local blocks of
3695 * the distributed matrix A,
3696 * IF( Lc( 1, N_A ) > 0 )
3697 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3698 * ELSE
3699 * LLD_A >= 1.
3700 *
3701 * Let K be the number of rows of a matrix A starting at the global in-
3702 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3703 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3704 * receive if these K rows were distributed over NPROW processes. If K
3705 * is the number of columns of a matrix A starting at the global index
3706 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3707 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3708 * these K columns were distributed over NPCOL processes.
3709 *
3710 * The values of Lr() and Lc() may be determined via a call to the func-
3711 * tion PB_NUMROC:
3712 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3713 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3714 *
3715 * Arguments
3716 * =========
3717 *
3718 * M (global input) INTEGER
3719 * On entry, M specifies the number of rows of the submatrix
3720 * sub( PA ). M must be at least zero.
3721 *
3722 * N (global input) INTEGER
3723 * On entry, N specifies the number of columns of the submatrix
3724 * sub( PA ). N must be at least zero.
3725 *
3726 * A (local input) REAL array
3727 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3728 * array contains a local copy of the initial entire matrix PA.
3729 *
3730 * PA (local input) REAL array
3731 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3732 * array contains the local entries of the matrix PA.
3733 *
3734 * IA (global input) INTEGER
3735 * On entry, IA specifies A's global row index, which points to
3736 * the beginning of the submatrix sub( A ).
3737 *
3738 * JA (global input) INTEGER
3739 * On entry, JA specifies A's global column index, which points
3740 * to the beginning of the submatrix sub( A ).
3741 *
3742 * DESCA (global and local input) INTEGER array
3743 * On entry, DESCA is an integer array of dimension DLEN_. This
3744 * is the array descriptor for the matrix A.
3745 *
3746 * INFO (global output) INTEGER
3747 * On exit, if INFO = 0, no error has been found,
3748 * If INFO > 0, the maximum abolute error found is in (0,eps],
3749 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3750 *
3751 * -- Written on April 1, 1998 by
3752 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3753 *
3754 * =====================================================================
3755 *
3756 * .. Parameters ..
3757  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3758  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3759  $ RSRC_
3760  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3761  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3762  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3763  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3764  REAL ZERO
3765  PARAMETER ( ZERO = 0.0e+0 )
3766 * ..
3767 * .. Local Scalars ..
3768  LOGICAL COLREP, ROWREP
3769  INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3770  $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3771  $ NPCOL, NPROW
3772  REAL EPS, ERR, ERRMAX
3773 * ..
3774 * .. External Subroutines ..
3775  EXTERNAL blacs_gridinfo, pserrset, sgamx2d
3776 * ..
3777 * .. External Functions ..
3778  INTEGER PB_NUMROC
3779  REAL PSLAMCH
3780  EXTERNAL PSLAMCH, PB_NUMROC
3781 * ..
3782 * .. Intrinsic Functions ..
3783  INTRINSIC max, min, mod
3784 * ..
3785 * .. Executable Statements ..
3786 *
3787  info = 0
3788  errmax = zero
3789 *
3790 * Quick return if possible
3791 *
3792  IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3793  $ RETURN
3794 *
3795 * Start the operations
3796 *
3797  ictxt = desca( ctxt_ )
3798  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3799 *
3800  eps = pslamch( ictxt, 'eps' )
3801 *
3802  mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3803  $ myrow, desca( rsrc_ ), nprow )
3804 *
3805  lda = desca( m_ )
3806  ldpa = desca( lld_ )
3807 *
3808  ii = 1
3809  jj = 1
3810  rowrep = ( desca( rsrc_ ).EQ.-1 )
3811  colrep = ( desca( csrc_ ).EQ.-1 )
3812  icurcol = desca( csrc_ )
3813  IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3814  imba = desca( imb_ )
3815  ELSE
3816  imba = desca( mb_ )
3817  END IF
3818  IF( rowrep ) THEN
3819  myrowdist = 0
3820  ELSE
3821  myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3822  END IF
3823 *
3824  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3825 *
3826  j = 1
3827  IF( myrowdist.EQ.0 ) THEN
3828  i = 1
3829  ELSE
3830  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3831  END IF
3832  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3833  jb = min( desca( n_ ), desca( inb_ ) )
3834 *
3835  DO 20 kk = 0, jb-1
3836  DO 10 ll = 0, ib-1
3837  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3838  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3839  $ CALL pserrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3840  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3841  10 CONTINUE
3842  20 CONTINUE
3843  IF( rowrep ) THEN
3844  i = i + imba
3845  ELSE
3846  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3847  END IF
3848 *
3849  DO 50 ii = imba + 1, mpall, desca( mb_ )
3850  ib = min( mpall-ii+1, desca( mb_ ) )
3851 *
3852  DO 40 kk = 0, jb-1
3853  DO 30 ll = 0, ib-1
3854  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3855  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3856  $ CALL pserrset( err, errmax,
3857  $ a( i+ll+(j+kk-1)*lda ),
3858  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3859  30 CONTINUE
3860  40 CONTINUE
3861 *
3862  IF( rowrep ) THEN
3863  i = i + desca( mb_ )
3864  ELSE
3865  i = i + nprow * desca( mb_ )
3866  END IF
3867 *
3868  50 CONTINUE
3869 *
3870  jj = jj + jb
3871 *
3872  END IF
3873 *
3874  icurcol = mod( icurcol + 1, npcol )
3875 *
3876  DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3877  jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3878 *
3879  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3880 *
3881  IF( myrowdist.EQ.0 ) THEN
3882  i = 1
3883  ELSE
3884  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3885  END IF
3886 *
3887  ii = 1
3888  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3889  DO 70 kk = 0, jb-1
3890  DO 60 ll = 0, ib-1
3891  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3892  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3893  $ CALL pserrset( err, errmax,
3894  $ a( i+ll+(j+kk-1)*lda ),
3895  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3896  60 CONTINUE
3897  70 CONTINUE
3898  IF( rowrep ) THEN
3899  i = i + imba
3900  ELSE
3901  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3902  END IF
3903 *
3904  DO 100 ii = imba+1, mpall, desca( mb_ )
3905  ib = min( mpall-ii+1, desca( mb_ ) )
3906 *
3907  DO 90 kk = 0, jb-1
3908  DO 80 ll = 0, ib-1
3909  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3910  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3911  $ CALL pserrset( err, errmax,
3912  $ a( i+ll+(j+kk-1)*lda ),
3913  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3914  80 CONTINUE
3915  90 CONTINUE
3916 *
3917  IF( rowrep ) THEN
3918  i = i + desca( mb_ )
3919  ELSE
3920  i = i + nprow * desca( mb_ )
3921  END IF
3922 *
3923  100 CONTINUE
3924 *
3925  jj = jj + jb
3926 *
3927  END IF
3928 *
3929  icurcol = mod( icurcol + 1, npcol )
3930 * INSERT MODE
3931  110 CONTINUE
3932 *
3933  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3934  $ -1, -1 )
3935 *
3936  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3937  info = 1
3938  ELSE IF( errmax.GT.eps ) THEN
3939  info = -1
3940  END IF
3941 *
3942  RETURN
3943 *
3944 * End of PSCHKMOUT
3945 *
3946  END
3947  SUBROUTINE psmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3948  $ CMATNM )
3950 * -- PBLAS test routine (version 2.0) --
3951 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3952 * and University of California, Berkeley.
3953 * April 1, 1998
3954 *
3955 * .. Scalar Arguments ..
3956  INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3957 * ..
3958 * .. Array Arguments ..
3959  CHARACTER*(*) CMATNM
3960  REAL A( LDA, * )
3961 * ..
3962 *
3963 * Purpose
3964 * =======
3965 *
3966 * PSMPRNT prints to the standard output an array A of size m by n. Only
3967 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3968 *
3969 * Arguments
3970 * =========
3971 *
3972 * ICTXT (local input) INTEGER
3973 * On entry, ICTXT specifies the BLACS context handle, indica-
3974 * ting the global context of the operation. The context itself
3975 * is global, but the value of ICTXT is local.
3976 *
3977 * NOUT (global input) INTEGER
3978 * On entry, NOUT specifies the unit number for the output file.
3979 * When NOUT is 6, output to screen, when NOUT is 0, output to
3980 * stderr. NOUT is only defined for process 0.
3981 *
3982 * M (global input) INTEGER
3983 * On entry, M specifies the number of rows of the matrix A. M
3984 * must be at least zero.
3985 *
3986 * N (global input) INTEGER
3987 * On entry, N specifies the number of columns of the matrix A.
3988 * N must be at least zero.
3989 *
3990 * A (local input) REAL array
3991 * On entry, A is an array of dimension (LDA,N). The leading m
3992 * by n part of this array is printed.
3993 *
3994 * LDA (local input) INTEGER
3995 * On entry, LDA specifies the leading dimension of the local
3996 * array A to be printed. LDA must be at least MAX( 1, M ).
3997 *
3998 * IRPRNT (global input) INTEGER
3999 * On entry, IRPRNT specifies the process row coordinate of the
4000 * printing process.
4001 *
4002 * ICPRNT (global input) INTEGER
4003 * On entry, ICPRNT specifies the process column coordinate of
4004 * the printing process.
4005 *
4006 * CMATNM (global input) CHARACTER*(*)
4007 * On entry, CMATNM specifies the identifier of the matrix to be
4008 * printed.
4009 *
4010 * -- Written on April 1, 1998 by
4011 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4012 *
4013 * =====================================================================
4014 *
4015 * .. Local Scalars ..
4016  INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4017 * ..
4018 * .. External Subroutines ..
4019  EXTERNAL BLACS_GRIDINFO
4020 * ..
4021 * .. Executable Statements ..
4022 *
4023 * Quick return if possible
4024 *
4025  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4026  $ RETURN
4027 *
4028 * Get grid parameters
4029 *
4030  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4031 *
4032  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4033 *
4034  WRITE( nout, fmt = * )
4035  DO 20 j = 1, n
4036 *
4037  DO 10 i = 1, m
4038 *
4039  WRITE( nout, fmt = 9999 ) cmatnm, i, j, a( i, j )
4040 *
4041  10 CONTINUE
4042 *
4043  20 CONTINUE
4044 *
4045  END IF
4046 *
4047  9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8 )
4048 *
4049  RETURN
4050 *
4051 * End of PSMPRNT
4052 *
4053  END
4054  SUBROUTINE psvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4055  $ CVECNM )
4057 * -- PBLAS test routine (version 2.0) --
4058 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4059 * and University of California, Berkeley.
4060 * April 1, 1998
4061 *
4062 * .. Scalar Arguments ..
4063  INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4064 * ..
4065 * .. Array Arguments ..
4066  CHARACTER*(*) CVECNM
4067  REAL X( * )
4068 * ..
4069 *
4070 * Purpose
4071 * =======
4072 *
4073 * PSVPRNT prints to the standard output an vector x of length n. Only
4074 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4075 *
4076 * Arguments
4077 * =========
4078 *
4079 * ICTXT (local input) INTEGER
4080 * On entry, ICTXT specifies the BLACS context handle, indica-
4081 * ting the global context of the operation. The context itself
4082 * is global, but the value of ICTXT is local.
4083 *
4084 * NOUT (global input) INTEGER
4085 * On entry, NOUT specifies the unit number for the output file.
4086 * When NOUT is 6, output to screen, when NOUT is 0, output to
4087 * stderr. NOUT is only defined for process 0.
4088 *
4089 * N (global input) INTEGER
4090 * On entry, N specifies the length of the vector X. N must be
4091 * at least zero.
4092 *
4093 * X (global input) REAL array
4094 * On entry, X is an array of dimension at least
4095 * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4096 * ted array X must contain the vector x.
4097 *
4098 * INCX (global input) INTEGER.
4099 * On entry, INCX specifies the increment for the elements of X.
4100 * INCX must not be zero.
4101 *
4102 * IRPRNT (global input) INTEGER
4103 * On entry, IRPRNT specifies the process row coordinate of the
4104 * printing process.
4105 *
4106 * ICPRNT (global input) INTEGER
4107 * On entry, ICPRNT specifies the process column coordinate of
4108 * the printing process.
4109 *
4110 * CVECNM (global input) CHARACTER*(*)
4111 * On entry, CVECNM specifies the identifier of the vector to be
4112 * printed.
4113 *
4114 * -- Written on April 1, 1998 by
4115 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4116 *
4117 * =====================================================================
4118 *
4119 * .. Local Scalars ..
4120  INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4121 * ..
4122 * .. External Subroutines ..
4123  EXTERNAL BLACS_GRIDINFO
4124 * ..
4125 * .. Executable Statements ..
4126 *
4127 * Quick return if possible
4128 *
4129  IF( n.LE.0 )
4130  $ RETURN
4131 *
4132 * Get grid parameters
4133 *
4134  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4135 *
4136  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4137 *
4138  WRITE( nout, fmt = * )
4139  DO 10 i = 1, 1 + ( n-1 )*incx, incx
4140 *
4141  WRITE( nout, fmt = 9999 ) cvecnm, i, x( i )
4142 *
4143  10 CONTINUE
4144 *
4145  END IF
4146 *
4147  9999 FORMAT( 1x, a, '(', i6, ')=', e16.8 )
4148 *
4149  RETURN
4150 *
4151 * End of PSVPRNT
4152 *
4153  END
4154  SUBROUTINE psmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4155  $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4156  $ DESCY, INCY, G, ERR, INFO )
4158 * -- PBLAS test routine (version 2.0) --
4159 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4160 * and University of California, Berkeley.
4161 * April 1, 1998
4162 *
4163 * .. Scalar Arguments ..
4164  CHARACTER*1 TRANS
4165  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4166  $ JY, M, N
4167  REAL ALPHA, BETA, ERR
4168 * ..
4169 * .. Array Arguments ..
4170  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4171  REAL A( * ), G( * ), PY( * ), X( * ), Y( * )
4172 * ..
4173 *
4174 * Purpose
4175 * =======
4176 *
4177 * PSMVCH checks the results of the computational tests.
4178 *
4179 * Notes
4180 * =====
4181 *
4182 * A description vector is associated with each 2D block-cyclicly dis-
4183 * tributed matrix. This vector stores the information required to
4184 * establish the mapping between a matrix entry and its corresponding
4185 * process and memory location.
4186 *
4187 * In the following comments, the character _ should be read as
4188 * "of the distributed matrix". Let A be a generic term for any 2D
4189 * block cyclicly distributed matrix. Its description vector is DESCA:
4190 *
4191 * NOTATION STORED IN EXPLANATION
4192 * ---------------- --------------- ------------------------------------
4193 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4194 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4195 * the NPROW x NPCOL BLACS process grid
4196 * A is distributed over. The context
4197 * itself is global, but the handle
4198 * (the integer value) may vary.
4199 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4200 * ted matrix A, M_A >= 0.
4201 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4202 * buted matrix A, N_A >= 0.
4203 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4204 * block of the matrix A, IMB_A > 0.
4205 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4206 * left block of the matrix A,
4207 * INB_A > 0.
4208 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4209 * bute the last M_A-IMB_A rows of A,
4210 * MB_A > 0.
4211 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4212 * bute the last N_A-INB_A columns of
4213 * A, NB_A > 0.
4214 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4215 * row of the matrix A is distributed,
4216 * NPROW > RSRC_A >= 0.
4217 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4218 * first column of A is distributed.
4219 * NPCOL > CSRC_A >= 0.
4220 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4221 * array storing the local blocks of
4222 * the distributed matrix A,
4223 * IF( Lc( 1, N_A ) > 0 )
4224 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4225 * ELSE
4226 * LLD_A >= 1.
4227 *
4228 * Let K be the number of rows of a matrix A starting at the global in-
4229 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4230 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4231 * receive if these K rows were distributed over NPROW processes. If K
4232 * is the number of columns of a matrix A starting at the global index
4233 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4234 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4235 * these K columns were distributed over NPCOL processes.
4236 *
4237 * The values of Lr() and Lc() may be determined via a call to the func-
4238 * tion PB_NUMROC:
4239 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4240 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4241 *
4242 * Arguments
4243 * =========
4244 *
4245 * ICTXT (local input) INTEGER
4246 * On entry, ICTXT specifies the BLACS context handle, indica-
4247 * ting the global context of the operation. The context itself
4248 * is global, but the value of ICTXT is local.
4249 *
4250 * TRANS (global input) CHARACTER*1
4251 * On entry, TRANS specifies which matrix-vector product is to
4252 * be computed as follows:
4253 * If TRANS = 'N',
4254 * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ),
4255 * otherwise
4256 * sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ).
4257 *
4258 * M (global input) INTEGER
4259 * On entry, M specifies the number of rows of the submatrix
4260 * operand matrix A. M must be at least zero.
4261 *
4262 * N (global input) INTEGER
4263 * On entry, N specifies the number of columns of the subma-
4264 * trix operand matrix A. N must be at least zero.
4265 *
4266 * ALPHA (global input) REAL
4267 * On entry, ALPHA specifies the scalar alpha.
4268 *
4269 * A (local input) REAL array
4270 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4271 * array contains a local copy of the initial entire matrix PA.
4272 *
4273 * IA (global input) INTEGER
4274 * On entry, IA specifies A's global row index, which points to
4275 * the beginning of the submatrix sub( A ).
4276 *
4277 * JA (global input) INTEGER
4278 * On entry, JA specifies A's global column index, which points
4279 * to the beginning of the submatrix sub( A ).
4280 *
4281 * DESCA (global and local input) INTEGER array
4282 * On entry, DESCA is an integer array of dimension DLEN_. This
4283 * is the array descriptor for the matrix A.
4284 *
4285 * X (local input) REAL array
4286 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4287 * array contains a local copy of the initial entire matrix PX.
4288 *
4289 * IX (global input) INTEGER
4290 * On entry, IX specifies X's global row index, which points to
4291 * the beginning of the submatrix sub( X ).
4292 *
4293 * JX (global input) INTEGER
4294 * On entry, JX specifies X's global column index, which points
4295 * to the beginning of the submatrix sub( X ).
4296 *
4297 * DESCX (global and local input) INTEGER array
4298 * On entry, DESCX is an integer array of dimension DLEN_. This
4299 * is the array descriptor for the matrix X.
4300 *
4301 * INCX (global input) INTEGER
4302 * On entry, INCX specifies the global increment for the
4303 * elements of X. Only two values of INCX are supported in
4304 * this version, namely 1 and M_X. INCX must not be zero.
4305 *
4306 * BETA (global input) REAL
4307 * On entry, BETA specifies the scalar beta.
4308 *
4309 * Y (local input/local output) REAL array
4310 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4311 * array contains a local copy of the initial entire matrix PY.
4312 *
4313 * PY (local input) REAL array
4314 * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4315 * array contains the local entries of the matrix PY.
4316 *
4317 * IY (global input) INTEGER
4318 * On entry, IY specifies Y's global row index, which points to
4319 * the beginning of the submatrix sub( Y ).
4320 *
4321 * JY (global input) INTEGER
4322 * On entry, JY specifies Y's global column index, which points
4323 * to the beginning of the submatrix sub( Y ).
4324 *
4325 * DESCY (global and local input) INTEGER array
4326 * On entry, DESCY is an integer array of dimension DLEN_. This
4327 * is the array descriptor for the matrix Y.
4328 *
4329 * INCY (global input) INTEGER
4330 * On entry, INCY specifies the global increment for the
4331 * elements of Y. Only two values of INCY are supported in
4332 * this version, namely 1 and M_Y. INCY must not be zero.
4333 *
4334 * G (workspace) REAL array
4335 * On entry, G is an array of dimension at least MAX( M, N ). G
4336 * is used to compute the gauges.
4337 *
4338 * ERR (global output) REAL
4339 * On exit, ERR specifies the largest error in absolute value.
4340 *
4341 * INFO (global output) INTEGER
4342 * On exit, if INFO <> 0, the result is less than half accurate.
4343 *
4344 * -- Written on April 1, 1998 by
4345 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4346 *
4347 * =====================================================================
4348 *
4349 * .. Parameters ..
4350  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4351  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4352  $ RSRC_
4353  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4354  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4355  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4356  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4357  REAL ZERO, ONE
4358  parameter( zero = 0.0e+0, one = 1.0e+0 )
4359 * ..
4360 * .. Local Scalars ..
4361  LOGICAL COLREP, ROWREP, TRAN
4362  INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4363  $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA,
4364  $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL,
4365  $ nprow
4366  REAL EPS, ERRI, GTMP, TBETA, YTMP
4367 * ..
4368 * .. External Subroutines ..
4369  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4370 * ..
4371 * .. External Functions ..
4372  LOGICAL LSAME
4373  REAL PSLAMCH
4374  EXTERNAL lsame, pslamch
4375 * ..
4376 * .. Intrinsic Functions ..
4377  INTRINSIC abs, max, min, mod, sqrt
4378 * ..
4379 * .. Executable Statements ..
4380 *
4381  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4382 *
4383  eps = pslamch( ictxt, 'eps' )
4384 *
4385  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4386  tbeta = one
4387  ELSE
4388  tbeta = beta
4389  END IF
4390 *
4391  tran = lsame( trans, 'T' ).OR.lsame( trans, 'C' )
4392  IF( tran ) THEN
4393  ml = n
4394  nl = m
4395  ELSE
4396  ml = m
4397  nl = n
4398  END IF
4399 *
4400  lda = max( 1, desca( m_ ) )
4401  ldx = max( 1, descx( m_ ) )
4402  ldy = max( 1, descy( m_ ) )
4403 *
4404 * Compute expected result in Y using data in A, X and Y.
4405 * Compute gauges in G. This part of the computation is performed
4406 * by every process in the grid.
4407 *
4408  ioffy = iy + ( jy - 1 ) * ldy
4409  DO 30 i = 1, ml
4410  ytmp = zero
4411  gtmp = zero
4412  ioffx = ix + ( jx - 1 ) * ldx
4413  IF( tran )THEN
4414  ioffa = ia + ( ja + i - 2 ) * lda
4415  DO 10 j = 1, nl
4416  ytmp = ytmp + a( ioffa ) * x( ioffx )
4417  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4418  ioffa = ioffa + 1
4419  ioffx = ioffx + incx
4420  10 CONTINUE
4421  ELSE
4422  ioffa = ia + i - 1 + ( ja - 1 ) * lda
4423  DO 20 j = 1, nl
4424  ytmp = ytmp + a( ioffa ) * x( ioffx )
4425  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4426  ioffa = ioffa + lda
4427  ioffx = ioffx + incx
4428  20 CONTINUE
4429  END IF
4430  g( i ) = abs( alpha ) * gtmp + abs( tbeta * y( ioffy ) )
4431  y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4432  ioffy = ioffy + incy
4433  30 CONTINUE
4434 *
4435 * Compute the error ratio for this result.
4436 *
4437  err = zero
4438  info = 0
4439  ldpy = descy( lld_ )
4440  ioffy = iy + ( jy - 1 ) * ldy
4441  CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4442  $ jjy, iyrow, iycol )
4443  icurrow = iyrow
4444  icurcol = iycol
4445  rowrep = ( iyrow.EQ.-1 )
4446  colrep = ( iycol.EQ.-1 )
4447 *
4448  IF( incy.EQ.descy( m_ ) ) THEN
4449 *
4450 * sub( Y ) is a row vector
4451 *
4452  jb = descy( inb_ ) - jy + 1
4453  IF( jb.LE.0 )
4454  $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4455  jb = min( jb, ml )
4456  jn = jy + jb - 1
4457 *
4458  DO 50 j = jy, jn
4459 *
4460  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4461  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4462  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4463  IF( g( j-jy+1 ).NE.zero )
4464  $ erri = erri / g( j-jy+1 )
4465  err = max( err, erri )
4466  IF( err*sqrt( eps ).GE.one )
4467  $ info = 1
4468  jjy = jjy + 1
4469  END IF
4470 *
4471  ioffy = ioffy + incy
4472 *
4473  50 CONTINUE
4474 *
4475  icurcol = mod( icurcol+1, npcol )
4476 *
4477  DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4478  jb = min( jy+ml-j, descy( nb_ ) )
4479 *
4480  DO 60 kk = 0, jb-1
4481 *
4482  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4483  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4484  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4485  IF( g( j+kk-jy+1 ).NE.zero )
4486  $ erri = erri / g( j+kk-jy+1 )
4487  err = max( err, erri )
4488  IF( err*sqrt( eps ).GE.one )
4489  $ info = 1
4490  jjy = jjy + 1
4491  END IF
4492 *
4493  ioffy = ioffy + incy
4494 *
4495  60 CONTINUE
4496 *
4497  icurcol = mod( icurcol+1, npcol )
4498 *
4499  70 CONTINUE
4500 *
4501  ELSE
4502 *
4503 * sub( Y ) is a column vector
4504 *
4505  ib = descy( imb_ ) - iy + 1
4506  IF( ib.LE.0 )
4507  $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4508  ib = min( ib, ml )
4509  in = iy + ib - 1
4510 *
4511  DO 80 i = iy, in
4512 *
4513  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4514  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4515  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4516  IF( g( i-iy+1 ).NE.zero )
4517  $ erri = erri / g( i-iy+1 )
4518  err = max( err, erri )
4519  IF( err*sqrt( eps ).GE.one )
4520  $ info = 1
4521  iiy = iiy + 1
4522  END IF
4523 *
4524  ioffy = ioffy + incy
4525 *
4526  80 CONTINUE
4527 *
4528  icurrow = mod( icurrow+1, nprow )
4529 *
4530  DO 100 i = in+1, iy+ml-1, descy( mb_ )
4531  ib = min( iy+ml-i, descy( mb_ ) )
4532 *
4533  DO 90 kk = 0, ib-1
4534 *
4535  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4536  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4537  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4538  IF( g( i+kk-iy+1 ).NE.zero )
4539  $ erri = erri / g( i+kk-iy+1 )
4540  err = max( err, erri )
4541  IF( err*sqrt( eps ).GE.one )
4542  $ info = 1
4543  iiy = iiy + 1
4544  END IF
4545 *
4546  ioffy = ioffy + incy
4547 *
4548  90 CONTINUE
4549 *
4550  icurrow = mod( icurrow+1, nprow )
4551 *
4552  100 CONTINUE
4553 *
4554  END IF
4555 *
4556 * If INFO = 0, all results are at least half accurate.
4557 *
4558  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4559  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4560  $ mycol )
4561 *
4562  RETURN
4563 *
4564 * End of PSMVCH
4565 *
4566  END
4567  SUBROUTINE psvmch( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4568  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA,
4569  $ DESCA, G, ERR, INFO )
4571 * -- PBLAS test routine (version 2.0) --
4572 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4573 * and University of California, Berkeley.
4574 * April 1, 1998
4575 *
4576 * .. Scalar Arguments ..
4577  CHARACTER*1 UPLO
4578  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4579  $ JY, M, N
4580  REAL ALPHA, ERR
4581 * ..
4582 * .. Array Arguments ..
4583  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4584  REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4585 * ..
4586 *
4587 * Purpose
4588 * =======
4589 *
4590 * PSVMCH checks the results of the computational tests.
4591 *
4592 * Notes
4593 * =====
4594 *
4595 * A description vector is associated with each 2D block-cyclicly dis-
4596 * tributed matrix. This vector stores the information required to
4597 * establish the mapping between a matrix entry and its corresponding
4598 * process and memory location.
4599 *
4600 * In the following comments, the character _ should be read as
4601 * "of the distributed matrix". Let A be a generic term for any 2D
4602 * block cyclicly distributed matrix. Its description vector is DESCA:
4603 *
4604 * NOTATION STORED IN EXPLANATION
4605 * ---------------- --------------- ------------------------------------
4606 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4607 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4608 * the NPROW x NPCOL BLACS process grid
4609 * A is distributed over. The context
4610 * itself is global, but the handle
4611 * (the integer value) may vary.
4612 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4613 * ted matrix A, M_A >= 0.
4614 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4615 * buted matrix A, N_A >= 0.
4616 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4617 * block of the matrix A, IMB_A > 0.
4618 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4619 * left block of the matrix A,
4620 * INB_A > 0.
4621 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4622 * bute the last M_A-IMB_A rows of A,
4623 * MB_A > 0.
4624 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4625 * bute the last N_A-INB_A columns of
4626 * A, NB_A > 0.
4627 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4628 * row of the matrix A is distributed,
4629 * NPROW > RSRC_A >= 0.
4630 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4631 * first column of A is distributed.
4632 * NPCOL > CSRC_A >= 0.
4633 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4634 * array storing the local blocks of
4635 * the distributed matrix A,
4636 * IF( Lc( 1, N_A ) > 0 )
4637 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4638 * ELSE
4639 * LLD_A >= 1.
4640 *
4641 * Let K be the number of rows of a matrix A starting at the global in-
4642 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4643 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4644 * receive if these K rows were distributed over NPROW processes. If K
4645 * is the number of columns of a matrix A starting at the global index
4646 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4647 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4648 * these K columns were distributed over NPCOL processes.
4649 *
4650 * The values of Lr() and Lc() may be determined via a call to the func-
4651 * tion PB_NUMROC:
4652 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4653 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4654 *
4655 * Arguments
4656 * =========
4657 *
4658 * ICTXT (local input) INTEGER
4659 * On entry, ICTXT specifies the BLACS context handle, indica-
4660 * ting the global context of the operation. The context itself
4661 * is global, but the value of ICTXT is local.
4662 *
4663 * UPLO (global input) CHARACTER*1
4664 * On entry, UPLO specifies which part of the submatrix sub( A )
4665 * is to be referenced as follows:
4666 * If UPLO = 'L', only the lower triangular part,
4667 * If UPLO = 'U', only the upper triangular part,
4668 * else the entire matrix is to be referenced.
4669 *
4670 * M (global input) INTEGER
4671 * On entry, M specifies the number of rows of the submatrix
4672 * operand matrix A. M must be at least zero.
4673 *
4674 * N (global input) INTEGER
4675 * On entry, N specifies the number of columns of the subma-
4676 * trix operand matrix A. N must be at least zero.
4677 *
4678 * ALPHA (global input) REAL
4679 * On entry, ALPHA specifies the scalar alpha.
4680 *
4681 * X (local input) REAL array
4682 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4683 * array contains a local copy of the initial entire matrix PX.
4684 *
4685 * IX (global input) INTEGER
4686 * On entry, IX specifies X's global row index, which points to
4687 * the beginning of the submatrix sub( X ).
4688 *
4689 * JX (global input) INTEGER
4690 * On entry, JX specifies X's global column index, which points
4691 * to the beginning of the submatrix sub( X ).
4692 *
4693 * DESCX (global and local input) INTEGER array
4694 * On entry, DESCX is an integer array of dimension DLEN_. This
4695 * is the array descriptor for the matrix X.
4696 *
4697 * INCX (global input) INTEGER
4698 * On entry, INCX specifies the global increment for the
4699 * elements of X. Only two values of INCX are supported in
4700 * this version, namely 1 and M_X. INCX must not be zero.
4701 *
4702 * Y (local input) REAL array
4703 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4704 * array contains a local copy of the initial entire matrix PY.
4705 *
4706 * IY (global input) INTEGER
4707 * On entry, IY specifies Y's global row index, which points to
4708 * the beginning of the submatrix sub( Y ).
4709 *
4710 * JY (global input) INTEGER
4711 * On entry, JY specifies Y's global column index, which points
4712 * to the beginning of the submatrix sub( Y ).
4713 *
4714 * DESCY (global and local input) INTEGER array
4715 * On entry, DESCY is an integer array of dimension DLEN_. This
4716 * is the array descriptor for the matrix Y.
4717 *
4718 * INCY (global input) INTEGER
4719 * On entry, INCY specifies the global increment for the
4720 * elements of Y. Only two values of INCY are supported in
4721 * this version, namely 1 and M_Y. INCY must not be zero.
4722 *
4723 * A (local input/local output) REAL array
4724 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4725 * array contains a local copy of the initial entire matrix PA.
4726 *
4727 * PA (local input) REAL array
4728 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4729 * array contains the local entries of the matrix PA.
4730 *
4731 * IA (global input) INTEGER
4732 * On entry, IA specifies A's global row index, which points to
4733 * the beginning of the submatrix sub( A ).
4734 *
4735 * JA (global input) INTEGER
4736 * On entry, JA specifies A's global column index, which points
4737 * to the beginning of the submatrix sub( A ).
4738 *
4739 * DESCA (global and local input) INTEGER array
4740 * On entry, DESCA is an integer array of dimension DLEN_. This
4741 * is the array descriptor for the matrix A.
4742 *
4743 * G (workspace) REAL array
4744 * On entry, G is an array of dimension at least MAX( M, N ). G
4745 * is used to compute the gauges.
4746 *
4747 * ERR (global output) REAL
4748 * On exit, ERR specifies the largest error in absolute value.
4749 *
4750 * INFO (global output) INTEGER
4751 * On exit, if INFO <> 0, the result is less than half accurate.
4752 *
4753 * -- Written on April 1, 1998 by
4754 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4755 *
4756 * =====================================================================
4757 *
4758 * .. Parameters ..
4759  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4760  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4761  $ RSRC_
4762  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4763  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4764  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4765  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4766  REAL ZERO, ONE
4767  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
4768 * ..
4769 * .. Local Scalars ..
4770  LOGICAL COLREP, LOWER, ROWREP, UPPER
4771  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4772  $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4773  $ ldx, ldy, mycol, myrow, npcol, nprow
4774  REAL ATMP, EPS, ERRI, GTMP
4775 * ..
4776 * .. External Subroutines ..
4777  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4778 * ..
4779 * .. External Functions ..
4780  LOGICAL LSAME
4781  REAL PSLAMCH
4782  EXTERNAL LSAME, PSLAMCH
4783 * ..
4784 * .. Intrinsic Functions ..
4785  INTRINSIC abs, max, min, mod, sqrt
4786 * ..
4787 * .. Executable Statements ..
4788 *
4789  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4790 *
4791  eps = pslamch( ictxt, 'eps' )
4792 *
4793  upper = lsame( uplo, 'U' )
4794  lower = lsame( uplo, 'L' )
4795 *
4796  lda = max( 1, desca( m_ ) )
4797  ldx = max( 1, descx( m_ ) )
4798  ldy = max( 1, descy( m_ ) )
4799 *
4800 * Compute expected result in A using data in A, X and Y.
4801 * Compute gauges in G. This part of the computation is performed
4802 * by every process in the grid.
4803 *
4804  DO 70 j = 1, n
4805 *
4806  ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4807 *
4808  IF( lower ) THEN
4809  ibeg = j
4810  iend = m
4811  DO 10 i = 1, j-1
4812  g( i ) = zero
4813  10 CONTINUE
4814  ELSE IF( upper ) THEN
4815  ibeg = 1
4816  iend = j
4817  DO 20 i = j+1, m
4818  g( i ) = zero
4819  20 CONTINUE
4820  ELSE
4821  ibeg = 1
4822  iend = m
4823  END IF
4824 *
4825  DO 30 i = ibeg, iend
4826 *
4827  ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4828  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4829  atmp = x( ioffx ) * y( ioffy )
4830  gtmp = abs( x( ioffx ) * y( ioffy ) )
4831  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
4832  a( ioffa ) = alpha * atmp + a( ioffa )
4833 *
4834  30 CONTINUE
4835 *
4836 * Compute the error ratio for this result.
4837 *
4838  info = 0
4839  err = zero
4840  ldpa = desca( lld_ )
4841  ioffa = ia + ( ja + j - 2 ) * lda
4842  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4843  $ iia, jja, iarow, iacol )
4844  rowrep = ( iarow.EQ.-1 )
4845  colrep = ( iacol.EQ.-1 )
4846 *
4847  IF( mycol.EQ.iacol .OR. colrep ) THEN
4848 *
4849  icurrow = iarow
4850  ib = desca( imb_ ) - ia + 1
4851  IF( ib.LE.0 )
4852  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4853  ib = min( ib, m )
4854  in = ia + ib - 1
4855 *
4856  DO 40 i = ia, in
4857 *
4858  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4859  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4860  IF( g( i-ia+1 ).NE.zero )
4861  $ erri = erri / g( i-ia+1 )
4862  err = max( err, erri )
4863  IF( err*sqrt( eps ).GE.one )
4864  $ info = 1
4865  iia = iia + 1
4866  END IF
4867 *
4868  ioffa = ioffa + 1
4869 *
4870  40 CONTINUE
4871 *
4872  icurrow = mod( icurrow+1, nprow )
4873 *
4874  DO 60 i = in+1, ia+m-1, desca( mb_ )
4875  ib = min( ia+m-i, desca( mb_ ) )
4876 *
4877  DO 50 kk = 0, ib-1
4878 *
4879  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4880  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4881  IF( g( i+kk-ia+1 ).NE.zero )
4882  $ erri = erri / g( i+kk-ia+1 )
4883  err = max( err, erri )
4884  IF( err*sqrt( eps ).GE.one )
4885  $ info = 1
4886  iia = iia + 1
4887  END IF
4888 *
4889  ioffa = ioffa + 1
4890 *
4891  50 CONTINUE
4892 *
4893  icurrow = mod( icurrow+1, nprow )
4894 *
4895  60 CONTINUE
4896 *
4897  END IF
4898 *
4899 * If INFO = 0, all results are at least half accurate.
4900 *
4901  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4902  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4903  $ mycol )
4904  IF( info.NE.0 )
4905  $ GO TO 80
4906 *
4907  70 CONTINUE
4908 *
4909  80 CONTINUE
4910 *
4911  RETURN
4912 *
4913 * End of PSVMCH
4914 *
4915  END
4916  SUBROUTINE psvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4917  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4918  $ JA, DESCA, G, ERR, INFO )
4920 * -- PBLAS test routine (version 2.0) --
4921 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4922 * and University of California, Berkeley.
4923 * April 1, 1998
4924 *
4925 * .. Scalar Arguments ..
4926  CHARACTER*1 UPLO
4927  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4928  $ jy, m, n
4929  REAL ALPHA, ERR
4930 * ..
4931 * .. Array Arguments ..
4932  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4933  REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4934 * ..
4935 *
4936 * Purpose
4937 * =======
4938 *
4939 * PSVMCH2 checks the results of the computational tests.
4940 *
4941 * Notes
4942 * =====
4943 *
4944 * A description vector is associated with each 2D block-cyclicly dis-
4945 * tributed matrix. This vector stores the information required to
4946 * establish the mapping between a matrix entry and its corresponding
4947 * process and memory location.
4948 *
4949 * In the following comments, the character _ should be read as
4950 * "of the distributed matrix". Let A be a generic term for any 2D
4951 * block cyclicly distributed matrix. Its description vector is DESCA:
4952 *
4953 * NOTATION STORED IN EXPLANATION
4954 * ---------------- --------------- ------------------------------------
4955 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4956 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4957 * the NPROW x NPCOL BLACS process grid
4958 * A is distributed over. The context
4959 * itself is global, but the handle
4960 * (the integer value) may vary.
4961 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4962 * ted matrix A, M_A >= 0.
4963 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4964 * buted matrix A, N_A >= 0.
4965 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4966 * block of the matrix A, IMB_A > 0.
4967 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4968 * left block of the matrix A,
4969 * INB_A > 0.
4970 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4971 * bute the last M_A-IMB_A rows of A,
4972 * MB_A > 0.
4973 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4974 * bute the last N_A-INB_A columns of
4975 * A, NB_A > 0.
4976 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4977 * row of the matrix A is distributed,
4978 * NPROW > RSRC_A >= 0.
4979 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4980 * first column of A is distributed.
4981 * NPCOL > CSRC_A >= 0.
4982 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4983 * array storing the local blocks of
4984 * the distributed matrix A,
4985 * IF( Lc( 1, N_A ) > 0 )
4986 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4987 * ELSE
4988 * LLD_A >= 1.
4989 *
4990 * Let K be the number of rows of a matrix A starting at the global in-
4991 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4992 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4993 * receive if these K rows were distributed over NPROW processes. If K
4994 * is the number of columns of a matrix A starting at the global index
4995 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4996 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4997 * these K columns were distributed over NPCOL processes.
4998 *
4999 * The values of Lr() and Lc() may be determined via a call to the func-
5000 * tion PB_NUMROC:
5001 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5002 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5003 *
5004 * Arguments
5005 * =========
5006 *
5007 * ICTXT (local input) INTEGER
5008 * On entry, ICTXT specifies the BLACS context handle, indica-
5009 * ting the global context of the operation. The context itself
5010 * is global, but the value of ICTXT is local.
5011 *
5012 * UPLO (global input) CHARACTER*1
5013 * On entry, UPLO specifies which part of the submatrix sub( A )
5014 * is to be referenced as follows:
5015 * If UPLO = 'L', only the lower triangular part,
5016 * If UPLO = 'U', only the upper triangular part,
5017 * else the entire matrix is to be referenced.
5018 *
5019 * M (global input) INTEGER
5020 * On entry, M specifies the number of rows of the submatrix
5021 * operand matrix A. M must be at least zero.
5022 *
5023 * N (global input) INTEGER
5024 * On entry, N specifies the number of columns of the subma-
5025 * trix operand matrix A. N must be at least zero.
5026 *
5027 * ALPHA (global input) REAL
5028 * On entry, ALPHA specifies the scalar alpha.
5029 *
5030 * X (local input) REAL array
5031 * On entry, X is an array of dimension (DESCX( M_ ),*). This
5032 * array contains a local copy of the initial entire matrix PX.
5033 *
5034 * IX (global input) INTEGER
5035 * On entry, IX specifies X's global row index, which points to
5036 * the beginning of the submatrix sub( X ).
5037 *
5038 * JX (global input) INTEGER
5039 * On entry, JX specifies X's global column index, which points
5040 * to the beginning of the submatrix sub( X ).
5041 *
5042 * DESCX (global and local input) INTEGER array
5043 * On entry, DESCX is an integer array of dimension DLEN_. This
5044 * is the array descriptor for the matrix X.
5045 *
5046 * INCX (global input) INTEGER
5047 * On entry, INCX specifies the global increment for the
5048 * elements of X. Only two values of INCX are supported in
5049 * this version, namely 1 and M_X. INCX must not be zero.
5050 *
5051 * Y (local input) REAL array
5052 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
5053 * array contains a local copy of the initial entire matrix PY.
5054 *
5055 * IY (global input) INTEGER
5056 * On entry, IY specifies Y's global row index, which points to
5057 * the beginning of the submatrix sub( Y ).
5058 *
5059 * JY (global input) INTEGER
5060 * On entry, JY specifies Y's global column index, which points
5061 * to the beginning of the submatrix sub( Y ).
5062 *
5063 * DESCY (global and local input) INTEGER array
5064 * On entry, DESCY is an integer array of dimension DLEN_. This
5065 * is the array descriptor for the matrix Y.
5066 *
5067 * INCY (global input) INTEGER
5068 * On entry, INCY specifies the global increment for the
5069 * elements of Y. Only two values of INCY are supported in
5070 * this version, namely 1 and M_Y. INCY must not be zero.
5071 *
5072 * A (local input/local output) REAL array
5073 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5074 * array contains a local copy of the initial entire matrix PA.
5075 *
5076 * PA (local input) REAL array
5077 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5078 * array contains the local entries of the matrix PA.
5079 *
5080 * IA (global input) INTEGER
5081 * On entry, IA specifies A's global row index, which points to
5082 * the beginning of the submatrix sub( A ).
5083 *
5084 * JA (global input) INTEGER
5085 * On entry, JA specifies A's global column index, which points
5086 * to the beginning of the submatrix sub( A ).
5087 *
5088 * DESCA (global and local input) INTEGER array
5089 * On entry, DESCA is an integer array of dimension DLEN_. This
5090 * is the array descriptor for the matrix A.
5091 *
5092 * G (workspace) REAL array
5093 * On entry, G is an array of dimension at least MAX( M, N ). G
5094 * is used to compute the gauges.
5095 *
5096 * ERR (global output) REAL
5097 * On exit, ERR specifies the largest error in absolute value.
5098 *
5099 * INFO (global output) INTEGER
5100 * On exit, if INFO <> 0, the result is less than half accurate.
5101 *
5102 * -- Written on April 1, 1998 by
5103 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5104 *
5105 * =====================================================================
5106 *
5107 * .. Parameters ..
5108  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5109  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5110  $ RSRC_
5111  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5112  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5113  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5114  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5115  REAL ZERO, ONE
5116  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5117 * ..
5118 * .. Local Scalars ..
5119  LOGICAL COLREP, LOWER, ROWREP, UPPER
5120  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5121  $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5122  $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5123  $ npcol, nprow
5124  REAL EPS, ERRI, GTMP, ATMP
5125 * ..
5126 * .. External Subroutines ..
5127  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5128 * ..
5129 * .. External Functions ..
5130  LOGICAL LSAME
5131  REAL PSLAMCH
5132  EXTERNAL lsame, pslamch
5133 * ..
5134 * .. Intrinsic Functions ..
5135  INTRINSIC abs, max, min, mod, sqrt
5136 * ..
5137 * .. Executable Statements ..
5138 *
5139  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5140 *
5141  eps = pslamch( ictxt, 'eps' )
5142 *
5143  upper = lsame( uplo, 'U' )
5144  lower = lsame( uplo, 'L' )
5145 *
5146  lda = max( 1, desca( m_ ) )
5147  ldx = max( 1, descx( m_ ) )
5148  ldy = max( 1, descy( m_ ) )
5149 *
5150 * Compute expected result in A using data in A, X and Y.
5151 * Compute gauges in G. This part of the computation is performed
5152 * by every process in the grid.
5153 *
5154  DO 70 j = 1, n
5155 *
5156  ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5157  ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5158 *
5159  IF( lower ) THEN
5160  ibeg = j
5161  iend = m
5162  DO 10 i = 1, j-1
5163  g( i ) = zero
5164  10 CONTINUE
5165  ELSE IF( upper ) THEN
5166  ibeg = 1
5167  iend = j
5168  DO 20 i = j+1, m
5169  g( i ) = zero
5170  20 CONTINUE
5171  ELSE
5172  ibeg = 1
5173  iend = m
5174  END IF
5175 *
5176  DO 30 i = ibeg, iend
5177  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5178  ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5179  ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5180  atmp = x( ioffxi ) * y( ioffyj )
5181  atmp = atmp + y( ioffyi ) * x( ioffxj )
5182  gtmp = abs( x( ioffxi ) * y( ioffyj ) )
5183  gtmp = gtmp + abs( y( ioffyi ) * x( ioffxj ) )
5184  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
5185  a( ioffa ) = alpha*atmp + a( ioffa )
5186 *
5187  30 CONTINUE
5188 *
5189 * Compute the error ratio for this result.
5190 *
5191  info = 0
5192  err = zero
5193  ldpa = desca( lld_ )
5194  ioffa = ia + ( ja + j - 2 ) * lda
5195  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5196  $ iia, jja, iarow, iacol )
5197  rowrep = ( iarow.EQ.-1 )
5198  colrep = ( iacol.EQ.-1 )
5199 *
5200  IF( mycol.EQ.iacol .OR. colrep ) THEN
5201 *
5202  icurrow = iarow
5203  ib = desca( imb_ ) - ia + 1
5204  IF( ib.LE.0 )
5205  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5206  ib = min( ib, m )
5207  in = ia + ib - 1
5208 *
5209  DO 40 i = ia, in
5210 *
5211  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5212  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5213  IF( g( i-ia+1 ).NE.zero )
5214  $ erri = erri / g( i-ia+1 )
5215  err = max( err, erri )
5216  IF( err*sqrt( eps ).GE.one )
5217  $ info = 1
5218  iia = iia + 1
5219  END IF
5220 *
5221  ioffa = ioffa + 1
5222 *
5223  40 CONTINUE
5224 *
5225  icurrow = mod( icurrow+1, nprow )
5226 *
5227  DO 60 i = in+1, ia+m-1, desca( mb_ )
5228  ib = min( ia+m-i, desca( mb_ ) )
5229 *
5230  DO 50 kk = 0, ib-1
5231 *
5232  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5233  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5234  IF( g( i+kk-ia+1 ).NE.zero )
5235  $ erri = erri / g( i+kk-ia+1 )
5236  err = max( err, erri )
5237  IF( err*sqrt( eps ).GE.one )
5238  $ info = 1
5239  iia = iia + 1
5240  END IF
5241 *
5242  ioffa = ioffa + 1
5243 *
5244  50 CONTINUE
5245 *
5246  icurrow = mod( icurrow+1, nprow )
5247 *
5248  60 CONTINUE
5249 *
5250  END IF
5251 *
5252 * If INFO = 0, all results are at least half accurate.
5253 *
5254  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5255  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5256  $ mycol )
5257  IF( info.NE.0 )
5258  $ GO TO 80
5259 *
5260  70 CONTINUE
5261 *
5262  80 CONTINUE
5263 *
5264  RETURN
5265 *
5266 * End of PSVMCH2
5267 *
5268  END
5269  SUBROUTINE psmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5270  $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5271  $ JC, DESCC, CT, G, ERR, INFO )
5273 * -- PBLAS test routine (version 2.0) --
5274 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5275 * and University of California, Berkeley.
5276 * April 1, 1998
5277 *
5278 * .. Scalar Arguments ..
5279  CHARACTER*1 TRANSA, TRANSB
5280  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5281  REAL ALPHA, BETA, ERR
5282 * ..
5283 * .. Array Arguments ..
5284  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5285  REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
5286  $ PC( * )
5287 * ..
5288 *
5289 * Purpose
5290 * =======
5291 *
5292 * PSMMCH checks the results of the computational tests.
5293 *
5294 * Notes
5295 * =====
5296 *
5297 * A description vector is associated with each 2D block-cyclicly dis-
5298 * tributed matrix. This vector stores the information required to
5299 * establish the mapping between a matrix entry and its corresponding
5300 * process and memory location.
5301 *
5302 * In the following comments, the character _ should be read as
5303 * "of the distributed matrix". Let A be a generic term for any 2D
5304 * block cyclicly distributed matrix. Its description vector is DESCA:
5305 *
5306 * NOTATION STORED IN EXPLANATION
5307 * ---------------- --------------- ------------------------------------
5308 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5309 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5310 * the NPROW x NPCOL BLACS process grid
5311 * A is distributed over. The context
5312 * itself is global, but the handle
5313 * (the integer value) may vary.
5314 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5315 * ted matrix A, M_A >= 0.
5316 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5317 * buted matrix A, N_A >= 0.
5318 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5319 * block of the matrix A, IMB_A > 0.
5320 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5321 * left block of the matrix A,
5322 * INB_A > 0.
5323 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5324 * bute the last M_A-IMB_A rows of A,
5325 * MB_A > 0.
5326 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5327 * bute the last N_A-INB_A columns of
5328 * A, NB_A > 0.
5329 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5330 * row of the matrix A is distributed,
5331 * NPROW > RSRC_A >= 0.
5332 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5333 * first column of A is distributed.
5334 * NPCOL > CSRC_A >= 0.
5335 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5336 * array storing the local blocks of
5337 * the distributed matrix A,
5338 * IF( Lc( 1, N_A ) > 0 )
5339 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5340 * ELSE
5341 * LLD_A >= 1.
5342 *
5343 * Let K be the number of rows of a matrix A starting at the global in-
5344 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5345 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5346 * receive if these K rows were distributed over NPROW processes. If K
5347 * is the number of columns of a matrix A starting at the global index
5348 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5349 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5350 * these K columns were distributed over NPCOL processes.
5351 *
5352 * The values of Lr() and Lc() may be determined via a call to the func-
5353 * tion PB_NUMROC:
5354 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5355 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5356 *
5357 * Arguments
5358 * =========
5359 *
5360 * ICTXT (local input) INTEGER
5361 * On entry, ICTXT specifies the BLACS context handle, indica-
5362 * ting the global context of the operation. The context itself
5363 * is global, but the value of ICTXT is local.
5364 *
5365 * TRANSA (global input) CHARACTER*1
5366 * On entry, TRANSA specifies if the matrix operand A is to be
5367 * transposed.
5368 *
5369 * TRANSB (global input) CHARACTER*1
5370 * On entry, TRANSB specifies if the matrix operand B is to be
5371 * transposed.
5372 *
5373 * M (global input) INTEGER
5374 * On entry, M specifies the number of rows of C.
5375 *
5376 * N (global input) INTEGER
5377 * On entry, N specifies the number of columns of C.
5378 *
5379 * K (global input) INTEGER
5380 * On entry, K specifies the number of columns (resp. rows) of A
5381 * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5382 * PxSYR2K, PxHERK and PxHER2K.
5383 *
5384 * ALPHA (global input) REAL
5385 * On entry, ALPHA specifies the scalar alpha.
5386 *
5387 * A (local input) REAL array
5388 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5389 * array contains a local copy of the initial entire matrix PA.
5390 *
5391 * IA (global input) INTEGER
5392 * On entry, IA specifies A's global row index, which points to
5393 * the beginning of the submatrix sub( A ).
5394 *
5395 * JA (global input) INTEGER
5396 * On entry, JA specifies A's global column index, which points
5397 * to the beginning of the submatrix sub( A ).
5398 *
5399 * DESCA (global and local input) INTEGER array
5400 * On entry, DESCA is an integer array of dimension DLEN_. This
5401 * is the array descriptor for the matrix A.
5402 *
5403 * B (local input) REAL array
5404 * On entry, B is an array of dimension (DESCB( M_ ),*). This
5405 * array contains a local copy of the initial entire matrix PB.
5406 *
5407 * IB (global input) INTEGER
5408 * On entry, IB specifies B's global row index, which points to
5409 * the beginning of the submatrix sub( B ).
5410 *
5411 * JB (global input) INTEGER
5412 * On entry, JB specifies B's global column index, which points
5413 * to the beginning of the submatrix sub( B ).
5414 *
5415 * DESCB (global and local input) INTEGER array
5416 * On entry, DESCB is an integer array of dimension DLEN_. This
5417 * is the array descriptor for the matrix B.
5418 *
5419 * BETA (global input) REAL
5420 * On entry, BETA specifies the scalar beta.
5421 *
5422 * C (local input/local output) REAL array
5423 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5424 * array contains a local copy of the initial entire matrix PC.
5425 *
5426 * PC (local input) REAL array
5427 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5428 * array contains the local pieces of the matrix PC.
5429 *
5430 * IC (global input) INTEGER
5431 * On entry, IC specifies C's global row index, which points to
5432 * the beginning of the submatrix sub( C ).
5433 *
5434 * JC (global input) INTEGER
5435 * On entry, JC specifies C's global column index, which points
5436 * to the beginning of the submatrix sub( C ).
5437 *
5438 * DESCC (global and local input) INTEGER array
5439 * On entry, DESCC is an integer array of dimension DLEN_. This
5440 * is the array descriptor for the matrix C.
5441 *
5442 * CT (workspace) REAL array
5443 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5444 * holds a copy of the current column of C.
5445 *
5446 * G (workspace) REAL array
5447 * On entry, G is an array of dimension at least MAX(M,N,K). G
5448 * is used to compute the gauges.
5449 *
5450 * ERR (global output) REAL
5451 * On exit, ERR specifies the largest error in absolute value.
5452 *
5453 * INFO (global output) INTEGER
5454 * On exit, if INFO <> 0, the result is less than half accurate.
5455 *
5456 * -- Written on April 1, 1998 by
5457 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5458 *
5459 * =====================================================================
5460 *
5461 * .. Parameters ..
5462  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5463  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5464  $ RSRC_
5465  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5466  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5467  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5468  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5469  REAL ZERO, ONE
5470  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5471 * ..
5472 * .. Local Scalars ..
5473  LOGICAL COLREP, ROWREP, TRANA, TRANB
5474  INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5475  $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5476  $ mycol, myrow, npcol, nprow
5477  REAL EPS, ERRI
5478 * ..
5479 * .. External Subroutines ..
5480  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5481 * ..
5482 * .. External Functions ..
5483  LOGICAL LSAME
5484  REAL PSLAMCH
5485  EXTERNAL LSAME, PSLAMCH
5486 * ..
5487 * .. Intrinsic Functions ..
5488  INTRINSIC abs, max, min, mod, sqrt
5489 * ..
5490 * .. Executable Statements ..
5491 *
5492  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5493 *
5494  eps = pslamch( ictxt, 'eps' )
5495 *
5496  trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5497  tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5498 *
5499  lda = max( 1, desca( m_ ) )
5500  ldb = max( 1, descb( m_ ) )
5501  ldc = max( 1, descc( m_ ) )
5502 *
5503 * Compute expected result in C using data in A, B and C.
5504 * Compute gauges in G. This part of the computation is performed
5505 * by every process in the grid.
5506 *
5507  DO 240 j = 1, n
5508 *
5509  ioffc = ic + ( jc + j - 2 ) * ldc
5510  DO 10 i = 1, m
5511  ct( i ) = zero
5512  g( i ) = zero
5513  10 CONTINUE
5514 *
5515  IF( .NOT.trana .AND. .NOT.tranb ) THEN
5516  DO 30 kk = 1, k
5517  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5518  DO 20 i = 1, m
5519  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5520  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5521  g( i ) = g( i ) + abs( a( ioffa ) ) *
5522  $ abs( b( ioffb ) )
5523  20 CONTINUE
5524  30 CONTINUE
5525  ELSE IF( trana .AND. .NOT.tranb ) THEN
5526  DO 50 kk = 1, k
5527  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5528  DO 40 i = 1, m
5529  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5530  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5531  g( i ) = g( i ) + abs( a( ioffa ) ) *
5532  $ abs( b( ioffb ) )
5533  40 CONTINUE
5534  50 CONTINUE
5535  ELSE IF( .NOT.trana .AND. tranb ) THEN
5536  DO 70 kk = 1, k
5537  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5538  DO 60 i = 1, m
5539  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5540  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5541  g( i ) = g( i ) + abs( a( ioffa ) ) *
5542  $ abs( b( ioffb ) )
5543  60 CONTINUE
5544  70 CONTINUE
5545  ELSE IF( trana .AND. tranb ) THEN
5546  DO 90 kk = 1, k
5547  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5548  DO 80 i = 1, m
5549  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5550  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5551  g( i ) = g( i ) + abs( a( ioffa ) ) *
5552  $ abs( b( ioffb ) )
5553  80 CONTINUE
5554  90 CONTINUE
5555  END IF
5556 *
5557  DO 200 i = 1, m
5558  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5559  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5560  c( ioffc ) = ct( i )
5561  ioffc = ioffc + 1
5562  200 CONTINUE
5563 *
5564 * Compute the error ratio for this result.
5565 *
5566  err = zero
5567  info = 0
5568  ldpc = descc( lld_ )
5569  ioffc = ic + ( jc + j - 2 ) * ldc
5570  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5571  $ iic, jjc, icrow, iccol )
5572  icurrow = icrow
5573  rowrep = ( icrow.EQ.-1 )
5574  colrep = ( iccol.EQ.-1 )
5575 *
5576  IF( mycol.EQ.iccol .OR. colrep ) THEN
5577 *
5578  ibb = descc( imb_ ) - ic + 1
5579  IF( ibb.LE.0 )
5580  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5581  ibb = min( ibb, m )
5582  in = ic + ibb - 1
5583 *
5584  DO 210 i = ic, in
5585 *
5586  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5587  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5588  $ c( ioffc ) ) / eps
5589  IF( g( i-ic+1 ).NE.zero )
5590  $ erri = erri / g( i-ic+1 )
5591  err = max( err, erri )
5592  IF( err*sqrt( eps ).GE.one )
5593  $ info = 1
5594  iic = iic + 1
5595  END IF
5596 *
5597  ioffc = ioffc + 1
5598 *
5599  210 CONTINUE
5600 *
5601  icurrow = mod( icurrow+1, nprow )
5602 *
5603  DO 230 i = in+1, ic+m-1, descc( mb_ )
5604  ibb = min( ic+m-i, descc( mb_ ) )
5605 *
5606  DO 220 kk = 0, ibb-1
5607 *
5608  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5609  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5610  $ c( ioffc ) )/eps
5611  IF( g( i+kk-ic+1 ).NE.zero )
5612  $ erri = erri / g( i+kk-ic+1 )
5613  err = max( err, erri )
5614  IF( err*sqrt( eps ).GE.one )
5615  $ info = 1
5616  iic = iic + 1
5617  END IF
5618 *
5619  ioffc = ioffc + 1
5620 *
5621  220 CONTINUE
5622 *
5623  icurrow = mod( icurrow+1, nprow )
5624 *
5625  230 CONTINUE
5626 *
5627  END IF
5628 *
5629 * If INFO = 0, all results are at least half accurate.
5630 *
5631  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5632  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5633  $ mycol )
5634  IF( info.NE.0 )
5635  $ GO TO 250
5636 *
5637  240 CONTINUE
5638 *
5639  250 CONTINUE
5640 *
5641  RETURN
5642 *
5643 * End of PSMMCH
5644 *
5645  END
5646  SUBROUTINE psmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5647  $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5648  $ ERR, INFO )
5650 * -- PBLAS test routine (version 2.0) --
5651 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652 * and University of California, Berkeley.
5653 * April 1, 1998
5654 *
5655 * .. Scalar Arguments ..
5656  CHARACTER*1 TRANS, UPLO
5657  INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658  REAL ALPHA, BETA, ERR
5659 * ..
5660 * .. Array Arguments ..
5661  INTEGER DESCA( * ), DESCC( * )
5662  REAL A( * ), C( * ), CT( * ), G( * ), PC( * )
5663 * ..
5664 *
5665 * Purpose
5666 * =======
5667 *
5668 * PSMMCH1 checks the results of the computational tests.
5669 *
5670 * Notes
5671 * =====
5672 *
5673 * A description vector is associated with each 2D block-cyclicly dis-
5674 * tributed matrix. This vector stores the information required to
5675 * establish the mapping between a matrix entry and its corresponding
5676 * process and memory location.
5677 *
5678 * In the following comments, the character _ should be read as
5679 * "of the distributed matrix". Let A be a generic term for any 2D
5680 * block cyclicly distributed matrix. Its description vector is DESCA:
5681 *
5682 * NOTATION STORED IN EXPLANATION
5683 * ---------------- --------------- ------------------------------------
5684 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686 * the NPROW x NPCOL BLACS process grid
5687 * A is distributed over. The context
5688 * itself is global, but the handle
5689 * (the integer value) may vary.
5690 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691 * ted matrix A, M_A >= 0.
5692 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5693 * buted matrix A, N_A >= 0.
5694 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695 * block of the matrix A, IMB_A > 0.
5696 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697 * left block of the matrix A,
5698 * INB_A > 0.
5699 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700 * bute the last M_A-IMB_A rows of A,
5701 * MB_A > 0.
5702 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703 * bute the last N_A-INB_A columns of
5704 * A, NB_A > 0.
5705 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706 * row of the matrix A is distributed,
5707 * NPROW > RSRC_A >= 0.
5708 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709 * first column of A is distributed.
5710 * NPCOL > CSRC_A >= 0.
5711 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712 * array storing the local blocks of
5713 * the distributed matrix A,
5714 * IF( Lc( 1, N_A ) > 0 )
5715 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716 * ELSE
5717 * LLD_A >= 1.
5718 *
5719 * Let K be the number of rows of a matrix A starting at the global in-
5720 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722 * receive if these K rows were distributed over NPROW processes. If K
5723 * is the number of columns of a matrix A starting at the global index
5724 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726 * these K columns were distributed over NPCOL processes.
5727 *
5728 * The values of Lr() and Lc() may be determined via a call to the func-
5729 * tion PB_NUMROC:
5730 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732 *
5733 * Arguments
5734 * =========
5735 *
5736 * ICTXT (local input) INTEGER
5737 * On entry, ICTXT specifies the BLACS context handle, indica-
5738 * ting the global context of the operation. The context itself
5739 * is global, but the value of ICTXT is local.
5740 *
5741 * UPLO (global input) CHARACTER*1
5742 * On entry, UPLO specifies which part of C should contain the
5743 * result.
5744 *
5745 * TRANS (global input) CHARACTER*1
5746 * On entry, TRANS specifies whether the matrix A has to be
5747 * transposed or not before computing the matrix-matrix product.
5748 *
5749 * N (global input) INTEGER
5750 * On entry, N specifies the order the submatrix operand C. N
5751 * must be at least zero.
5752 *
5753 * K (global input) INTEGER
5754 * On entry, K specifies the number of columns (resp. rows) of A
5755 * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756 * zero.
5757 *
5758 * ALPHA (global input) REAL
5759 * On entry, ALPHA specifies the scalar alpha.
5760 *
5761 * A (local input) REAL array
5762 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5763 * array contains a local copy of the initial entire matrix PA.
5764 *
5765 * IA (global input) INTEGER
5766 * On entry, IA specifies A's global row index, which points to
5767 * the beginning of the submatrix sub( A ).
5768 *
5769 * JA (global input) INTEGER
5770 * On entry, JA specifies A's global column index, which points
5771 * to the beginning of the submatrix sub( A ).
5772 *
5773 * DESCA (global and local input) INTEGER array
5774 * On entry, DESCA is an integer array of dimension DLEN_. This
5775 * is the array descriptor for the matrix A.
5776 *
5777 * BETA (global input) REAL
5778 * On entry, BETA specifies the scalar beta.
5779 *
5780 * C (local input/local output) REAL array
5781 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5782 * array contains a local copy of the initial entire matrix PC.
5783 *
5784 * PC (local input) REAL array
5785 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786 * array contains the local pieces of the matrix PC.
5787 *
5788 * IC (global input) INTEGER
5789 * On entry, IC specifies C's global row index, which points to
5790 * the beginning of the submatrix sub( C ).
5791 *
5792 * JC (global input) INTEGER
5793 * On entry, JC specifies C's global column index, which points
5794 * to the beginning of the submatrix sub( C ).
5795 *
5796 * DESCC (global and local input) INTEGER array
5797 * On entry, DESCC is an integer array of dimension DLEN_. This
5798 * is the array descriptor for the matrix C.
5799 *
5800 * CT (workspace) REAL array
5801 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802 * holds a copy of the current column of C.
5803 *
5804 * G (workspace) REAL array
5805 * On entry, G is an array of dimension at least MAX(M,N,K). G
5806 * is used to compute the gauges.
5807 *
5808 * ERR (global output) REAL
5809 * On exit, ERR specifies the largest error in absolute value.
5810 *
5811 * INFO (global output) INTEGER
5812 * On exit, if INFO <> 0, the result is less than half accurate.
5813 *
5814 * -- Written on April 1, 1998 by
5815 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816 *
5817 * =====================================================================
5818 *
5819 * .. Parameters ..
5820  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822  $ RSRC_
5823  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5824  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827  REAL ZERO, ONE
5828  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5829 * ..
5830 * .. Local Scalars ..
5831  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833  $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834  $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835  REAL EPS, ERRI
5836 * ..
5837 * .. External Subroutines ..
5838  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5839 * ..
5840 * .. External Functions ..
5841  LOGICAL LSAME
5842  REAL PSLAMCH
5843  EXTERNAL lsame, pslamch
5844 * ..
5845 * .. Intrinsic Functions ..
5846  INTRINSIC abs, max, min, mod, sqrt
5847 * ..
5848 * .. Executable Statements ..
5849 *
5850  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851 *
5852  eps = pslamch( ictxt, 'eps' )
5853 *
5854  upper = lsame( uplo, 'U' )
5855  notran = lsame( trans, 'N' )
5856  tran = lsame( trans, 'T' )
5857 *
5858  lda = max( 1, desca( m_ ) )
5859  ldc = max( 1, descc( m_ ) )
5860 *
5861 * Compute expected result in C using data in A, B and C.
5862 * Compute gauges in G. This part of the computation is performed
5863 * by every process in the grid.
5864 *
5865  DO 140 j = 1, n
5866 *
5867  IF( upper ) THEN
5868  ibeg = 1
5869  iend = j
5870  ELSE
5871  ibeg = j
5872  iend = n
5873  END IF
5874 *
5875  DO 10 i = 1, n
5876  ct( i ) = zero
5877  g( i ) = zero
5878  10 CONTINUE
5879 *
5880  IF( notran ) THEN
5881  DO 30 kk = 1, k
5882  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883  DO 20 i = ibeg, iend
5884  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886  g( i ) = g( i ) + abs( a( ioffak ) ) *
5887  $ abs( a( ioffan ) )
5888  20 CONTINUE
5889  30 CONTINUE
5890  ELSE IF( tran ) THEN
5891  DO 50 kk = 1, k
5892  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893  DO 40 i = ibeg, iend
5894  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896  g( i ) = g( i ) + abs( a( ioffak ) ) *
5897  $ abs( a( ioffan ) )
5898  40 CONTINUE
5899  50 CONTINUE
5900  END IF
5901 *
5902  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903 *
5904  DO 100 i = ibeg, iend
5905  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907  c( ioffc ) = ct( i )
5908  ioffc = ioffc + 1
5909  100 CONTINUE
5910 *
5911 * Compute the error ratio for this result.
5912 *
5913  err = zero
5914  info = 0
5915  ldpc = descc( lld_ )
5916  ioffc = ic + ( jc + j - 2 ) * ldc
5917  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918  $ iic, jjc, icrow, iccol )
5919  icurrow = icrow
5920  rowrep = ( icrow.EQ.-1 )
5921  colrep = ( iccol.EQ.-1 )
5922 *
5923  IF( mycol.EQ.iccol .OR. colrep ) THEN
5924 *
5925  ibb = descc( imb_ ) - ic + 1
5926  IF( ibb.LE.0 )
5927  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928  ibb = min( ibb, n )
5929  in = ic + ibb - 1
5930 *
5931  DO 110 i = ic, in
5932 *
5933  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935  $ c( ioffc ) ) / eps
5936  IF( g( i-ic+1 ).NE.zero )
5937  $ erri = erri / g( i-ic+1 )
5938  err = max( err, erri )
5939  IF( err*sqrt( eps ).GE.one )
5940  $ info = 1
5941  iic = iic + 1
5942  END IF
5943 *
5944  ioffc = ioffc + 1
5945 *
5946  110 CONTINUE
5947 *
5948  icurrow = mod( icurrow+1, nprow )
5949 *
5950  DO 130 i = in+1, ic+n-1, descc( mb_ )
5951  ibb = min( ic+n-i, descc( mb_ ) )
5952 *
5953  DO 120 kk = 0, ibb-1
5954 *
5955  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957  $ c( ioffc ) )/eps
5958  IF( g( i+kk-ic+1 ).NE.zero )
5959  $ erri = erri / g( i+kk-ic+1 )
5960  err = max( err, erri )
5961  IF( err*sqrt( eps ).GE.one )
5962  $ info = 1
5963  iic = iic + 1
5964  END IF
5965 *
5966  ioffc = ioffc + 1
5967 *
5968  120 CONTINUE
5969 *
5970  icurrow = mod( icurrow+1, nprow )
5971 *
5972  130 CONTINUE
5973 *
5974  END IF
5975 *
5976 * If INFO = 0, all results are at least half accurate.
5977 *
5978  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980  $ mycol )
5981  IF( info.NE.0 )
5982  $ GO TO 150
5983 *
5984  140 CONTINUE
5985 *
5986  150 CONTINUE
5987 *
5988  RETURN
5989 *
5990 * End of PSMMCH1
5991 *
5992  END
5993  SUBROUTINE psmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5994  $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5995  $ JC, DESCC, CT, G, ERR, INFO )
5997 * -- PBLAS test routine (version 2.0) --
5998 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999 * and University of California, Berkeley.
6000 * April 1, 1998
6001 *
6002 * .. Scalar Arguments ..
6003  CHARACTER*1 TRANS, UPLO
6004  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005  REAL ALPHA, BETA, ERR
6006 * ..
6007 * .. Array Arguments ..
6008  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009  REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
6010  $ pc( * )
6011 * ..
6012 *
6013 * Purpose
6014 * =======
6015 *
6016 * PSMMCH2 checks the results of the computational tests.
6017 *
6018 * Notes
6019 * =====
6020 *
6021 * A description vector is associated with each 2D block-cyclicly dis-
6022 * tributed matrix. This vector stores the information required to
6023 * establish the mapping between a matrix entry and its corresponding
6024 * process and memory location.
6025 *
6026 * In the following comments, the character _ should be read as
6027 * "of the distributed matrix". Let A be a generic term for any 2D
6028 * block cyclicly distributed matrix. Its description vector is DESCA:
6029 *
6030 * NOTATION STORED IN EXPLANATION
6031 * ---------------- --------------- ------------------------------------
6032 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034 * the NPROW x NPCOL BLACS process grid
6035 * A is distributed over. The context
6036 * itself is global, but the handle
6037 * (the integer value) may vary.
6038 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039 * ted matrix A, M_A >= 0.
6040 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6041 * buted matrix A, N_A >= 0.
6042 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043 * block of the matrix A, IMB_A > 0.
6044 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045 * left block of the matrix A,
6046 * INB_A > 0.
6047 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048 * bute the last M_A-IMB_A rows of A,
6049 * MB_A > 0.
6050 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051 * bute the last N_A-INB_A columns of
6052 * A, NB_A > 0.
6053 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054 * row of the matrix A is distributed,
6055 * NPROW > RSRC_A >= 0.
6056 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057 * first column of A is distributed.
6058 * NPCOL > CSRC_A >= 0.
6059 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060 * array storing the local blocks of
6061 * the distributed matrix A,
6062 * IF( Lc( 1, N_A ) > 0 )
6063 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064 * ELSE
6065 * LLD_A >= 1.
6066 *
6067 * Let K be the number of rows of a matrix A starting at the global in-
6068 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070 * receive if these K rows were distributed over NPROW processes. If K
6071 * is the number of columns of a matrix A starting at the global index
6072 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074 * these K columns were distributed over NPCOL processes.
6075 *
6076 * The values of Lr() and Lc() may be determined via a call to the func-
6077 * tion PB_NUMROC:
6078 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080 *
6081 * Arguments
6082 * =========
6083 *
6084 * ICTXT (local input) INTEGER
6085 * On entry, ICTXT specifies the BLACS context handle, indica-
6086 * ting the global context of the operation. The context itself
6087 * is global, but the value of ICTXT is local.
6088 *
6089 * UPLO (global input) CHARACTER*1
6090 * On entry, UPLO specifies which part of C should contain the
6091 * result.
6092 *
6093 * TRANS (global input) CHARACTER*1
6094 * On entry, TRANS specifies whether the matrices A and B have
6095 * to be transposed or not before computing the matrix-matrix
6096 * product.
6097 *
6098 * N (global input) INTEGER
6099 * On entry, N specifies the order the submatrix operand C. N
6100 * must be at least zero.
6101 *
6102 * K (global input) INTEGER
6103 * On entry, K specifies the number of columns (resp. rows) of A
6104 * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105 * least zero.
6106 *
6107 * ALPHA (global input) REAL
6108 * On entry, ALPHA specifies the scalar alpha.
6109 *
6110 * A (local input) REAL array
6111 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6112 * array contains a local copy of the initial entire matrix PA.
6113 *
6114 * IA (global input) INTEGER
6115 * On entry, IA specifies A's global row index, which points to
6116 * the beginning of the submatrix sub( A ).
6117 *
6118 * JA (global input) INTEGER
6119 * On entry, JA specifies A's global column index, which points
6120 * to the beginning of the submatrix sub( A ).
6121 *
6122 * DESCA (global and local input) INTEGER array
6123 * On entry, DESCA is an integer array of dimension DLEN_. This
6124 * is the array descriptor for the matrix A.
6125 *
6126 * B (local input) REAL array
6127 * On entry, B is an array of dimension (DESCB( M_ ),*). This
6128 * array contains a local copy of the initial entire matrix PB.
6129 *
6130 * IB (global input) INTEGER
6131 * On entry, IB specifies B's global row index, which points to
6132 * the beginning of the submatrix sub( B ).
6133 *
6134 * JB (global input) INTEGER
6135 * On entry, JB specifies B's global column index, which points
6136 * to the beginning of the submatrix sub( B ).
6137 *
6138 * DESCB (global and local input) INTEGER array
6139 * On entry, DESCB is an integer array of dimension DLEN_. This
6140 * is the array descriptor for the matrix B.
6141 *
6142 * BETA (global input) REAL
6143 * On entry, BETA specifies the scalar beta.
6144 *
6145 * C (local input/local output) REAL array
6146 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6147 * array contains a local copy of the initial entire matrix PC.
6148 *
6149 * PC (local input) REAL array
6150 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151 * array contains the local pieces of the matrix PC.
6152 *
6153 * IC (global input) INTEGER
6154 * On entry, IC specifies C's global row index, which points to
6155 * the beginning of the submatrix sub( C ).
6156 *
6157 * JC (global input) INTEGER
6158 * On entry, JC specifies C's global column index, which points
6159 * to the beginning of the submatrix sub( C ).
6160 *
6161 * DESCC (global and local input) INTEGER array
6162 * On entry, DESCC is an integer array of dimension DLEN_. This
6163 * is the array descriptor for the matrix C.
6164 *
6165 * CT (workspace) REAL array
6166 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167 * holds a copy of the current column of C.
6168 *
6169 * G (workspace) REAL array
6170 * On entry, G is an array of dimension at least MAX(M,N,K). G
6171 * is used to compute the gauges.
6172 *
6173 * ERR (global output) REAL
6174 * On exit, ERR specifies the largest error in absolute value.
6175 *
6176 * INFO (global output) INTEGER
6177 * On exit, if INFO <> 0, the result is less than half accurate.
6178 *
6179 * -- Written on April 1, 1998 by
6180 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181 *
6182 * =====================================================================
6183 *
6184 * .. Parameters ..
6185  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187  $ RSRC_
6188  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6189  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192  REAL ZERO, ONE
6193  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
6194 * ..
6195 * .. Local Scalars ..
6196  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198  $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199  $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200  $ NPCOL, NPROW
6201  REAL EPS, ERRI
6202 * ..
6203 * .. External Subroutines ..
6204  EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
6205 * ..
6206 * .. External Functions ..
6207  LOGICAL LSAME
6208  REAL PSLAMCH
6209  EXTERNAL LSAME, PSLAMCH
6210 * ..
6211 * .. Intrinsic Functions ..
6212  INTRINSIC abs, max, min, mod, sqrt
6213 * ..
6214 * .. Executable Statements ..
6215 *
6216  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217 *
6218  eps = pslamch( ictxt, 'eps' )
6219 *
6220  upper = lsame( uplo, 'U' )
6221  notran = lsame( trans, 'N' )
6222  tran = lsame( trans, 'T' )
6223 *
6224  lda = max( 1, desca( m_ ) )
6225  ldb = max( 1, descb( m_ ) )
6226  ldc = max( 1, descc( m_ ) )
6227 *
6228 * Compute expected result in C using data in A, B and C.
6229 * Compute gauges in G. This part of the computation is performed
6230 * by every process in the grid.
6231 *
6232  DO 140 j = 1, n
6233 *
6234  IF( upper ) THEN
6235  ibeg = 1
6236  iend = j
6237  ELSE
6238  ibeg = j
6239  iend = n
6240  END IF
6241 *
6242  DO 10 i = 1, n
6243  ct( i ) = zero
6244  g( i ) = zero
6245  10 CONTINUE
6246 *
6247  IF( notran ) THEN
6248  DO 30 kk = 1, k
6249  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250  ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251  DO 20 i = ibeg, iend
6252  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253  ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254  ct( i ) = ct( i ) + alpha * (
6255  $ a( ioffan ) * b( ioffbk ) +
6256  $ b( ioffbn ) * a( ioffak ) )
6257  g( i ) = g( i ) + abs( alpha ) * (
6258  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260  20 CONTINUE
6261  30 CONTINUE
6262  ELSE IF( tran ) THEN
6263  DO 50 kk = 1, k
6264  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265  ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266  DO 40 i = ibeg, iend
6267  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268  ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269  ct( i ) = ct( i ) + alpha * (
6270  $ a( ioffan ) * b( ioffbk ) +
6271  $ b( ioffbn ) * a( ioffak ) )
6272  g( i ) = g( i ) + abs( alpha ) * (
6273  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275  40 CONTINUE
6276  50 CONTINUE
6277  END IF
6278 *
6279  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280 *
6281  DO 100 i = ibeg, iend
6282  ct( i ) = ct( i ) + beta * c( ioffc )
6283  g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284  c( ioffc ) = ct( i )
6285  ioffc = ioffc + 1
6286  100 CONTINUE
6287 *
6288 * Compute the error ratio for this result.
6289 *
6290  err = zero
6291  info = 0
6292  ldpc = descc( lld_ )
6293  ioffc = ic + ( jc + j - 2 ) * ldc
6294  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295  $ iic, jjc, icrow, iccol )
6296  icurrow = icrow
6297  rowrep = ( icrow.EQ.-1 )
6298  colrep = ( iccol.EQ.-1 )
6299 *
6300  IF( mycol.EQ.iccol .OR. colrep ) THEN
6301 *
6302  ibb = descc( imb_ ) - ic + 1
6303  IF( ibb.LE.0 )
6304  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305  ibb = min( ibb, n )
6306  in = ic + ibb - 1
6307 *
6308  DO 110 i = ic, in
6309 *
6310  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312  $ c( ioffc ) ) / eps
6313  IF( g( i-ic+1 ).NE.zero )
6314  $ erri = erri / g( i-ic+1 )
6315  err = max( err, erri )
6316  IF( err*sqrt( eps ).GE.one )
6317  $ info = 1
6318  iic = iic + 1
6319  END IF
6320 *
6321  ioffc = ioffc + 1
6322 *
6323  110 CONTINUE
6324 *
6325  icurrow = mod( icurrow+1, nprow )
6326 *
6327  DO 130 i = in+1, ic+n-1, descc( mb_ )
6328  ibb = min( ic+n-i, descc( mb_ ) )
6329 *
6330  DO 120 kk = 0, ibb-1
6331 *
6332  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334  $ c( ioffc ) )/eps
6335  IF( g( i+kk-ic+1 ).NE.zero )
6336  $ erri = erri / g( i+kk-ic+1 )
6337  err = max( err, erri )
6338  IF( err*sqrt( eps ).GE.one )
6339  $ info = 1
6340  iic = iic + 1
6341  END IF
6342 *
6343  ioffc = ioffc + 1
6344 *
6345  120 CONTINUE
6346 *
6347  icurrow = mod( icurrow+1, nprow )
6348 *
6349  130 CONTINUE
6350 *
6351  END IF
6352 *
6353 * If INFO = 0, all results are at least half accurate.
6354 *
6355  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357  $ mycol )
6358  IF( info.NE.0 )
6359  $ GO TO 150
6360 *
6361  140 CONTINUE
6362 *
6363  150 CONTINUE
6364 *
6365  RETURN
6366 *
6367 * End of PSMMCH2
6368 *
6369  END
6370  SUBROUTINE psmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6371  $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6373 * -- PBLAS test routine (version 2.0) --
6374 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6375 * and University of California, Berkeley.
6376 * April 1, 1998
6377 *
6378 * .. Scalar Arguments ..
6379  CHARACTER*1 TRANS, UPLO
6380  INTEGER IA, IC, INFO, JA, JC, M, N
6381  REAL ALPHA, BETA, ERR
6382 * ..
6383 * .. Array Arguments ..
6384  INTEGER DESCA( * ), DESCC( * )
6385  REAL A( * ), C( * ), PC( * )
6386 * ..
6387 *
6388 * Purpose
6389 * =======
6390 *
6391 * PSMMCH3 checks the results of the computational tests.
6392 *
6393 * Notes
6394 * =====
6395 *
6396 * A description vector is associated with each 2D block-cyclicly dis-
6397 * tributed matrix. This vector stores the information required to
6398 * establish the mapping between a matrix entry and its corresponding
6399 * process and memory location.
6400 *
6401 * In the following comments, the character _ should be read as
6402 * "of the distributed matrix". Let A be a generic term for any 2D
6403 * block cyclicly distributed matrix. Its description vector is DESCA:
6404 *
6405 * NOTATION STORED IN EXPLANATION
6406 * ---------------- --------------- ------------------------------------
6407 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6408 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6409 * the NPROW x NPCOL BLACS process grid
6410 * A is distributed over. The context
6411 * itself is global, but the handle
6412 * (the integer value) may vary.
6413 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6414 * ted matrix A, M_A >= 0.
6415 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6416 * buted matrix A, N_A >= 0.
6417 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6418 * block of the matrix A, IMB_A > 0.
6419 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6420 * left block of the matrix A,
6421 * INB_A > 0.
6422 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6423 * bute the last M_A-IMB_A rows of A,
6424 * MB_A > 0.
6425 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6426 * bute the last N_A-INB_A columns of
6427 * A, NB_A > 0.
6428 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6429 * row of the matrix A is distributed,
6430 * NPROW > RSRC_A >= 0.
6431 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6432 * first column of A is distributed.
6433 * NPCOL > CSRC_A >= 0.
6434 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6435 * array storing the local blocks of
6436 * the distributed matrix A,
6437 * IF( Lc( 1, N_A ) > 0 )
6438 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6439 * ELSE
6440 * LLD_A >= 1.
6441 *
6442 * Let K be the number of rows of a matrix A starting at the global in-
6443 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6444 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6445 * receive if these K rows were distributed over NPROW processes. If K
6446 * is the number of columns of a matrix A starting at the global index
6447 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6448 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6449 * these K columns were distributed over NPCOL processes.
6450 *
6451 * The values of Lr() and Lc() may be determined via a call to the func-
6452 * tion PB_NUMROC:
6453 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6454 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6455 *
6456 * Arguments
6457 * =========
6458 *
6459 * UPLO (global input) CHARACTER*1
6460 * On entry, UPLO specifies which part of C should contain the
6461 * result.
6462 *
6463 * TRANS (global input) CHARACTER*1
6464 * On entry, TRANS specifies whether the matrix A has to be
6465 * transposed or not before computing the matrix-matrix addi-
6466 * tion.
6467 *
6468 * M (global input) INTEGER
6469 * On entry, M specifies the number of rows of C.
6470 *
6471 * N (global input) INTEGER
6472 * On entry, N specifies the number of columns of C.
6473 *
6474 * ALPHA (global input) REAL
6475 * On entry, ALPHA specifies the scalar alpha.
6476 *
6477 * A (local input) REAL array
6478 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6479 * array contains a local copy of the initial entire matrix PA.
6480 *
6481 * IA (global input) INTEGER
6482 * On entry, IA specifies A's global row index, which points to
6483 * the beginning of the submatrix sub( A ).
6484 *
6485 * JA (global input) INTEGER
6486 * On entry, JA specifies A's global column index, which points
6487 * to the beginning of the submatrix sub( A ).
6488 *
6489 * DESCA (global and local input) INTEGER array
6490 * On entry, DESCA is an integer array of dimension DLEN_. This
6491 * is the array descriptor for the matrix A.
6492 *
6493 * BETA (global input) REAL
6494 * On entry, BETA specifies the scalar beta.
6495 *
6496 * C (local input/local output) REAL array
6497 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6498 * array contains a local copy of the initial entire matrix PC.
6499 *
6500 * PC (local input) REAL array
6501 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6502 * array contains the local pieces of the matrix PC.
6503 *
6504 * IC (global input) INTEGER
6505 * On entry, IC specifies C's global row index, which points to
6506 * the beginning of the submatrix sub( C ).
6507 *
6508 * JC (global input) INTEGER
6509 * On entry, JC specifies C's global column index, which points
6510 * to the beginning of the submatrix sub( C ).
6511 *
6512 * DESCC (global and local input) INTEGER array
6513 * On entry, DESCC is an integer array of dimension DLEN_. This
6514 * is the array descriptor for the matrix C.
6515 *
6516 * ERR (global output) REAL
6517 * On exit, ERR specifies the largest error in absolute value.
6518 *
6519 * INFO (global output) INTEGER
6520 * On exit, if INFO <> 0, the result is less than half accurate.
6521 *
6522 * -- Written on April 1, 1998 by
6523 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6524 *
6525 * =====================================================================
6526 *
6527 * .. Parameters ..
6528  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6529  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6530  $ RSRC_
6531  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6532  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6533  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6534  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6535  REAL ZERO
6536  PARAMETER ( ZERO = 0.0e+0 )
6537 * ..
6538 * .. Local Scalars ..
6539  LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER
6540  INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6541  $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6542  $ NPROW
6543  REAL ERR0, ERRI, PREC
6544 * ..
6545 * .. External Subroutines ..
6546  EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L,
6547  $ pserraxpby, sgamx2d
6548 * ..
6549 * .. External Functions ..
6550  LOGICAL LSAME
6551  REAL PSLAMCH
6552  EXTERNAL LSAME, PSLAMCH
6553 * ..
6554 * .. Intrinsic Functions ..
6555  INTRINSIC abs, max
6556 * ..
6557 * .. Executable Statements ..
6558 *
6559  ictxt = descc( ctxt_ )
6560  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6561 *
6562  prec = pslamch( ictxt, 'eps' )
6563 *
6564  upper = lsame( uplo, 'U' )
6565  lower = lsame( uplo, 'L' )
6566  notran = lsame( trans, 'N' )
6567 *
6568 * Compute expected result in C using data in A and C. This part of
6569 * the computation is performed by every process in the grid.
6570 *
6571  info = 0
6572  err = zero
6573 *
6574  lda = max( 1, desca( m_ ) )
6575  ldc = max( 1, descc( m_ ) )
6576  ldpc = max( 1, descc( lld_ ) )
6577  rowrep = ( descc( rsrc_ ).EQ.-1 )
6578  colrep = ( descc( csrc_ ).EQ.-1 )
6579 *
6580  IF( notran ) THEN
6581 *
6582  DO 20 j = jc, jc + n - 1
6583 *
6584  ioffc = ic + ( j - 1 ) * ldc
6585  ioffa = ia + ( ja - 1 + j - jc ) * lda
6586 *
6587  DO 10 i = ic, ic + m - 1
6588 *
6589  IF( upper ) THEN
6590  IF( ( j - jc ).GE.( i - ic ) ) THEN
6591  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6592  $ c( ioffc ), prec )
6593  ELSE
6594  erri = zero
6595  END IF
6596  ELSE IF( lower ) THEN
6597  IF( ( j - jc ).LE.( i - ic ) ) THEN
6598  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6599  $ c( ioffc ), prec )
6600  ELSE
6601  erri = zero
6602  END IF
6603  ELSE
6604  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6605  $ c( ioffc ), prec )
6606  END IF
6607 *
6608  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6609  $ iic, jjc, icrow, iccol )
6610  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6611  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6612  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6613  IF( err0.GT.erri )
6614  $ info = 1
6615  err = max( err, err0 )
6616  END IF
6617 *
6618  ioffa = ioffa + 1
6619  ioffc = ioffc + 1
6620 *
6621  10 CONTINUE
6622 *
6623  20 CONTINUE
6624 *
6625  ELSE
6626 *
6627  DO 40 j = jc, jc + n - 1
6628 *
6629  ioffc = ic + ( j - 1 ) * ldc
6630  ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6631 *
6632  DO 30 i = ic, ic + m - 1
6633 *
6634  IF( upper ) THEN
6635  IF( ( j - jc ).GE.( i - ic ) ) THEN
6636  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6637  $ c( ioffc ), prec )
6638  ELSE
6639  erri = zero
6640  END IF
6641  ELSE IF( lower ) THEN
6642  IF( ( j - jc ).LE.( i - ic ) ) THEN
6643  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6644  $ c( ioffc ), prec )
6645  ELSE
6646  erri = zero
6647  END IF
6648  ELSE
6649  CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6650  $ c( ioffc ), prec )
6651  END IF
6652 *
6653  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6654  $ iic, jjc, icrow, iccol )
6655  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6656  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6657  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6658  IF( err0.GT.erri )
6659  $ info = 1
6660  err = max( err, err0 )
6661  END IF
6662 *
6663  ioffc = ioffc + 1
6664  ioffa = ioffa + lda
6665 *
6666  30 CONTINUE
6667 *
6668  40 CONTINUE
6669 *
6670  END IF
6671 *
6672 * If INFO = 0, all results are at least half accurate.
6673 *
6674  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6675  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6676  $ mycol )
6677 *
6678  RETURN
6679 *
6680 * End of PSMMCH3
6681 *
6682  END
6683  SUBROUTINE pserraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6685 * -- PBLAS test routine (version 2.0) --
6686 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6687 * and University of California, Berkeley.
6688 * April 1, 1998
6689 *
6690 * .. Scalar Arguments ..
6691  REAL ALPHA, BETA, ERRBND, PREC, X, Y
6692 * ..
6693 *
6694 * Purpose
6695 * =======
6696 *
6697 * PSERRAXPBY serially computes y := beta*y + alpha * x and returns a
6698 * scaled relative acceptable error bound on the result.
6699 *
6700 * Arguments
6701 * =========
6702 *
6703 * ERRBND (global output) REAL
6704 * On exit, ERRBND specifies the scaled relative acceptable er-
6705 * ror bound.
6706 *
6707 * ALPHA (global input) REAL
6708 * On entry, ALPHA specifies the scalar alpha.
6709 *
6710 * X (global input) REAL
6711 * On entry, X specifies the scalar x to be scaled.
6712 *
6713 * BETA (global input) REAL
6714 * On entry, BETA specifies the scalar beta.
6715 *
6716 * Y (global input/global output) REAL
6717 * On entry, Y specifies the scalar y to be added. On exit, Y
6718 * contains the resulting scalar y.
6719 *
6720 * PREC (global input) REAL
6721 * On entry, PREC specifies the machine precision.
6722 *
6723 * -- Written on April 1, 1998 by
6724 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6725 *
6726 * =====================================================================
6727 *
6728 * .. Parameters ..
6729  REAL ONE, TWO, ZERO
6730  PARAMETER ( ONE = 1.0e+0, two = 2.0e+0,
6731  $ zero = 0.0e+0 )
6732 * ..
6733 * .. Local Scalars ..
6734  REAL ADDBND, FACT, SUMPOS, SUMNEG, TMP
6735 * ..
6736 * .. Intrinsic Functions ..
6737 * ..
6738 * .. Executable Statements ..
6739 *
6740  SUMPOS = zero
6741  sumneg = zero
6742  fact = one + two * prec
6743  addbnd = two * two * two * prec
6744 *
6745  tmp = alpha * x
6746  IF( tmp.GE.zero ) THEN
6747  sumpos = sumpos + tmp * fact
6748  ELSE
6749  sumneg = sumneg - tmp * fact
6750  END IF
6751 *
6752  tmp = beta * y
6753  IF( tmp.GE.zero ) THEN
6754  sumpos = sumpos + tmp * fact
6755  ELSE
6756  sumneg = sumneg - tmp * fact
6757  END IF
6758 *
6759  y = ( beta * y ) + ( alpha * x )
6760 *
6761  errbnd = addbnd * max( sumpos, sumneg )
6762 *
6763  RETURN
6764 *
6765 * End of PSERRAXPBY
6766 *
6767  END
6768  REAL FUNCTION PSLAMCH( ICTXT, CMACH )
6770 * -- PBLAS test routine (version 2.0) --
6771 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6772 * and University of California, Berkeley.
6773 * April 1, 1998
6774 *
6775 * .. Scalar Arguments ..
6776  CHARACTER*1 cmach
6777  INTEGER ictxt
6778 * ..
6779 *
6780 * Purpose
6781 * =======
6782 *
6783 * PSLAMCH determines single precision machine parameters.
6784 *
6785 * Arguments
6786 * =========
6787 *
6788 * ICTXT (local input) INTEGER
6789 * On entry, ICTXT specifies the BLACS context handle, indica-
6790 * ting the global context of the operation. The context itself
6791 * is global, but the value of ICTXT is local.
6792 *
6793 * CMACH (global input) CHARACTER*1
6794 * On entry, CMACH specifies the value to be returned by PSLAMCH
6795 * as follows:
6796 * = 'E' or 'e', PSLAMCH := eps,
6797 * = 'S' or 's , PSLAMCH := sfmin,
6798 * = 'B' or 'b', PSLAMCH := base,
6799 * = 'P' or 'p', PSLAMCH := eps*base,
6800 * = 'N' or 'n', PSLAMCH := t,
6801 * = 'R' or 'r', PSLAMCH := rnd,
6802 * = 'M' or 'm', PSLAMCH := emin,
6803 * = 'U' or 'u', PSLAMCH := rmin,
6804 * = 'L' or 'l', PSLAMCH := emax,
6805 * = 'O' or 'o', PSLAMCH := rmax,
6806 *
6807 * where
6808 *
6809 * eps = relative machine precision,
6810 * sfmin = safe minimum, such that 1/sfmin does not overflow,
6811 * base = base of the machine,
6812 * prec = eps*base,
6813 * t = number of (base) digits in the mantissa,
6814 * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise,
6815 * emin = minimum exponent before (gradual) underflow,
6816 * rmin = underflow threshold - base**(emin-1),
6817 * emax = largest exponent before overflow,
6818 * rmax = overflow threshold - (base**emax)*(1-eps).
6819 *
6820 * -- Written on April 1, 1998 by
6821 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6822 *
6823 * =====================================================================
6824 *
6825 * .. Local Scalars ..
6826  CHARACTER*1 top
6827  INTEGER idumm
6828  REAL temp
6829 * ..
6830 * .. External Subroutines ..
6831  EXTERNAL pb_topget, sgamn2d, sgamx2d
6832 * ..
6833 * .. External Functions ..
6834  LOGICAL lsame
6835  REAL slamch
6836  EXTERNAL lsame, slamch
6837 * ..
6838 * .. Executable Statements ..
6839 *
6840  temp = slamch( cmach )
6841 *
6842  IF( lsame( cmach, 'E' ).OR.lsame( cmach, 'S' ).OR.
6843  $ lsame( cmach, 'M' ).OR.lsame( cmach, 'U' ) ) THEN
6844  CALL pb_topget( ictxt, 'Combine', 'All', top )
6845  idumm = 0
6846  CALL sgamx2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6847  $ idumm, -1, -1, idumm )
6848  ELSE IF( lsame( cmach, 'L' ).OR.lsame( cmach, 'O' ) ) THEN
6849  CALL pb_topget( ictxt, 'Combine', 'All', top )
6850  idumm = 0
6851  CALL sgamn2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6852  $ idumm, -1, -1, idumm )
6853  END IF
6854 *
6855  pslamch = temp
6856 *
6857  RETURN
6858 *
6859 * End of PSLAMCH
6860 *
6861  END
6862  SUBROUTINE pslaset( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA )
6864 * -- PBLAS test routine (version 2.0) --
6865 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6866 * and University of California, Berkeley.
6867 * April 1, 1998
6868 *
6869 * .. Scalar Arguments ..
6870  CHARACTER*1 UPLO
6871  INTEGER IA, JA, M, N
6872  REAL ALPHA, BETA
6873 * ..
6874 * .. Array Arguments ..
6875  INTEGER DESCA( * )
6876  REAL A( * )
6877 * ..
6878 *
6879 * Purpose
6880 * =======
6881 *
6882 * PSLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno-
6883 * ted by sub( A ) to beta on the diagonal and alpha on the offdiago-
6884 * nals.
6885 *
6886 * Notes
6887 * =====
6888 *
6889 * A description vector is associated with each 2D block-cyclicly dis-
6890 * tributed matrix. This vector stores the information required to
6891 * establish the mapping between a matrix entry and its corresponding
6892 * process and memory location.
6893 *
6894 * In the following comments, the character _ should be read as
6895 * "of the distributed matrix". Let A be a generic term for any 2D
6896 * block cyclicly distributed matrix. Its description vector is DESCA:
6897 *
6898 * NOTATION STORED IN EXPLANATION
6899 * ---------------- --------------- ------------------------------------
6900 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6901 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6902 * the NPROW x NPCOL BLACS process grid
6903 * A is distributed over. The context
6904 * itself is global, but the handle
6905 * (the integer value) may vary.
6906 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6907 * ted matrix A, M_A >= 0.
6908 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6909 * buted matrix A, N_A >= 0.
6910 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6911 * block of the matrix A, IMB_A > 0.
6912 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6913 * left block of the matrix A,
6914 * INB_A > 0.
6915 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6916 * bute the last M_A-IMB_A rows of A,
6917 * MB_A > 0.
6918 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6919 * bute the last N_A-INB_A columns of
6920 * A, NB_A > 0.
6921 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6922 * row of the matrix A is distributed,
6923 * NPROW > RSRC_A >= 0.
6924 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6925 * first column of A is distributed.
6926 * NPCOL > CSRC_A >= 0.
6927 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6928 * array storing the local blocks of
6929 * the distributed matrix A,
6930 * IF( Lc( 1, N_A ) > 0 )
6931 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6932 * ELSE
6933 * LLD_A >= 1.
6934 *
6935 * Let K be the number of rows of a matrix A starting at the global in-
6936 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6937 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6938 * receive if these K rows were distributed over NPROW processes. If K
6939 * is the number of columns of a matrix A starting at the global index
6940 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6941 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6942 * these K columns were distributed over NPCOL processes.
6943 *
6944 * The values of Lr() and Lc() may be determined via a call to the func-
6945 * tion PB_NUMROC:
6946 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6947 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6948 *
6949 * Arguments
6950 * =========
6951 *
6952 * UPLO (global input) CHARACTER*1
6953 * On entry, UPLO specifies the part of the submatrix sub( A )
6954 * to be set:
6955 * = 'L' or 'l': Lower triangular part is set; the strictly
6956 * upper triangular part of sub( A ) is not changed;
6957 * = 'U' or 'u': Upper triangular part is set; the strictly
6958 * lower triangular part of sub( A ) is not changed;
6959 * Otherwise: All of the matrix sub( A ) is set.
6960 *
6961 * M (global input) INTEGER
6962 * On entry, M specifies the number of rows of the submatrix
6963 * sub( A ). M must be at least zero.
6964 *
6965 * N (global input) INTEGER
6966 * On entry, N specifies the number of columns of the submatrix
6967 * sub( A ). N must be at least zero.
6968 *
6969 * ALPHA (global input) REAL
6970 * On entry, ALPHA specifies the scalar alpha, i.e., the cons-
6971 * tant to which the offdiagonal elements are to be set.
6972 *
6973 * BETA (global input) REAL
6974 * On entry, BETA specifies the scalar beta, i.e., the constant
6975 * to which the diagonal elements are to be set.
6976 *
6977 * A (local input/local output) REAL array
6978 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
6979 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
6980 * the local entries of the matrix A to be set. On exit, the
6981 * leading m by n submatrix sub( A ) is set as follows:
6982 *
6983 * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N,
6984 * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N,
6985 * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N,
6986 * and IA+i.NE.JA+j,
6987 * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
6988 *
6989 * IA (global input) INTEGER
6990 * On entry, IA specifies A's global row index, which points to
6991 * the beginning of the submatrix sub( A ).
6992 *
6993 * JA (global input) INTEGER
6994 * On entry, JA specifies A's global column index, which points
6995 * to the beginning of the submatrix sub( A ).
6996 *
6997 * DESCA (global and local input) INTEGER array
6998 * On entry, DESCA is an integer array of dimension DLEN_. This
6999 * is the array descriptor for the matrix A.
7000 *
7001 * -- Written on April 1, 1998 by
7002 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7003 *
7004 * =====================================================================
7005 *
7006 * .. Parameters ..
7007  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7008  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7009  $ RSRC_
7010  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7011  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7012  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7013  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7014 * ..
7015 * .. Local Scalars ..
7016  LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER,
7017  $ UPPER
7018  INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7019  $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA,
7020  $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC,
7021  $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP,
7022  $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD,
7023  $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1,
7024  $ UPP
7025 * ..
7026 * .. Local Arrays ..
7027  INTEGER DESCA2( DLEN_ )
7028 * ..
7029 * .. External Subroutines ..
7030  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7032 * ..
7033 * .. External Functions ..
7034  LOGICAL LSAME
7035  EXTERNAL lsame
7036 * ..
7037 * .. Intrinsic Functions ..
7038  INTRINSIC min
7039 * ..
7040 * .. Executable Statements ..
7041 *
7042  IF( m.EQ.0 .OR. n.EQ.0 )
7043  $ RETURN
7044 *
7045 * Convert descriptor
7046 *
7047  CALL pb_desctrans( desca, desca2 )
7048 *
7049 * Get grid parameters
7050 *
7051  ictxt = desca2( ctxt_ )
7052  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7053 *
7054  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7055  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7056  $ iacol, mrrow, mrcol )
7057 *
7058  IF( mp.LE.0 .OR. nq.LE.0 )
7059  $ RETURN
7060 *
7061  isrowrep = ( desca2( rsrc_ ).LT.0 )
7062  iscolrep = ( desca2( csrc_ ).LT.0 )
7063  lda = desca2( lld_ )
7064 *
7065  upper = .NOT.( lsame( uplo, 'L' ) )
7066  lower = .NOT.( lsame( uplo, 'U' ) )
7067 *
7068  IF( ( ( lower.AND.upper ).AND.( alpha.EQ.beta ) ).OR.
7069  $ ( isrowrep .AND. iscolrep ) ) THEN
7070  IF( ( mp.GT.0 ).AND.( nq.GT.0 ) )
7071  $ CALL pb_slaset( uplo, mp, nq, 0, alpha, beta,
7072  $ a( iia + ( jja - 1 ) * lda ), lda )
7073  RETURN
7074  END IF
7075 *
7076 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7077 * ILOW, LOW, IUPP, and UPP.
7078 *
7079  mb = desca2( mb_ )
7080  nb = desca2( nb_ )
7081  CALL pb_binfo( 0, mp, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7082  $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7083  $ lnbloc, ilow, low, iupp, upp )
7084 *
7085  ioffa = iia - 1
7086  joffa = jja - 1
7087  iimax = ioffa + mp
7088  jjmax = joffa + nq
7089 *
7090  IF( isrowrep ) THEN
7091  pmb = mb
7092  ELSE
7093  pmb = nprow * mb
7094  END IF
7095  IF( iscolrep ) THEN
7096  qnb = nb
7097  ELSE
7098  qnb = npcol * nb
7099  END IF
7100 *
7101  m1 = mp
7102  n1 = nq
7103 *
7104 * Handle the first block of rows or columns separately, and update
7105 * LCMT00, MBLKS and NBLKS.
7106 *
7107  godown = ( lcmt00.GT.iupp )
7108  goleft = ( lcmt00.LT.ilow )
7109 *
7110  IF( .NOT.godown .AND. .NOT.goleft ) THEN
7111 *
7112 * LCMT00 >= ILOW && LCMT00 <= IUPP
7113 *
7114  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7115  godown = .NOT.goleft
7116 *
7117  CALL pb_slaset( uplo, imbloc, inbloc, lcmt00, alpha, beta,
7118  $ a( iia+joffa*lda ), lda )
7119  IF( godown ) THEN
7120  IF( upper .AND. nq.GT.inbloc )
7121  $ CALL pb_slaset( 'All', imbloc, nq-inbloc, 0, alpha,
7122  $ alpha, a( iia+(joffa+inbloc)*lda ), lda )
7123  iia = iia + imbloc
7124  m1 = m1 - imbloc
7125  ELSE
7126  IF( lower .AND. mp.GT.imbloc )
7127  $ CALL pb_slaset( 'All', mp-imbloc, inbloc, 0, alpha,
7128  $ alpha, a( iia+imbloc+joffa*lda ), lda )
7129  jja = jja + inbloc
7130  n1 = n1 - inbloc
7131  END IF
7132 *
7133  END IF
7134 *
7135  IF( godown ) THEN
7136 *
7137  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7138  mblks = mblks - 1
7139  ioffa = ioffa + imbloc
7140 *
7141  10 CONTINUE
7142  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7143  lcmt00 = lcmt00 - pmb
7144  mblks = mblks - 1
7145  ioffa = ioffa + mb
7146  GO TO 10
7147  END IF
7148 *
7149  tmp1 = min( ioffa, iimax ) - iia + 1
7150  IF( upper .AND. tmp1.GT.0 ) THEN
7151  CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7152  $ a( iia+joffa*lda ), lda )
7153  iia = iia + tmp1
7154  m1 = m1 - tmp1
7155  END IF
7156 *
7157  IF( mblks.LE.0 )
7158  $ RETURN
7159 *
7160  lcmt = lcmt00
7161  mblkd = mblks
7162  ioffd = ioffa
7163 *
7164  mbloc = mb
7165  20 CONTINUE
7166  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7167  IF( mblkd.EQ.1 )
7168  $ mbloc = lmbloc
7169  CALL pb_slaset( uplo, mbloc, inbloc, lcmt, alpha, beta,
7170  $ a( ioffd+1+joffa*lda ), lda )
7171  lcmt00 = lcmt
7172  lcmt = lcmt - pmb
7173  mblks = mblkd
7174  mblkd = mblkd - 1
7175  ioffa = ioffd
7176  ioffd = ioffd + mbloc
7177  GO TO 20
7178  END IF
7179 *
7180  tmp1 = m1 - ioffd + iia - 1
7181  IF( lower .AND. tmp1.GT.0 )
7182  $ CALL pb_slaset( 'ALL', tmp1, inbloc, 0, alpha, alpha,
7183  $ a( ioffd+1+joffa*lda ), lda )
7184 *
7185  tmp1 = ioffa - iia + 1
7186  m1 = m1 - tmp1
7187  n1 = n1 - inbloc
7188  lcmt00 = lcmt00 + low - ilow + qnb
7189  nblks = nblks - 1
7190  joffa = joffa + inbloc
7191 *
7192  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7193  $ CALL pb_slaset( 'ALL', tmp1, n1, 0, alpha, alpha,
7194  $ a( iia+joffa*lda ), lda )
7195 *
7196  iia = ioffa + 1
7197  jja = joffa + 1
7198 *
7199  ELSE IF( goleft ) THEN
7200 *
7201  lcmt00 = lcmt00 + low - ilow + qnb
7202  nblks = nblks - 1
7203  joffa = joffa + inbloc
7204 *
7205  30 CONTINUE
7206  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7207  lcmt00 = lcmt00 + qnb
7208  nblks = nblks - 1
7209  joffa = joffa + nb
7210  GO TO 30
7211  END IF
7212 *
7213  tmp1 = min( joffa, jjmax ) - jja + 1
7214  IF( lower .AND. tmp1.GT.0 ) THEN
7215  CALL pb_slaset( 'All', m1, tmp1, 0, alpha, alpha,
7216  $ a( iia+(jja-1)*lda ), lda )
7217  jja = jja + tmp1
7218  n1 = n1 - tmp1
7219  END IF
7220 *
7221  IF( nblks.LE.0 )
7222  $ RETURN
7223 *
7224  lcmt = lcmt00
7225  nblkd = nblks
7226  joffd = joffa
7227 *
7228  nbloc = nb
7229  40 CONTINUE
7230  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7231  IF( nblkd.EQ.1 )
7232  $ nbloc = lnbloc
7233  CALL pb_slaset( uplo, imbloc, nbloc, lcmt, alpha, beta,
7234  $ a( iia+joffd*lda ), lda )
7235  lcmt00 = lcmt
7236  lcmt = lcmt + qnb
7237  nblks = nblkd
7238  nblkd = nblkd - 1
7239  joffa = joffd
7240  joffd = joffd + nbloc
7241  GO TO 40
7242  END IF
7243 *
7244  tmp1 = n1 - joffd + jja - 1
7245  IF( upper .AND. tmp1.GT.0 )
7246  $ CALL pb_slaset( 'All', imbloc, tmp1, 0, alpha, alpha,
7247  $ a( iia+joffd*lda ), lda )
7248 *
7249  tmp1 = joffa - jja + 1
7250  m1 = m1 - imbloc
7251  n1 = n1 - tmp1
7252  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7253  mblks = mblks - 1
7254  ioffa = ioffa + imbloc
7255 *
7256  IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7257  $ CALL pb_slaset( 'All', m1, tmp1, 0, alpha, alpha,
7258  $ a( ioffa+1+(jja-1)*lda ), lda )
7259 *
7260  iia = ioffa + 1
7261  jja = joffa + 1
7262 *
7263  END IF
7264 *
7265  nbloc = nb
7266  50 CONTINUE
7267  IF( nblks.GT.0 ) THEN
7268  IF( nblks.EQ.1 )
7269  $ nbloc = lnbloc
7270  60 CONTINUE
7271  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7272  lcmt00 = lcmt00 - pmb
7273  mblks = mblks - 1
7274  ioffa = ioffa + mb
7275  GO TO 60
7276  END IF
7277 *
7278  tmp1 = min( ioffa, iimax ) - iia + 1
7279  IF( upper .AND. tmp1.GT.0 ) THEN
7280  CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7281  $ a( iia+joffa*lda ), lda )
7282  iia = iia + tmp1
7283  m1 = m1 - tmp1
7284  END IF
7285 *
7286  IF( mblks.LE.0 )
7287  $ RETURN
7288 *
7289  lcmt = lcmt00
7290  mblkd = mblks
7291  ioffd = ioffa
7292 *
7293  mbloc = mb
7294  70 CONTINUE
7295  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7296  IF( mblkd.EQ.1 )
7297  $ mbloc = lmbloc
7298  CALL pb_slaset( uplo, mbloc, nbloc, lcmt, alpha, beta,
7299  $ a( ioffd+1+joffa*lda ), lda )
7300  lcmt00 = lcmt
7301  lcmt = lcmt - pmb
7302  mblks = mblkd
7303  mblkd = mblkd - 1
7304  ioffa = ioffd
7305  ioffd = ioffd + mbloc
7306  GO TO 70
7307  END IF
7308 *
7309  tmp1 = m1 - ioffd + iia - 1
7310  IF( lower .AND. tmp1.GT.0 )
7311  $ CALL pb_slaset( 'All', tmp1, nbloc, 0, alpha, alpha,
7312  $ a( ioffd+1+joffa*lda ), lda )
7313 *
7314  tmp1 = min( ioffa, iimax ) - iia + 1
7315  m1 = m1 - tmp1
7316  n1 = n1 - nbloc
7317  lcmt00 = lcmt00 + qnb
7318  nblks = nblks - 1
7319  joffa = joffa + nbloc
7320 *
7321  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7322  $ CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7323  $ a( iia+joffa*lda ), lda )
7324 *
7325  iia = ioffa + 1
7326  jja = joffa + 1
7327 *
7328  GO TO 50
7329 *
7330  END IF
7331 *
7332  RETURN
7333 *
7334 * End of PSLASET
7335 *
7336  END
7337  SUBROUTINE pslascal( TYPE, M, N, ALPHA, A, IA, JA, DESCA )
7339 * -- PBLAS test routine (version 2.0) --
7340 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7341 * and University of California, Berkeley.
7342 * April 1, 1998
7343 *
7344 * .. Scalar Arguments ..
7345  CHARACTER*1 TYPE
7346  INTEGER IA, JA, M, N
7347  REAL ALPHA
7348 * ..
7349 * .. Array Arguments ..
7350  INTEGER DESCA( * )
7351  REAL A( * )
7352 * ..
7353 *
7354 * Purpose
7355 * =======
7356 *
7357 * PSLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted
7358 * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full,
7359 * upper triangular, lower triangular or upper Hessenberg.
7360 *
7361 * Notes
7362 * =====
7363 *
7364 * A description vector is associated with each 2D block-cyclicly dis-
7365 * tributed matrix. This vector stores the information required to
7366 * establish the mapping between a matrix entry and its corresponding
7367 * process and memory location.
7368 *
7369 * In the following comments, the character _ should be read as
7370 * "of the distributed matrix". Let A be a generic term for any 2D
7371 * block cyclicly distributed matrix. Its description vector is DESCA:
7372 *
7373 * NOTATION STORED IN EXPLANATION
7374 * ---------------- --------------- ------------------------------------
7375 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7376 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7377 * the NPROW x NPCOL BLACS process grid
7378 * A is distributed over. The context
7379 * itself is global, but the handle
7380 * (the integer value) may vary.
7381 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
7382 * ted matrix A, M_A >= 0.
7383 * N_A (global) DESCA( N_ ) The number of columns in the distri-
7384 * buted matrix A, N_A >= 0.
7385 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7386 * block of the matrix A, IMB_A > 0.
7387 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
7388 * left block of the matrix A,
7389 * INB_A > 0.
7390 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7391 * bute the last M_A-IMB_A rows of A,
7392 * MB_A > 0.
7393 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7394 * bute the last N_A-INB_A columns of
7395 * A, NB_A > 0.
7396 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7397 * row of the matrix A is distributed,
7398 * NPROW > RSRC_A >= 0.
7399 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7400 * first column of A is distributed.
7401 * NPCOL > CSRC_A >= 0.
7402 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7403 * array storing the local blocks of
7404 * the distributed matrix A,
7405 * IF( Lc( 1, N_A ) > 0 )
7406 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
7407 * ELSE
7408 * LLD_A >= 1.
7409 *
7410 * Let K be the number of rows of a matrix A starting at the global in-
7411 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7412 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7413 * receive if these K rows were distributed over NPROW processes. If K
7414 * is the number of columns of a matrix A starting at the global index
7415 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7416 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7417 * these K columns were distributed over NPCOL processes.
7418 *
7419 * The values of Lr() and Lc() may be determined via a call to the func-
7420 * tion PB_NUMROC:
7421 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7422 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7423 *
7424 * Arguments
7425 * =========
7426 *
7427 * TYPE (global input) CHARACTER*1
7428 * On entry, TYPE specifies the type of the input submatrix as
7429 * follows:
7430 * = 'L' or 'l': sub( A ) is a lower triangular matrix,
7431 * = 'U' or 'u': sub( A ) is an upper triangular matrix,
7432 * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix,
7433 * otherwise sub( A ) is a full matrix.
7434 *
7435 * M (global input) INTEGER
7436 * On entry, M specifies the number of rows of the submatrix
7437 * sub( A ). M must be at least zero.
7438 *
7439 * N (global input) INTEGER
7440 * On entry, N specifies the number of columns of the submatrix
7441 * sub( A ). N must be at least zero.
7442 *
7443 * ALPHA (global input) REAL
7444 * On entry, ALPHA specifies the scalar alpha.
7445 *
7446 * A (local input/local output) REAL array
7447 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7448 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
7449 * the local entries of the matrix A.
7450 * On exit, the local entries of this array corresponding to the
7451 * to the entries of the submatrix sub( A ) are overwritten by
7452 * the local entries of the m by n scaled submatrix.
7453 *
7454 * IA (global input) INTEGER
7455 * On entry, IA specifies A's global row index, which points to
7456 * the beginning of the submatrix sub( A ).
7457 *
7458 * JA (global input) INTEGER
7459 * On entry, JA specifies A's global column index, which points
7460 * to the beginning of the submatrix sub( A ).
7461 *
7462 * DESCA (global and local input) INTEGER array
7463 * On entry, DESCA is an integer array of dimension DLEN_. This
7464 * is the array descriptor for the matrix A.
7465 *
7466 * -- Written on April 1, 1998 by
7467 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7468 *
7469 * =====================================================================
7470 *
7471 * .. Parameters ..
7472  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7473  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7474  $ RSRC_
7475  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7476  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7477  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7478  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7479 * ..
7480 * .. Local Scalars ..
7481  CHARACTER*1 UPLO
7482  LOGICAL GODOWN, GOLEFT, LOWER, UPPER
7483  INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7484  $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE,
7485  $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00,
7486  $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS,
7487  $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB,
7488  $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB,
7489  $ QNB, TMP1, UPP
7490 * ..
7491 * .. Local Arrays ..
7492  INTEGER DESCA2( DLEN_ )
7493 * ..
7494 * .. External Subroutines ..
7495  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7497 * ..
7498 * .. External Functions ..
7499  LOGICAL LSAME
7500  INTEGER PB_NUMROC
7501  EXTERNAL lsame, pb_numroc
7502 * ..
7503 * .. Intrinsic Functions ..
7504  INTRINSIC min
7505 * ..
7506 * .. Executable Statements ..
7507 *
7508 * Convert descriptor
7509 *
7510  CALL pb_desctrans( desca, desca2 )
7511 *
7512 * Get grid parameters
7513 *
7514  ictxt = desca2( ctxt_ )
7515  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7516 *
7517 * Quick return if possible
7518 *
7519  IF( m.EQ.0 .OR. n.EQ.0 )
7520  $ RETURN
7521 *
7522  IF( lsame( TYPE, 'L' ) ) then
7523  itype = 1
7524  uplo = TYPE
7525  upper = .false.
7526  lower = .true.
7527  ioffd = 0
7528  ELSE IF( lsame( TYPE, 'U' ) ) then
7529  itype = 2
7530  uplo = TYPE
7531  upper = .true.
7532  lower = .false.
7533  ioffd = 0
7534  ELSE IF( lsame( TYPE, 'H' ) ) then
7535  itype = 3
7536  uplo = 'U'
7537  upper = .true.
7538  lower = .false.
7539  ioffd = 1
7540  ELSE
7541  itype = 0
7542  uplo = 'A'
7543  upper = .true.
7544  lower = .true.
7545  ioffd = 0
7546  END IF
7547 *
7548 * Compute local indexes
7549 *
7550  IF( itype.EQ.0 ) THEN
7551 *
7552 * Full matrix
7553 *
7554  CALL pb_infog2l( ia, ja, desca2, nprow, npcol, myrow, mycol,
7555  $ iia, jja, iarow, iacol )
7556  mp = pb_numroc( m, ia, desca2( imb_ ), desca2( mb_ ), myrow,
7557  $ desca2( rsrc_ ), nprow )
7558  nq = pb_numroc( n, ja, desca2( inb_ ), desca2( nb_ ), mycol,
7559  $ desca2( csrc_ ), npcol )
7560 *
7561  IF( mp.LE.0 .OR. nq.LE.0 )
7562  $ RETURN
7563 *
7564  lda = desca2( lld_ )
7565  ioffa = iia + ( jja - 1 ) * lda
7566 *
7567  CALL pb_slascal( 'All', mp, nq, 0, alpha, a( ioffa ), lda )
7568 *
7569  ELSE
7570 *
7571 * Trapezoidal matrix
7572 *
7573  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7574  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7575  $ iacol, mrrow, mrcol )
7576 *
7577  IF( mp.LE.0 .OR. nq.LE.0 )
7578  $ RETURN
7579 *
7580 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC,
7581 * LNBLOC, ILOW, LOW, IUPP, and UPP.
7582 *
7583  mb = desca2( mb_ )
7584  nb = desca2( nb_ )
7585  lda = desca2( lld_ )
7586 *
7587  CALL pb_binfo( ioffd, mp, nq, imb1, inb1, mb, nb, mrrow,
7588  $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
7589  $ lmbloc, lnbloc, ilow, low, iupp, upp )
7590 *
7591  m1 = mp
7592  n1 = nq
7593  ioffa = iia - 1
7594  joffa = jja - 1
7595  iimax = ioffa + mp
7596  jjmax = joffa + nq
7597 *
7598  IF( desca2( rsrc_ ).LT.0 ) THEN
7599  pmb = mb
7600  ELSE
7601  pmb = nprow * mb
7602  END IF
7603  IF( desca2( csrc_ ).LT.0 ) THEN
7604  qnb = nb
7605  ELSE
7606  qnb = npcol * nb
7607  END IF
7608 *
7609 * Handle the first block of rows or columns separately, and
7610 * update LCMT00, MBLKS and NBLKS.
7611 *
7612  godown = ( lcmt00.GT.iupp )
7613  goleft = ( lcmt00.LT.ilow )
7614 *
7615  IF( .NOT.godown .AND. .NOT.goleft ) THEN
7616 *
7617 * LCMT00 >= ILOW && LCMT00 <= IUPP
7618 *
7619  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7620  godown = .NOT.goleft
7621 *
7622  CALL pb_slascal( uplo, imbloc, inbloc, lcmt00, alpha,
7623  $ a( iia+joffa*lda ), lda )
7624  IF( godown ) THEN
7625  IF( upper .AND. nq.GT.inbloc )
7626  $ CALL pb_slascal( 'All', imbloc, nq-inbloc, 0, alpha,
7627  $ a( iia+(joffa+inbloc)*lda ), lda )
7628  iia = iia + imbloc
7629  m1 = m1 - imbloc
7630  ELSE
7631  IF( lower .AND. mp.GT.imbloc )
7632  $ CALL pb_slascal( 'All', mp-imbloc, inbloc, 0, alpha,
7633  $ a( iia+imbloc+joffa*lda ), lda )
7634  jja = jja + inbloc
7635  n1 = n1 - inbloc
7636  END IF
7637 *
7638  END IF
7639 *
7640  IF( godown ) THEN
7641 *
7642  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7643  mblks = mblks - 1
7644  ioffa = ioffa + imbloc
7645 *
7646  10 CONTINUE
7647  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7648  lcmt00 = lcmt00 - pmb
7649  mblks = mblks - 1
7650  ioffa = ioffa + mb
7651  GO TO 10
7652  END IF
7653 *
7654  tmp1 = min( ioffa, iimax ) - iia + 1
7655  IF( upper .AND. tmp1.GT.0 ) THEN
7656  CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7657  $ a( iia+joffa*lda ), lda )
7658  iia = iia + tmp1
7659  m1 = m1 - tmp1
7660  END IF
7661 *
7662  IF( mblks.LE.0 )
7663  $ RETURN
7664 *
7665  lcmt = lcmt00
7666  mblkd = mblks
7667  ioffd = ioffa
7668 *
7669  mbloc = mb
7670  20 CONTINUE
7671  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7672  IF( mblkd.EQ.1 )
7673  $ mbloc = lmbloc
7674  CALL pb_slascal( uplo, mbloc, inbloc, lcmt, alpha,
7675  $ a( ioffd+1+joffa*lda ), lda )
7676  lcmt00 = lcmt
7677  lcmt = lcmt - pmb
7678  mblks = mblkd
7679  mblkd = mblkd - 1
7680  ioffa = ioffd
7681  ioffd = ioffd + mbloc
7682  GO TO 20
7683  END IF
7684 *
7685  tmp1 = m1 - ioffd + iia - 1
7686  IF( lower .AND. tmp1.GT.0 )
7687  $ CALL pb_slascal( 'All', tmp1, inbloc, 0, alpha,
7688  $ a( ioffd+1+joffa*lda ), lda )
7689 *
7690  tmp1 = ioffa - iia + 1
7691  m1 = m1 - tmp1
7692  n1 = n1 - inbloc
7693  lcmt00 = lcmt00 + low - ilow + qnb
7694  nblks = nblks - 1
7695  joffa = joffa + inbloc
7696 *
7697  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7698  $ CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7699  $ a( iia+joffa*lda ), lda )
7700 *
7701  iia = ioffa + 1
7702  jja = joffa + 1
7703 *
7704  ELSE IF( goleft ) THEN
7705 *
7706  lcmt00 = lcmt00 + low - ilow + qnb
7707  nblks = nblks - 1
7708  joffa = joffa + inbloc
7709 *
7710  30 CONTINUE
7711  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7712  lcmt00 = lcmt00 + qnb
7713  nblks = nblks - 1
7714  joffa = joffa + nb
7715  GO TO 30
7716  END IF
7717 *
7718  tmp1 = min( joffa, jjmax ) - jja + 1
7719  IF( lower .AND. tmp1.GT.0 ) THEN
7720  CALL pb_slascal( 'All', m1, tmp1, 0, alpha,
7721  $ a( iia+(jja-1)*lda ), lda )
7722  jja = jja + tmp1
7723  n1 = n1 - tmp1
7724  END IF
7725 *
7726  IF( nblks.LE.0 )
7727  $ RETURN
7728 *
7729  lcmt = lcmt00
7730  nblkd = nblks
7731  joffd = joffa
7732 *
7733  nbloc = nb
7734  40 CONTINUE
7735  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7736  IF( nblkd.EQ.1 )
7737  $ nbloc = lnbloc
7738  CALL pb_slascal( uplo, imbloc, nbloc, lcmt, alpha,
7739  $ a( iia+joffd*lda ), lda )
7740  lcmt00 = lcmt
7741  lcmt = lcmt + qnb
7742  nblks = nblkd
7743  nblkd = nblkd - 1
7744  joffa = joffd
7745  joffd = joffd + nbloc
7746  GO TO 40
7747  END IF
7748 *
7749  tmp1 = n1 - joffd + jja - 1
7750  IF( upper .AND. tmp1.GT.0 )
7751  $ CALL pb_slascal( 'All', imbloc, tmp1, 0, alpha,
7752  $ a( iia+joffd*lda ), lda )
7753 *
7754  tmp1 = joffa - jja + 1
7755  m1 = m1 - imbloc
7756  n1 = n1 - tmp1
7757  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7758  mblks = mblks - 1
7759  ioffa = ioffa + imbloc
7760 *
7761  IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7762  $ CALL pb_slascal( 'All', m1, tmp1, 0, alpha,
7763  $ a( ioffa+1+(jja-1)*lda ), lda )
7764 *
7765  iia = ioffa + 1
7766  jja = joffa + 1
7767 *
7768  END IF
7769 *
7770  nbloc = nb
7771  50 CONTINUE
7772  IF( nblks.GT.0 ) THEN
7773  IF( nblks.EQ.1 )
7774  $ nbloc = lnbloc
7775  60 CONTINUE
7776  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7777  lcmt00 = lcmt00 - pmb
7778  mblks = mblks - 1
7779  ioffa = ioffa + mb
7780  GO TO 60
7781  END IF
7782 *
7783  tmp1 = min( ioffa, iimax ) - iia + 1
7784  IF( upper .AND. tmp1.GT.0 ) THEN
7785  CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7786  $ a( iia+joffa*lda ), lda )
7787  iia = iia + tmp1
7788  m1 = m1 - tmp1
7789  END IF
7790 *
7791  IF( mblks.LE.0 )
7792  $ RETURN
7793 *
7794  lcmt = lcmt00
7795  mblkd = mblks
7796  ioffd = ioffa
7797 *
7798  mbloc = mb
7799  70 CONTINUE
7800  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7801  IF( mblkd.EQ.1 )
7802  $ mbloc = lmbloc
7803  CALL pb_slascal( uplo, mbloc, nbloc, lcmt, alpha,
7804  $ a( ioffd+1+joffa*lda ), lda )
7805  lcmt00 = lcmt
7806  lcmt = lcmt - pmb
7807  mblks = mblkd
7808  mblkd = mblkd - 1
7809  ioffa = ioffd
7810  ioffd = ioffd + mbloc
7811  GO TO 70
7812  END IF
7813 *
7814  tmp1 = m1 - ioffd + iia - 1
7815  IF( lower .AND. tmp1.GT.0 )
7816  $ CALL pb_slascal( 'All', tmp1, nbloc, 0, alpha,
7817  $ a( ioffd+1+joffa*lda ), lda )
7818 *
7819  tmp1 = min( ioffa, iimax ) - iia + 1
7820  m1 = m1 - tmp1
7821  n1 = n1 - nbloc
7822  lcmt00 = lcmt00 + qnb
7823  nblks = nblks - 1
7824  joffa = joffa + nbloc
7825 *
7826  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7827  $ CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7828  $ a( iia+joffa*lda ), lda )
7829 *
7830  iia = ioffa + 1
7831  jja = joffa + 1
7832 *
7833  GO TO 50
7834 *
7835  END IF
7836 *
7837  END IF
7838 *
7839  RETURN
7840 *
7841 * End of PSLASCAL
7842 *
7843  END
7844  SUBROUTINE pslagen( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA,
7845  $ DESCA, IASEED, A, LDA )
7847 * -- PBLAS test routine (version 2.0) --
7848 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7849 * and University of California, Berkeley.
7850 * April 1, 1998
7851 *
7852 * .. Scalar Arguments ..
7853  LOGICAL inplace
7854  CHARACTER*1 aform, diag
7855  INTEGER ia, iaseed, ja, lda, m, n, offa
7856 * ..
7857 * .. Array Arguments ..
7858  INTEGER desca( * )
7859  REAL A( LDA, * )
7860 * ..
7861 *
7862 * Purpose
7863 * =======
7864 *
7865 * PSLAGEN generates (or regenerates) a submatrix sub( A ) denoting
7866 * A(IA:IA+M-1,JA:JA+N-1).
7867 *
7868 * Notes
7869 * =====
7870 *
7871 * A description vector is associated with each 2D block-cyclicly dis-
7872 * tributed matrix. This vector stores the information required to
7873 * establish the mapping between a matrix entry and its corresponding
7874 * process and memory location.
7875 *
7876 * In the following comments, the character _ should be read as
7877 * "of the distributed matrix". Let A be a generic term for any 2D
7878 * block cyclicly distributed matrix. Its description vector is DESCA:
7879 *
7880 * NOTATION STORED IN EXPLANATION
7881 * ---------------- --------------- ------------------------------------
7882 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7883 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7884 * the NPROW x NPCOL BLACS process grid
7885 * A is distributed over. The context
7886 * itself is global, but the handle
7887 * (the integer value) may vary.
7888 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
7889 * ted matrix A, M_A >= 0.
7890 * N_A (global) DESCA( N_ ) The number of columns in the distri-
7891 * buted matrix A, N_A >= 0.
7892 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7893 * block of the matrix A, IMB_A > 0.
7894 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
7895 * left block of the matrix A,
7896 * INB_A > 0.
7897 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7898 * bute the last M_A-IMB_A rows of A,
7899 * MB_A > 0.
7900 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7901 * bute the last N_A-INB_A columns of
7902 * A, NB_A > 0.
7903 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7904 * row of the matrix A is distributed,
7905 * NPROW > RSRC_A >= 0.
7906 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7907 * first column of A is distributed.
7908 * NPCOL > CSRC_A >= 0.
7909 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7910 * array storing the local blocks of
7911 * the distributed matrix A,
7912 * IF( Lc( 1, N_A ) > 0 )
7913 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
7914 * ELSE
7915 * LLD_A >= 1.
7916 *
7917 * Let K be the number of rows of a matrix A starting at the global in-
7918 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7919 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7920 * receive if these K rows were distributed over NPROW processes. If K
7921 * is the number of columns of a matrix A starting at the global index
7922 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7923 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7924 * these K columns were distributed over NPCOL processes.
7925 *
7926 * The values of Lr() and Lc() may be determined via a call to the func-
7927 * tion PB_NUMROC:
7928 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7929 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7930 *
7931 * Arguments
7932 * =========
7933 *
7934 * INPLACE (global input) LOGICAL
7935 * On entry, INPLACE specifies if the matrix should be generated
7936 * in place or not. If INPLACE is .TRUE., the local random array
7937 * to be generated will start in memory at the local memory lo-
7938 * cation A( 1, 1 ), otherwise it will start at the local posi-
7939 * tion induced by IA and JA.
7940 *
7941 * AFORM (global input) CHARACTER*1
7942 * On entry, AFORM specifies the type of submatrix to be genera-
7943 * ted as follows:
7944 * AFORM = 'S', sub( A ) is a symmetric matrix,
7945 * AFORM = 'H', sub( A ) is a Hermitian matrix,
7946 * AFORM = 'T', sub( A ) is overrwritten with the transpose
7947 * of what would normally be generated,
7948 * AFORM = 'C', sub( A ) is overwritten with the conjugate
7949 * transpose of what would normally be genera-
7950 * ted.
7951 * AFORM = 'N', a random submatrix is generated.
7952 *
7953 * DIAG (global input) CHARACTER*1
7954 * On entry, DIAG specifies if the generated submatrix is diago-
7955 * nally dominant or not as follows:
7956 * DIAG = 'D' : sub( A ) is diagonally dominant,
7957 * DIAG = 'N' : sub( A ) is not diagonally dominant.
7958 *
7959 * OFFA (global input) INTEGER
7960 * On entry, OFFA specifies the offdiagonal of the underlying
7961 * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma-
7962 * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0
7963 * specifies the main diagonal, OFFA > 0 specifies a subdiago-
7964 * nal, and OFFA < 0 specifies a superdiagonal (see further de-
7965 * tails).
7966 *
7967 * M (global input) INTEGER
7968 * On entry, M specifies the global number of matrix rows of the
7969 * submatrix sub( A ) to be generated. M must be at least zero.
7970 *
7971 * N (global input) INTEGER
7972 * On entry, N specifies the global number of matrix columns of
7973 * the submatrix sub( A ) to be generated. N must be at least
7974 * zero.
7975 *
7976 * IA (global input) INTEGER
7977 * On entry, IA specifies A's global row index, which points to
7978 * the beginning of the submatrix sub( A ).
7979 *
7980 * JA (global input) INTEGER
7981 * On entry, JA specifies A's global column index, which points
7982 * to the beginning of the submatrix sub( A ).
7983 *
7984 * DESCA (global and local input) INTEGER array
7985 * On entry, DESCA is an integer array of dimension DLEN_. This
7986 * is the array descriptor for the matrix A.
7987 *
7988 * IASEED (global input) INTEGER
7989 * On entry, IASEED specifies the seed number to generate the
7990 * matrix A. IASEED must be at least zero.
7991 *
7992 * A (local output) REAL array
7993 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7994 * at least Lc( 1, JA+N-1 ). On exit, this array contains the
7995 * local entries of the randomly generated submatrix sub( A ).
7996 *
7997 * LDA (local input) INTEGER
7998 * On entry, LDA specifies the local leading dimension of the
7999 * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_).
8000 * This restriction is however not enforced, and this subroutine
8001 * requires only that LDA >= MAX( 1, Mp ) where
8002 *
8003 * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ).
8004 *
8005 * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW
8006 * and NPCOL can be determined by calling the BLACS subroutine
8007 * BLACS_GRIDINFO.
8008 *
8009 * Further Details
8010 * ===============
8011 *
8012 * OFFD is tied to the matrix described by DESCA, as opposed to the
8013 * piece that is currently (re)generated. This is a global information
8014 * independent from the distribution parameters. Below are examples of
8015 * the meaning of OFFD for a global 7 by 5 matrix:
8016 *
8017 * ---------------------------------------------------------------------
8018 * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4
8019 * -------|-------------------------------------------------------------
8020 * | | OFFD=-1 | OFFD=0 OFFD=2
8021 * | V V
8022 * 0 | . d . . . -> d . . . . . . . . .
8023 * 1 | . . d . . . d . . . . . . . .
8024 * 2 | . . . d . . . d . . -> d . . . .
8025 * 3 | . . . . d . . . d . . d . . .
8026 * 4 | . . . . . . . . . d . . d . .
8027 * 5 | . . . . . . . . . . . . . d .
8028 * 6 | . . . . . . . . . . . . . . d
8029 * ---------------------------------------------------------------------
8030 *
8031 * -- Written on April 1, 1998 by
8032 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8033 *
8034 * =====================================================================
8035 *
8036 * .. Parameters ..
8037  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8038  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8039  $ RSRC_
8040  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8041  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8042  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8043  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8044  INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
8045  $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
8046  $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
8047  PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
8048  $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
8049  $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
8050  $ jmp_nqnb = 10, jmp_nqinbloc = 11,
8051  $ jmp_len = 11 )
8052 * ..
8053 * .. Local Scalars ..
8054  LOGICAL DIAGDO, SYMM, HERM, NOTRAN
8055  INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK,
8056  $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB,
8057  $ inb1, inbloc, inbvir, info, ioffda, itmp, iupp,
8058  $ ivir, jja, jlocblk, jlocoff, jvir, lcmt00,
8059  $ lmbloc, lnbloc, low, maxmn, mb, mblks, mp,
8060  $ mrcol, mrrow, mycdist, mycol, myrdist, myrow,
8061  $ nb, nblks, npcol, nprow, nq, nvir, rsrc, upp
8062  REAL ALPHA
8063 * ..
8064 * .. Local Arrays ..
8065  INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ),
8066  $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 )
8067 * ..
8068 * .. External Subroutines ..
8069  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8073  $ pxerbla
8074 * ..
8075 * .. External Functions ..
8076  LOGICAL LSAME
8077  EXTERNAL LSAME
8078 * ..
8079 * .. Intrinsic Functions ..
8080  INTRINSIC MAX, MIN, REAL
8081 * ..
8082 * .. Data Statements ..
8083  DATA ( muladd0( i ), i = 1, 4 ) / 20077, 16838,
8084  $ 12345, 0 /
8085 * ..
8086 * .. Executable Statements ..
8087 *
8088 * Convert descriptor
8089 *
8090  CALL pb_desctrans( desca, desca2 )
8091 *
8092 * Test the input arguments
8093 *
8094  ictxt = desca2( ctxt_ )
8095  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8096 *
8097 * Test the input parameters
8098 *
8099  info = 0
8100  IF( nprow.EQ.-1 ) THEN
8101  info = -( 1000 + ctxt_ )
8102  ELSE
8103  symm = lsame( aform, 'S' )
8104  herm = lsame( aform, 'H' )
8105  notran = lsame( aform, 'N' )
8106  diagdo = lsame( diag, 'D' )
8107  IF( .NOT.( symm.OR.herm.OR.notran ) .AND.
8108  $ .NOT.( lsame( aform, 'T' ) ) .AND.
8109  $ .NOT.( lsame( aform, 'C' ) ) ) THEN
8110  info = -2
8111  ELSE IF( ( .NOT.diagdo ) .AND.
8112  $ ( .NOT.lsame( diag, 'N' ) ) ) THEN
8113  info = -3
8114  END IF
8115  CALL pb_chkmat( ictxt, m, 5, n, 6, ia, ja, desca2, 10, info )
8116  END IF
8117 *
8118  IF( info.NE.0 ) THEN
8119  CALL pxerbla( ictxt, 'PSLAGEN', -info )
8120  RETURN
8121  END IF
8122 *
8123 * Quick return if possible
8124 *
8125  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8126  $ RETURN
8127 *
8128 * Start the operations
8129 *
8130  mb = desca2( mb_ )
8131  nb = desca2( nb_ )
8132  imb = desca2( imb_ )
8133  inb = desca2( inb_ )
8134  rsrc = desca2( rsrc_ )
8135  csrc = desca2( csrc_ )
8136 *
8137 * Figure out local information about the distributed matrix operand
8138 *
8139  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8140  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8141  $ iacol, mrrow, mrcol )
8142 *
8143 * Decide where the entries shall be stored in memory
8144 *
8145  IF( inplace ) THEN
8146  iia = 1
8147  jja = 1
8148  END IF
8149 *
8150 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8151 * ILOW, LOW, IUPP, and UPP.
8152 *
8153  ioffda = ja + offa - ia
8154  CALL pb_binfo( ioffda, mp, nq, imb1, inb1, mb, nb, mrrow,
8155  $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8156  $ lmbloc, lnbloc, ilow, low, iupp, upp )
8157 *
8158 * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST
8159 * This values correspond to the square virtual underlying matrix
8160 * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used
8161 * to set up the random sequence. For practical purposes, the size
8162 * of this virtual matrix is upper bounded by M_ + N_ - 1.
8163 *
8164  itmp = max( 0, -offa )
8165  ivir = ia + itmp
8166  imbvir = imb + itmp
8167  nvir = desca2( m_ ) + itmp
8168 *
8169  CALL pb_locinfo( ivir, imbvir, mb, myrow, rsrc, nprow, ilocblk,
8170  $ ilocoff, myrdist )
8171 *
8172  itmp = max( 0, offa )
8173  jvir = ja + itmp
8174  inbvir = inb + itmp
8175  nvir = max( max( nvir, desca2( n_ ) + itmp ),
8176  $ desca2( m_ ) + desca2( n_ ) - 1 )
8177 *
8178  CALL pb_locinfo( jvir, inbvir, nb, mycol, csrc, npcol, jlocblk,
8179  $ jlocoff, mycdist )
8180 *
8181  IF( symm .OR. herm .OR. notran ) THEN
8182 *
8183  CALL pb_initjmp( .true., nvir, imbvir, inbvir, imbloc, inbloc,
8184  $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8185 *
8186 * Compute constants to jump JMP( * ) numbers in the sequence
8187 *
8188  CALL pb_initmuladd( muladd0, jmp, imuladd )
8189 *
8190 * Compute and set the random value corresponding to A( IA, JA )
8191 *
8192  CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8193  $ myrdist, mycdist, nprow, npcol, jmp,
8194  $ imuladd, iran )
8195 *
8196  CALL pb_slagen( 'Lower', aform, a( iia, jja ), lda, lcmt00,
8197  $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8198  $ nb, lnbloc, jmp, imuladd )
8199 *
8200  END IF
8201 *
8202  IF( symm .OR. herm .OR. ( .NOT. notran ) ) THEN
8203 *
8204  CALL pb_initjmp( .false., nvir, imbvir, inbvir, imbloc, inbloc,
8205  $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8206 *
8207 * Compute constants to jump JMP( * ) numbers in the sequence
8208 *
8209  CALL pb_initmuladd( muladd0, jmp, imuladd )
8210 *
8211 * Compute and set the random value corresponding to A( IA, JA )
8212 *
8213  CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8214  $ myrdist, mycdist, nprow, npcol, jmp,
8215  $ imuladd, iran )
8216 *
8217  CALL pb_slagen( 'Upper', aform, a( iia, jja ), lda, lcmt00,
8218  $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8219  $ nb, lnbloc, jmp, imuladd )
8220 *
8221  END IF
8222 *
8223  IF( diagdo ) THEN
8224 *
8225  maxmn = max( desca2( m_ ), desca2( n_ ) )
8226  alpha = real( maxmn )
8227 *
8228  IF( ioffda.GE.0 ) THEN
8229  CALL psladom( inplace, min( max( 0, m-ioffda ), n ), alpha,
8230  $ a, min( ia+ioffda, ia+m-1 ), ja, desca )
8231  ELSE
8232  CALL psladom( inplace, min( m, max( 0, n+ioffda ) ), alpha,
8233  $ a, ia, min( ja-ioffda, ja+n-1 ), desca )
8234  END IF
8235 *
8236  END IF
8237 *
8238  RETURN
8239 *
8240 * End of PSLAGEN
8241 *
8242  END
8243  SUBROUTINE psladom( INPLACE, N, ALPHA, A, IA, JA, DESCA )
8245 * -- PBLAS test routine (version 2.0) --
8246 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8247 * and University of California, Berkeley.
8248 * April 1, 1998
8249 *
8250 * .. Scalar Arguments ..
8251  LOGICAL INPLACE
8252  INTEGER IA, JA, N
8253  REAL ALPHA
8254 * ..
8255 * .. Array Arguments ..
8256  INTEGER DESCA( * )
8257  REAL A( * )
8258 * ..
8259 *
8260 * Purpose
8261 * =======
8262 *
8263 * PSLADOM adds alpha to the diagonal entries of an n by n submatrix
8264 * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
8265 *
8266 * Notes
8267 * =====
8268 *
8269 * A description vector is associated with each 2D block-cyclicly dis-
8270 * tributed matrix. This vector stores the information required to
8271 * establish the mapping between a matrix entry and its corresponding
8272 * process and memory location.
8273 *
8274 * In the following comments, the character _ should be read as
8275 * "of the distributed matrix". Let A be a generic term for any 2D
8276 * block cyclicly distributed matrix. Its description vector is DESCA:
8277 *
8278 * NOTATION STORED IN EXPLANATION
8279 * ---------------- --------------- ------------------------------------
8280 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8281 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8282 * the NPROW x NPCOL BLACS process grid
8283 * A is distributed over. The context
8284 * itself is global, but the handle
8285 * (the integer value) may vary.
8286 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
8287 * ted matrix A, M_A >= 0.
8288 * N_A (global) DESCA( N_ ) The number of columns in the distri-
8289 * buted matrix A, N_A >= 0.
8290 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8291 * block of the matrix A, IMB_A > 0.
8292 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
8293 * left block of the matrix A,
8294 * INB_A > 0.
8295 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8296 * bute the last M_A-IMB_A rows of A,
8297 * MB_A > 0.
8298 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8299 * bute the last N_A-INB_A columns of
8300 * A, NB_A > 0.
8301 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8302 * row of the matrix A is distributed,
8303 * NPROW > RSRC_A >= 0.
8304 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8305 * first column of A is distributed.
8306 * NPCOL > CSRC_A >= 0.
8307 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8308 * array storing the local blocks of
8309 * the distributed matrix A,
8310 * IF( Lc( 1, N_A ) > 0 )
8311 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
8312 * ELSE
8313 * LLD_A >= 1.
8314 *
8315 * Let K be the number of rows of a matrix A starting at the global in-
8316 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8317 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8318 * receive if these K rows were distributed over NPROW processes. If K
8319 * is the number of columns of a matrix A starting at the global index
8320 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8321 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8322 * these K columns were distributed over NPCOL processes.
8323 *
8324 * The values of Lr() and Lc() may be determined via a call to the func-
8325 * tion PB_NUMROC:
8326 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8327 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8328 *
8329 * Arguments
8330 * =========
8331 *
8332 * INPLACE (global input) LOGICAL
8333 * On entry, INPLACE specifies if the matrix should be generated
8334 * in place or not. If INPLACE is .TRUE., the local random array
8335 * to be generated will start in memory at the local memory lo-
8336 * cation A( 1, 1 ), otherwise it will start at the local posi-
8337 * tion induced by IA and JA.
8338 *
8339 * N (global input) INTEGER
8340 * On entry, N specifies the global order of the submatrix
8341 * sub( A ) to be modified. N must be at least zero.
8342 *
8343 * ALPHA (global input) REAL
8344 * On entry, ALPHA specifies the scalar alpha.
8345 *
8346 * A (local input/local output) REAL array
8347 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8348 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
8349 * the local entries of the matrix A. On exit, the local entries
8350 * of this array corresponding to the main diagonal of sub( A )
8351 * have been updated.
8352 *
8353 * IA (global input) INTEGER
8354 * On entry, IA specifies A's global row index, which points to
8355 * the beginning of the submatrix sub( A ).
8356 *
8357 * JA (global input) INTEGER
8358 * On entry, JA specifies A's global column index, which points
8359 * to the beginning of the submatrix sub( A ).
8360 *
8361 * DESCA (global and local input) INTEGER array
8362 * On entry, DESCA is an integer array of dimension DLEN_. This
8363 * is the array descriptor for the matrix A.
8364 *
8365 * -- Written on April 1, 1998 by
8366 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8367 *
8368 * =====================================================================
8369 *
8370 * .. Parameters ..
8371  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8372  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8373  $ RSRC_
8374  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8375  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8376  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8377  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8378 * ..
8379 * .. Local Scalars ..
8380  LOGICAL GODOWN, GOLEFT
8381  INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
8382  $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
8383  $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
8384  $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
8385  $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
8386  $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
8387  REAL ATMP
8388 * ..
8389 * .. Local Scalars ..
8390  INTEGER DESCA2( DLEN_ )
8391 * ..
8392 * .. External Subroutines ..
8393  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8394  $ pb_desctrans
8395 * ..
8396 * .. Intrinsic Functions ..
8397  INTRINSIC abs, max, min
8398 * ..
8399 * .. Executable Statements ..
8400 *
8401 * Convert descriptor
8402 *
8403  CALL pb_desctrans( desca, desca2 )
8404 *
8405 * Get grid parameters
8406 *
8407  ictxt = desca2( ctxt_ )
8408  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8409 *
8410  IF( n.EQ.0 )
8411  $ RETURN
8412 *
8413  CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
8414  $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
8415  $ iacol, mrrow, mrcol )
8416 *
8417 * Decide where the entries shall be stored in memory
8418 *
8419  IF( inplace ) THEN
8420  iia = 1
8421  jja = 1
8422  END IF
8423 *
8424 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8425 * ILOW, LOW, IUPP, and UPP.
8426 *
8427  mb = desca2( mb_ )
8428  nb = desca2( nb_ )
8429 *
8430  CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
8431  $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
8432  $ lnbloc, ilow, low, iupp, upp )
8433 *
8434  ioffa = iia - 1
8435  joffa = jja - 1
8436  lda = desca2( lld_ )
8437  ldap1 = lda + 1
8438 *
8439  IF( desca2( rsrc_ ).LT.0 ) THEN
8440  pmb = mb
8441  ELSE
8442  pmb = nprow * mb
8443  END IF
8444  IF( desca2( csrc_ ).LT.0 ) THEN
8445  qnb = nb
8446  ELSE
8447  qnb = npcol * nb
8448  END IF
8449 *
8450 * Handle the first block of rows or columns separately, and update
8451 * LCMT00, MBLKS and NBLKS.
8452 *
8453  godown = ( lcmt00.GT.iupp )
8454  goleft = ( lcmt00.LT.ilow )
8455 *
8456  IF( .NOT.godown .AND. .NOT.goleft ) THEN
8457 *
8458 * LCMT00 >= ILOW && LCMT00 <= IUPP
8459 *
8460  IF( lcmt00.GE.0 ) THEN
8461  ijoffa = ioffa+lcmt00 + ( joffa - 1 ) * lda
8462  DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
8463  atmp = a( ijoffa + i*ldap1 )
8464  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8465  10 CONTINUE
8466  ELSE
8467  ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
8468  DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
8469  atmp = a( ijoffa + i*ldap1 )
8470  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8471  20 CONTINUE
8472  END IF
8473  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
8474  godown = .NOT.goleft
8475 *
8476  END IF
8477 *
8478  IF( godown ) THEN
8479 *
8480  lcmt00 = lcmt00 - ( iupp - upp + pmb )
8481  mblks = mblks - 1
8482  ioffa = ioffa + imbloc
8483 *
8484  30 CONTINUE
8485  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8486  lcmt00 = lcmt00 - pmb
8487  mblks = mblks - 1
8488  ioffa = ioffa + mb
8489  GO TO 30
8490  END IF
8491 *
8492  lcmt = lcmt00
8493  mblkd = mblks
8494  ioffd = ioffa
8495 *
8496  mbloc = mb
8497  40 CONTINUE
8498  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
8499  IF( mblkd.EQ.1 )
8500  $ mbloc = lmbloc
8501  IF( lcmt.GE.0 ) THEN
8502  ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8503  DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
8504  atmp = a( ijoffa + i*ldap1 )
8505  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8506  50 CONTINUE
8507  ELSE
8508  ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8509  DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
8510  atmp = a( ijoffa + i*ldap1 )
8511  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8512  60 CONTINUE
8513  END IF
8514  lcmt00 = lcmt
8515  lcmt = lcmt - pmb
8516  mblks = mblkd
8517  mblkd = mblkd - 1
8518  ioffa = ioffd
8519  ioffd = ioffd + mbloc
8520  GO TO 40
8521  END IF
8522 *
8523  lcmt00 = lcmt00 + low - ilow + qnb
8524  nblks = nblks - 1
8525  joffa = joffa + inbloc
8526 *
8527  ELSE IF( goleft ) THEN
8528 *
8529  lcmt00 = lcmt00 + low - ilow + qnb
8530  nblks = nblks - 1
8531  joffa = joffa + inbloc
8532 *
8533  70 CONTINUE
8534  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
8535  lcmt00 = lcmt00 + qnb
8536  nblks = nblks - 1
8537  joffa = joffa + nb
8538  GO TO 70
8539  END IF
8540 *
8541  lcmt = lcmt00
8542  nblkd = nblks
8543  joffd = joffa
8544 *
8545  nbloc = nb
8546  80 CONTINUE
8547  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
8548  IF( nblkd.EQ.1 )
8549  $ nbloc = lnbloc
8550  IF( lcmt.GE.0 ) THEN
8551  ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
8552  DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
8553  atmp = a( ijoffa + i*ldap1 )
8554  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8555  90 CONTINUE
8556  ELSE
8557  ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
8558  DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
8559  atmp = a( ijoffa + i*ldap1 )
8560  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8561  100 CONTINUE
8562  END IF
8563  lcmt00 = lcmt
8564  lcmt = lcmt + qnb
8565  nblks = nblkd
8566  nblkd = nblkd - 1
8567  joffa = joffd
8568  joffd = joffd + nbloc
8569  GO TO 80
8570  END IF
8571 *
8572  lcmt00 = lcmt00 - ( iupp - upp + pmb )
8573  mblks = mblks - 1
8574  ioffa = ioffa + imbloc
8575 *
8576  END IF
8577 *
8578  nbloc = nb
8579  110 CONTINUE
8580  IF( nblks.GT.0 ) THEN
8581  IF( nblks.EQ.1 )
8582  $ nbloc = lnbloc
8583  120 CONTINUE
8584  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8585  lcmt00 = lcmt00 - pmb
8586  mblks = mblks - 1
8587  ioffa = ioffa + mb
8588  GO TO 120
8589  END IF
8590 *
8591  lcmt = lcmt00
8592  mblkd = mblks
8593  ioffd = ioffa
8594 *
8595  mbloc = mb
8596  130 CONTINUE
8597  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
8598  IF( mblkd.EQ.1 )
8599  $ mbloc = lmbloc
8600  IF( lcmt.GE.0 ) THEN
8601  ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8602  DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
8603  atmp = a( ijoffa + i*ldap1 )
8604  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8605  140 CONTINUE
8606  ELSE
8607  ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8608  DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
8609  atmp = a( ijoffa + i*ldap1 )
8610  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8611  150 CONTINUE
8612  END IF
8613  lcmt00 = lcmt
8614  lcmt = lcmt - pmb
8615  mblks = mblkd
8616  mblkd = mblkd - 1
8617  ioffa = ioffd
8618  ioffd = ioffd + mbloc
8619  GO TO 130
8620  END IF
8621 *
8622  lcmt00 = lcmt00 + qnb
8623  nblks = nblks - 1
8624  joffa = joffa + nbloc
8625  GO TO 110
8626 *
8627  END IF
8628 *
8629  RETURN
8630 *
8631 * End of PSLADOM
8632 *
8633  END
8634  SUBROUTINE pb_pslaprnt( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8635  $ CMATNM, NOUT, WORK )
8637 * -- PBLAS test routine (version 2.0) --
8638 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8639 * and University of California, Berkeley.
8640 * April 1, 1998
8641 *
8642 * .. Scalar Arguments ..
8643  INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT
8644 * ..
8645 * .. Array Arguments ..
8646  CHARACTER*(*) CMATNM
8647  INTEGER DESCA( * )
8648  REAL A( * ), WORK( * )
8649 * ..
8650 *
8651 * Purpose
8652 * =======
8653 *
8654 * PB_PSLAPRNT prints to the standard output a submatrix sub( A ) deno-
8655 * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by
8656 * the process of coordinates (IRPRNT, ICPRNT).
8657 *
8658 * Notes
8659 * =====
8660 *
8661 * A description vector is associated with each 2D block-cyclicly dis-
8662 * tributed matrix. This vector stores the information required to
8663 * establish the mapping between a matrix entry and its corresponding
8664 * process and memory location.
8665 *
8666 * In the following comments, the character _ should be read as
8667 * "of the distributed matrix". Let A be a generic term for any 2D
8668 * block cyclicly distributed matrix. Its description vector is DESCA:
8669 *
8670 * NOTATION STORED IN EXPLANATION
8671 * ---------------- --------------- ------------------------------------
8672 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8673 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8674 * the NPROW x NPCOL BLACS process grid
8675 * A is distributed over. The context
8676 * itself is global, but the handle
8677 * (the integer value) may vary.
8678 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
8679 * ted matrix A, M_A >= 0.
8680 * N_A (global) DESCA( N_ ) The number of columns in the distri-
8681 * buted matrix A, N_A >= 0.
8682 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8683 * block of the matrix A, IMB_A > 0.
8684 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
8685 * left block of the matrix A,
8686 * INB_A > 0.
8687 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8688 * bute the last M_A-IMB_A rows of A,
8689 * MB_A > 0.
8690 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8691 * bute the last N_A-INB_A columns of
8692 * A, NB_A > 0.
8693 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8694 * row of the matrix A is distributed,
8695 * NPROW > RSRC_A >= 0.
8696 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8697 * first column of A is distributed.
8698 * NPCOL > CSRC_A >= 0.
8699 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8700 * array storing the local blocks of
8701 * the distributed matrix A,
8702 * IF( Lc( 1, N_A ) > 0 )
8703 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
8704 * ELSE
8705 * LLD_A >= 1.
8706 *
8707 * Let K be the number of rows of a matrix A starting at the global in-
8708 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8709 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8710 * receive if these K rows were distributed over NPROW processes. If K
8711 * is the number of columns of a matrix A starting at the global index
8712 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8713 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8714 * these K columns were distributed over NPCOL processes.
8715 *
8716 * The values of Lr() and Lc() may be determined via a call to the func-
8717 * tion PB_NUMROC:
8718 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8719 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8720 *
8721 * Arguments
8722 * =========
8723 *
8724 * M (global input) INTEGER
8725 * On entry, M specifies the number of rows of the submatrix
8726 * sub( A ). M must be at least zero.
8727 *
8728 * N (global input) INTEGER
8729 * On entry, N specifies the number of columns of the submatrix
8730 * sub( A ). N must be at least zero.
8731 *
8732 * A (local input) REAL array
8733 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8734 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
8735 * the local entries of the matrix A.
8736 *
8737 * IA (global input) INTEGER
8738 * On entry, IA specifies A's global row index, which points to
8739 * the beginning of the submatrix sub( A ).
8740 *
8741 * JA (global input) INTEGER
8742 * On entry, JA specifies A's global column index, which points
8743 * to the beginning of the submatrix sub( A ).
8744 *
8745 * DESCA (global and local input) INTEGER array
8746 * On entry, DESCA is an integer array of dimension DLEN_. This
8747 * is the array descriptor for the matrix A.
8748 *
8749 * IRPRNT (global input) INTEGER
8750 * On entry, IRPRNT specifies the row index of the printing pro-
8751 * cess.
8752 *
8753 * ICPRNT (global input) INTEGER
8754 * On entry, ICPRNT specifies the column index of the printing
8755 * process.
8756 *
8757 * CMATNM (global input) CHARACTER*(*)
8758 * On entry, CMATNM is the name of the matrix to be printed.
8759 *
8760 * NOUT (global input) INTEGER
8761 * On entry, NOUT specifies the output unit number. When NOUT is
8762 * equal to 6, the submatrix is printed on the screen.
8763 *
8764 * WORK (local workspace) REAL array
8765 * On entry, WORK is a work array of dimension at least equal to
8766 * MAX( IMB_A, MB_A ).
8767 *
8768 * -- Written on April 1, 1998 by
8769 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8770 *
8771 * =====================================================================
8772 *
8773 * .. Parameters ..
8774  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8775  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8776  $ RSRC_
8777  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8778  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8779  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8780  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8781 * ..
8782 * .. Local Scalars ..
8783  INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW
8784 * ..
8785 * .. Local Arrays ..
8786  INTEGER DESCA2( DLEN_ )
8787 * ..
8788 * .. External Subroutines ..
8789  EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PSLAPRN2
8790 * ..
8791 * .. Executable Statements ..
8792 *
8793 * Quick return if possible
8794 *
8795  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8796  $ RETURN
8797 *
8798 * Convert descriptor
8799 *
8800  CALL pb_desctrans( desca, desca2 )
8801 *
8802  CALL blacs_gridinfo( desca2( ctxt_ ), nprow, npcol, myrow, mycol )
8803 *
8804  IF( desca2( rsrc_ ).GE.0 ) THEN
8805  IF( desca2( csrc_ ).GE.0 ) THEN
8806  CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt, icprnt,
8807  $ cmatnm, nout, desca2( rsrc_ ),
8808  $ desca2( csrc_ ), work )
8809  ELSE
8810  DO 10 pcol = 0, npcol - 1
8811  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8812  $ WRITE( nout, * ) 'Colum-replicated array -- ' ,
8813  $ 'copy in process column: ', pcol
8814  CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8815  $ icprnt, cmatnm, nout, desca2( rsrc_ ),
8816  $ pcol, work )
8817  10 CONTINUE
8818  END IF
8819  ELSE
8820  IF( desca2( csrc_ ).GE.0 ) THEN
8821  DO 20 prow = 0, nprow - 1
8822  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8823  $ WRITE( nout, * ) 'Row-replicated array -- ' ,
8824  $ 'copy in process row: ', prow
8825  CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8826  $ icprnt, cmatnm, nout, prow,
8827  $ desca2( csrc_ ), work )
8828  20 CONTINUE
8829  ELSE
8830  DO 40 prow = 0, nprow - 1
8831  DO 30 pcol = 0, npcol - 1
8832  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8833  $ WRITE( nout, * ) 'Replicated array -- ' ,
8834  $ 'copy in process (', prow, ',', pcol, ')'
8835  CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8836  $ icprnt, cmatnm, nout, prow, pcol,
8837  $ work )
8838  30 CONTINUE
8839  40 CONTINUE
8840  END IF
8841  END IF
8842 *
8843  RETURN
8844 *
8845 * End of PB_PSLAPRNT
8846 *
8847  END
8848  SUBROUTINE pb_pslaprn2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8849  $ CMATNM, NOUT, PROW, PCOL, WORK )
8851 * -- PBLAS test routine (version 2.0) --
8852 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8853 * and University of California, Berkeley.
8854 * April 1, 1998
8855 *
8856 * .. Scalar Arguments ..
8857  INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW
8858 * ..
8859 * .. Array Arguments ..
8860  CHARACTER*(*) CMATNM
8861  INTEGER DESCA( * )
8862  REAL A( * ), WORK( * )
8863 * ..
8864 *
8865 * .. Parameters ..
8866  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8867  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8868  $ RSRC_
8869  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
8870  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8871  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8872  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8873 * ..
8874 * .. Local Scalars ..
8875  LOGICAL AISCOLREP, AISROWREP
8876  INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
8877  $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
8878  $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW
8879 * ..
8880 * .. External Subroutines ..
8881  EXTERNAL blacs_barrier, blacs_gridinfo, pb_infog2l,
8882  $ sgerv2d, sgesd2d
8883 * ..
8884 * .. Intrinsic Functions ..
8885  INTRINSIC min
8886 * ..
8887 * .. Executable Statements ..
8888 *
8889 * Get grid parameters
8890 *
8891  ictxt = desca( ctxt_ )
8892  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8893  CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
8894  $ iia, jja, iarow, iacol )
8895  ii = iia
8896  jj = jja
8897  IF( desca( rsrc_ ).LT.0 ) THEN
8898  aisrowrep = .true.
8899  iarow = prow
8900  icurrow = prow
8901  ELSE
8902  aisrowrep = .false.
8903  icurrow = iarow
8904  END IF
8905  IF( desca( csrc_ ).LT.0 ) THEN
8906  aiscolrep = .true.
8907  iacol = pcol
8908  icurcol = pcol
8909  ELSE
8910  aiscolrep = .false.
8911  icurcol = iacol
8912  END IF
8913  lda = desca( lld_ )
8914  ldw = max( desca( imb_ ), desca( mb_ ) )
8915 *
8916 * Handle the first block of column separately
8917 *
8918  jb = desca( inb_ ) - ja + 1
8919  IF( jb.LE.0 )
8920  $ jb = ( (-jb) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
8921  jb = min( jb, n )
8922  jn = ja+jb-1
8923  DO 60 h = 0, jb-1
8924  ib = desca( imb_ ) - ia + 1
8925  IF( ib.LE.0 )
8926  $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
8927  ib = min( ib, m )
8928  in = ia+ib-1
8929  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8930  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8931  DO 10 k = 0, ib-1
8932  WRITE( nout, fmt = 9999 )
8933  $ cmatnm, ia+k, ja+h, a( ii+k+(jj+h-1)*lda )
8934  10 CONTINUE
8935  END IF
8936  ELSE
8937  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8938  CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ), lda,
8939  $ irprnt, icprnt )
8940  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8941  CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow, icurcol )
8942  DO 20 k = 1, ib
8943  WRITE( nout, fmt = 9999 )
8944  $ cmatnm, ia+k-1, ja+h, work( k )
8945  20 CONTINUE
8946  END IF
8947  END IF
8948  IF( myrow.EQ.icurrow )
8949  $ ii = ii + ib
8950  IF( .NOT.aisrowrep )
8951  $ icurrow = mod( icurrow+1, nprow )
8952  CALL blacs_barrier( ictxt, 'All' )
8953 *
8954 * Loop over remaining block of rows
8955 *
8956  DO 50 i = in+1, ia+m-1, desca( mb_ )
8957  ib = min( desca( mb_ ), ia+m-i )
8958  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8959  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8960  DO 30 k = 0, ib-1
8961  WRITE( nout, fmt = 9999 )
8962  $ cmatnm, i+k, ja+h, a( ii+k+(jj+h-1)*lda )
8963  30 CONTINUE
8964  END IF
8965  ELSE
8966  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8967  CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
8968  $ lda, irprnt, icprnt )
8969  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8970  CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
8971  $ icurcol )
8972  DO 40 k = 1, ib
8973  WRITE( nout, fmt = 9999 )
8974  $ cmatnm, i+k-1, ja+h, work( k )
8975  40 CONTINUE
8976  END IF
8977  END IF
8978  IF( myrow.EQ.icurrow )
8979  $ ii = ii + ib
8980  IF( .NOT.aisrowrep )
8981  $ icurrow = mod( icurrow+1, nprow )
8982  CALL blacs_barrier( ictxt, 'All' )
8983  50 CONTINUE
8984 *
8985  ii = iia
8986  icurrow = iarow
8987  60 CONTINUE
8988 *
8989  IF( mycol.EQ.icurcol )
8990  $ jj = jj + jb
8991  IF( .NOT.aiscolrep )
8992  $ icurcol = mod( icurcol+1, npcol )
8993  CALL blacs_barrier( ictxt, 'All' )
8994 *
8995 * Loop over remaining column blocks
8996 *
8997  DO 130 j = jn+1, ja+n-1, desca( nb_ )
8998  jb = min( desca( nb_ ), ja+n-j )
8999  DO 120 h = 0, jb-1
9000  ib = desca( imb_ )-ia+1
9001  IF( ib.LE.0 )
9002  $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9003  ib = min( ib, m )
9004  in = ia+ib-1
9005  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9006  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9007  DO 70 k = 0, ib-1
9008  WRITE( nout, fmt = 9999 )
9009  $ cmatnm, ia+k, j+h, a( ii+k+(jj+h-1)*lda )
9010  70 CONTINUE
9011  END IF
9012  ELSE
9013  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9014  CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9015  $ lda, irprnt, icprnt )
9016  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9017  CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9018  $ icurcol )
9019  DO 80 k = 1, ib
9020  WRITE( nout, fmt = 9999 )
9021  $ cmatnm, ia+k-1, j+h, work( k )
9022  80 CONTINUE
9023  END IF
9024  END IF
9025  IF( myrow.EQ.icurrow )
9026  $ ii = ii + ib
9027  icurrow = mod( icurrow+1, nprow )
9028  CALL blacs_barrier( ictxt, 'All' )
9029 *
9030 * Loop over remaining block of rows
9031 *
9032  DO 110 i = in+1, ia+m-1, desca( mb_ )
9033  ib = min( desca( mb_ ), ia+m-i )
9034  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9035  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9036  DO 90 k = 0, ib-1
9037  WRITE( nout, fmt = 9999 )
9038  $ cmatnm, i+k, j+h, a( ii+k+(jj+h-1)*lda )
9039  90 CONTINUE
9040  END IF
9041  ELSE
9042  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9043  CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9044  $ lda, irprnt, icprnt )
9045  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9046  CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9047  $ icurcol )
9048  DO 100 k = 1, ib
9049  WRITE( nout, fmt = 9999 )
9050  $ cmatnm, i+k-1, j+h, work( k )
9051  100 CONTINUE
9052  END IF
9053  END IF
9054  IF( myrow.EQ.icurrow )
9055  $ ii = ii + ib
9056  IF( .NOT.aisrowrep )
9057  $ icurrow = mod( icurrow+1, nprow )
9058  CALL blacs_barrier( ictxt, 'All' )
9059  110 CONTINUE
9060 *
9061  ii = iia
9062  icurrow = iarow
9063  120 CONTINUE
9064 *
9065  IF( mycol.EQ.icurcol )
9066  $ jj = jj + jb
9067  IF( .NOT.aiscolrep )
9068  $ icurcol = mod( icurcol+1, npcol )
9069  CALL blacs_barrier( ictxt, 'All' )
9070 *
9071  130 CONTINUE
9072 *
9073  9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8 )
9074 *
9075  RETURN
9076 *
9077 * End of PB_PSLAPRN2
9078 *
9079  END
9080  SUBROUTINE pb_sfillpad( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL )
9082 * -- PBLAS test routine (version 2.0) --
9083 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9084 * and University of California, Berkeley.
9085 * April 1, 1998
9086 *
9087 * .. Scalar Arguments ..
9088  INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9089  REAL CHKVAL
9090 * ..
9091 * .. Array Arguments ..
9092  REAL A( * )
9093 * ..
9094 *
9095 * Purpose
9096 * =======
9097 *
9098 * PB_SFILLPAD surrounds a two dimensional local array with a guard-zone
9099 * initialized to the value CHKVAL. The user may later call the routine
9100 * PB_SCHEKPAD to discover if the guardzone has been violated. There are
9101 * three guardzones. The first is a buffer of size IPRE that is before
9102 * the start of the array. The second is the buffer of size IPOST which
9103 * is after the end of the array to be padded. Finally, there is a guard
9104 * zone inside every column of the array to be padded, in the elements
9105 * of A(M+1:LDA, J).
9106 *
9107 * Arguments
9108 * =========
9109 *
9110 * ICTXT (local input) INTEGER
9111 * On entry, ICTXT specifies the BLACS context handle, indica-
9112 * ting the global context of the operation. The context itself
9113 * is global, but the value of ICTXT is local.
9114 *
9115 * M (local input) INTEGER
9116 * On entry, M specifies the number of rows in the local array
9117 * A. M must be at least zero.
9118 *
9119 * N (local input) INTEGER
9120 * On entry, N specifies the number of columns in the local ar-
9121 * ray A. N must be at least zero.
9122 *
9123 * A (local input/local output) REAL array
9124 * On entry, A is an array of dimension (LDA,N). On exit, this
9125 * array is the padded array.
9126 *
9127 * LDA (local input) INTEGER
9128 * On entry, LDA specifies the leading dimension of the local
9129 * array to be padded. LDA must be at least MAX( 1, M ).
9130 *
9131 * IPRE (local input) INTEGER
9132 * On entry, IPRE specifies the size of the guard zone to put
9133 * before the start of the padded array.
9134 *
9135 * IPOST (local input) INTEGER
9136 * On entry, IPOST specifies the size of the guard zone to put
9137 * after the end of the padded array.
9138 *
9139 * CHKVAL (local input) REAL
9140 * On entry, CHKVAL specifies the value to pad the array with.
9141 *
9142 * -- Written on April 1, 1998 by
9143 * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9144 *
9145 * =====================================================================
9146 *
9147 * .. Local Scalars ..
9148  INTEGER I, J, K
9149 * ..
9150 * .. Executable Statements ..
9151 *
9152 * Put check buffer in front of A
9153 *
9154  IF( IPRE.GT.0 ) THEN
9155  DO 10 I = 1, ipre
9156  a( i ) = chkval
9157  10 CONTINUE
9158  ELSE
9159  WRITE( *, fmt = '(A)' )
9160  $ 'WARNING no pre-guardzone in PB_SFILLPAD'
9161  END IF
9162 *
9163 * Put check buffer in back of A
9164 *
9165  IF( ipost.GT.0 ) THEN
9166  j = ipre+lda*n+1
9167  DO 20 i = j, j+ipost-1
9168  a( i ) = chkval
9169  20 CONTINUE
9170  ELSE
9171  WRITE( *, fmt = '(A)' )
9172  $ 'WARNING no post-guardzone in PB_SFILLPAD'
9173  END IF
9174 *
9175 * Put check buffer in all (LDA-M) gaps
9176 *
9177  IF( lda.GT.m ) THEN
9178  k = ipre + m + 1
9179  DO 40 j = 1, n
9180  DO 30 i = k, k + ( lda - m ) - 1
9181  a( i ) = chkval
9182  30 CONTINUE
9183  k = k + lda
9184  40 CONTINUE
9185  END IF
9186 *
9187  RETURN
9188 *
9189 * End of PB_SFILLPAD
9190 *
9191  END
9192  SUBROUTINE pb_schekpad( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST,
9193  $ CHKVAL )
9195 * -- PBLAS test routine (version 2.0) --
9196 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9197 * and University of California, Berkeley.
9198 * April 1, 1998
9199 *
9200 * .. Scalar Arguments ..
9201  INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9202  REAL CHKVAL
9203 * ..
9204 * .. Array Arguments ..
9205  CHARACTER*(*) MESS
9206  REAL A( * )
9207 * ..
9208 *
9209 * Purpose
9210 * =======
9211 *
9212 * PB_SCHEKPAD checks that the padding around a local array has not been
9213 * overwritten since the call to PB_SFILLPAD. Three types of errors are
9214 * reported:
9215 *
9216 * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has
9217 * occurred in the first IPRE elements which form a buffer before the
9218 * beginning of A. Therefore, the error message:
9219 * 'Overwrite in pre-guardzone: loc( 5) = 18.00000'
9220 * tells that the 5th element of the IPRE long buffer has been overwrit-
9221 * ten with the value 18, where it should still have the value CHKVAL.
9222 *
9223 * 2) Overwrite in post-guardzone. This indicates a memory overwrite has
9224 * occurred in the last IPOST elements which form a buffer after the end
9225 * of A. Error reports are refered from the end of A. Therefore,
9226 * 'Overwrite in post-guardzone: loc( 19) = 24.00000'
9227 * tells that the 19th element after the end of A was overwritten with
9228 * the value 24, where it should still have the value of CHKVAL.
9229 *
9230 * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were
9231 * overwritten. So,
9232 * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000'
9233 * tells that the element at the 12th row and 3rd column of A was over-
9234 * written with the value of 22, where it should still have the value of
9235 * CHKVAL.
9236 *
9237 * Arguments
9238 * =========
9239 *
9240 * ICTXT (local input) INTEGER
9241 * On entry, ICTXT specifies the BLACS context handle, indica-
9242 * ting the global context of the operation. The context itself
9243 * is global, but the value of ICTXT is local.
9244 *
9245 * MESS (local input) CHARACTER*(*)
9246 * On entry, MESS is a ttring containing a user-defined message.
9247 *
9248 * M (local input) INTEGER
9249 * On entry, M specifies the number of rows in the local array
9250 * A. M must be at least zero.
9251 *
9252 * N (local input) INTEGER
9253 * On entry, N specifies the number of columns in the local ar-
9254 * ray A. N must be at least zero.
9255 *
9256 * A (local input) REAL array
9257 * On entry, A is an array of dimension (LDA,N).
9258 *
9259 * LDA (local input) INTEGER
9260 * On entry, LDA specifies the leading dimension of the local
9261 * array to be padded. LDA must be at least MAX( 1, M ).
9262 *
9263 * IPRE (local input) INTEGER
9264 * On entry, IPRE specifies the size of the guard zone to put
9265 * before the start of the padded array.
9266 *
9267 * IPOST (local input) INTEGER
9268 * On entry, IPOST specifies the size of the guard zone to put
9269 * after the end of the padded array.
9270 *
9271 * CHKVAL (local input) REAL
9272 * On entry, CHKVAL specifies the value to pad the array with.
9273 *
9274 *
9275 * -- Written on April 1, 1998 by
9276 * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9277 *
9278 * =====================================================================
9279 *
9280 * .. Local Scalars ..
9281  CHARACTER*1 TOP
9282  INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL,
9283  $ NPROW
9284 * ..
9285 * .. External Subroutines ..
9286  EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET
9287 * ..
9288 * .. Executable Statements ..
9289 *
9290 * Get grid parameters
9291 *
9292  CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
9293  IAM = myrow*npcol + mycol
9294  info = -1
9295 *
9296 * Check buffer in front of A
9297 *
9298  IF( ipre.GT.0 ) THEN
9299  DO 10 i = 1, ipre
9300  IF( a( i ).NE.chkval ) THEN
9301  WRITE( *, fmt = 9998 ) myrow, mycol, mess, ' pre', i,
9302  $ a( i )
9303  info = iam
9304  END IF
9305  10 CONTINUE
9306  ELSE
9307  WRITE( *, fmt = * ) 'WARNING no pre-guardzone in PB_SCHEKPAD'
9308  END IF
9309 *
9310 * Check buffer after A
9311 *
9312  IF( ipost.GT.0 ) THEN
9313  j = ipre+lda*n+1
9314  DO 20 i = j, j+ipost-1
9315  IF( a( i ).NE.chkval ) THEN
9316  WRITE( *, fmt = 9998 ) myrow, mycol, mess, 'post',
9317  $ i-j+1, a( i )
9318  info = iam
9319  END IF
9320  20 CONTINUE
9321  ELSE
9322  WRITE( *, fmt = * )
9323  $ 'WARNING no post-guardzone buffer in PB_SCHEKPAD'
9324  END IF
9325 *
9326 * Check all (LDA-M) gaps
9327 *
9328  IF( lda.GT.m ) THEN
9329  k = ipre + m + 1
9330  DO 40 j = 1, n
9331  DO 30 i = k, k + (lda-m) - 1
9332  IF( a( i ).NE.chkval ) THEN
9333  WRITE( *, fmt = 9997 ) myrow, mycol, mess,
9334  $ i-ipre-lda*(j-1), j, a( i )
9335  info = iam
9336  END IF
9337  30 CONTINUE
9338  k = k + lda
9339  40 CONTINUE
9340  END IF
9341 *
9342  CALL pb_topget( ictxt, 'Combine', 'All', top )
9343  CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, idumm, idumm, -1,
9344  $ 0, 0 )
9345  IF( iam.EQ.0 .AND. info.GE.0 ) THEN
9346  WRITE( *, fmt = 9999 ) info / npcol, mod( info, npcol ), mess
9347  END IF
9348 *
9349  9999 FORMAT( '{', i5, ',', i5, '}: Memory overwrite in ', a )
9350  9998 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9351  $ a4, '-guardzone: loc(', i3, ') = ', g11.4 )
9352  9997 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9353  $ 'lda-m gap: loc(', i3, ',', i3, ') = ', g11.4 )
9354 *
9355  RETURN
9356 *
9357 * End of PB_SCHEKPAD
9358 *
9359  END
9360  SUBROUTINE pb_slaset( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA )
9362 * -- PBLAS test routine (version 2.0) --
9363 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9364 * and University of California, Berkeley.
9365 * April 1, 1998
9366 *
9367 * .. Scalar Arguments ..
9368  CHARACTER*1 UPLO
9369  INTEGER IOFFD, LDA, M, N
9370  REAL ALPHA, BETA
9371 * ..
9372 * .. Array Arguments ..
9373  REAL A( LDA, * )
9374 * ..
9375 *
9376 * Purpose
9377 * =======
9378 *
9379 * PB_SLASET initializes a two-dimensional array A to beta on the diago-
9380 * nal specified by IOFFD and alpha on the offdiagonals.
9381 *
9382 * Arguments
9383 * =========
9384 *
9385 * UPLO (global input) CHARACTER*1
9386 * On entry, UPLO specifies which trapezoidal part of the ar-
9387 * ray A is to be set as follows:
9388 * = 'L' or 'l': Lower triangular part is set; the strictly
9389 * upper triangular part of A is not changed,
9390 * = 'U' or 'u': Upper triangular part is set; the strictly
9391 * lower triangular part of A is not changed,
9392 * = 'D' or 'd' Only the diagonal of A is set,
9393 * Otherwise: All of the array A is set.
9394 *
9395 * M (input) INTEGER
9396 * On entry, M specifies the number of rows of the array A. M
9397 * must be at least zero.
9398 *
9399 * N (input) INTEGER
9400 * On entry, N specifies the number of columns of the array A.
9401 * N must be at least zero.
9402 *
9403 * IOFFD (input) INTEGER
9404 * On entry, IOFFD specifies the position of the offdiagonal de-
9405 * limiting the upper and lower trapezoidal part of A as follows
9406 * (see the notes below):
9407 *
9408 * IOFFD = 0 specifies the main diagonal A( i, i ),
9409 * with i = 1 ... MIN( M, N ),
9410 * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9411 * with i = 1 ... MIN( M-IOFFD, N ),
9412 * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9413 * with i = 1 ... MIN( M, N+IOFFD ).
9414 *
9415 * ALPHA (input) REAL
9416 * On entry, ALPHA specifies the value to which the offdiagonal
9417 * array elements are set to.
9418 *
9419 * BETA (input) REAL
9420 * On entry, BETA specifies the value to which the diagonal ar-
9421 * ray elements are set to.
9422 *
9423 * A (input/output) REAL array
9424 * On entry, A is an array of dimension (LDA,N). Before entry
9425 * with UPLO = 'U' or 'u', the leading m by n part of the array
9426 * A must contain the upper trapezoidal part of the matrix as
9427 * specified by IOFFD to be set, and the strictly lower trape-
9428 * zoidal part of A is not referenced; When IUPLO = 'L' or 'l',
9429 * the leading m by n part of the array A must contain the
9430 * lower trapezoidal part of the matrix as specified by IOFFD to
9431 * be set, and the strictly upper trapezoidal part of A is
9432 * not referenced.
9433 *
9434 * LDA (input) INTEGER
9435 * On entry, LDA specifies the leading dimension of the array A.
9436 * LDA must be at least max( 1, M ).
9437 *
9438 * Notes
9439 * =====
9440 * N N
9441 * ---------------------------- -----------
9442 * | d | | |
9443 * M | d 'U' | | 'U' |
9444 * | 'L' 'D' | |d |
9445 * | d | M | d |
9446 * ---------------------------- | 'D' |
9447 * | d |
9448 * IOFFD < 0 | 'L' d |
9449 * | d|
9450 * N | |
9451 * ----------- -----------
9452 * | d 'U'|
9453 * | d | IOFFD > 0
9454 * M | 'D' |
9455 * | d| N
9456 * | 'L' | ----------------------------
9457 * | | | 'U' |
9458 * | | |d |
9459 * | | | 'D' |
9460 * | | | d |
9461 * | | |'L' d |
9462 * ----------- ----------------------------
9463 *
9464 * -- Written on April 1, 1998 by
9465 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9466 *
9467 * =====================================================================
9468 *
9469 * .. Local Scalars ..
9470  INTEGER I, J, JTMP, MN
9471 * ..
9472 * .. External Functions ..
9473  LOGICAL LSAME
9474  EXTERNAL LSAME
9475 * ..
9476 * .. Intrinsic Functions ..
9477  INTRINSIC MAX, MIN
9478 * ..
9479 * .. Executable Statements ..
9480 *
9481 * Quick return if possible
9482 *
9483  IF( M.LE.0 .OR. N.LE.0 )
9484  $ RETURN
9485 *
9486 * Start the operations
9487 *
9488  IF( LSAME( UPLO, 'L' ) ) THEN
9489 *
9490 * Set the diagonal to BETA and the strictly lower triangular
9491 * part of the array to ALPHA.
9492 *
9493  mn = max( 0, -ioffd )
9494  DO 20 j = 1, min( mn, n )
9495  DO 10 i = 1, m
9496  a( i, j ) = alpha
9497  10 CONTINUE
9498  20 CONTINUE
9499  DO 40 j = mn + 1, min( m - ioffd, n )
9500  jtmp = j + ioffd
9501  a( jtmp, j ) = beta
9502  DO 30 i = jtmp + 1, m
9503  a( i, j ) = alpha
9504  30 CONTINUE
9505  40 CONTINUE
9506 *
9507  ELSE IF( lsame( uplo, 'U' ) ) THEN
9508 *
9509 * Set the diagonal to BETA and the strictly upper triangular
9510 * part of the array to ALPHA.
9511 *
9512  mn = min( m - ioffd, n )
9513  DO 60 j = max( 0, -ioffd ) + 1, mn
9514  jtmp = j + ioffd
9515  DO 50 i = 1, jtmp - 1
9516  a( i, j ) = alpha
9517  50 CONTINUE
9518  a( jtmp, j ) = beta
9519  60 CONTINUE
9520  DO 80 j = max( 0, mn ) + 1, n
9521  DO 70 i = 1, m
9522  a( i, j ) = alpha
9523  70 CONTINUE
9524  80 CONTINUE
9525 *
9526  ELSE IF( lsame( uplo, 'D' ) ) THEN
9527 *
9528 * Set the array to BETA on the diagonal.
9529 *
9530  DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9531  a( j + ioffd, j ) = beta
9532  90 CONTINUE
9533 *
9534  ELSE
9535 *
9536 * Set the array to BETA on the diagonal and ALPHA on the
9537 * offdiagonal.
9538 *
9539  DO 110 j = 1, n
9540  DO 100 i = 1, m
9541  a( i, j ) = alpha
9542  100 CONTINUE
9543  110 CONTINUE
9544  IF( alpha.NE.beta .AND. ioffd.LT.m .AND. ioffd.GT.-n ) THEN
9545  DO 120 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9546  a( j + ioffd, j ) = beta
9547  120 CONTINUE
9548  END IF
9549 *
9550  END IF
9551 *
9552  RETURN
9553 *
9554 * End of PB_SLASET
9555 *
9556  END
9557  SUBROUTINE pb_slascal( UPLO, M, N, IOFFD, ALPHA, A, LDA )
9559 * -- PBLAS test routine (version 2.0) --
9560 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9561 * and University of California, Berkeley.
9562 * April 1, 1998
9563 *
9564 * .. Scalar Arguments ..
9565  CHARACTER*1 UPLO
9566  INTEGER IOFFD, LDA, M, N
9567  REAL ALPHA
9568 * ..
9569 * .. Array Arguments ..
9570  REAL A( LDA, * )
9571 * ..
9572 *
9573 * Purpose
9574 * =======
9575 *
9576 * PB_SLASCAL scales a two-dimensional array A by the scalar alpha.
9577 *
9578 * Arguments
9579 * =========
9580 *
9581 * UPLO (input) CHARACTER*1
9582 * On entry, UPLO specifies which trapezoidal part of the ar-
9583 * ray A is to be scaled as follows:
9584 * = 'L' or 'l': the lower trapezoid of A is scaled,
9585 * = 'U' or 'u': the upper trapezoid of A is scaled,
9586 * = 'D' or 'd': diagonal specified by IOFFD is scaled,
9587 * Otherwise: all of the array A is scaled.
9588 *
9589 * M (input) INTEGER
9590 * On entry, M specifies the number of rows of the array A. M
9591 * must be at least zero.
9592 *
9593 * N (input) INTEGER
9594 * On entry, N specifies the number of columns of the array A.
9595 * N must be at least zero.
9596 *
9597 * IOFFD (input) INTEGER
9598 * On entry, IOFFD specifies the position of the offdiagonal de-
9599 * limiting the upper and lower trapezoidal part of A as follows
9600 * (see the notes below):
9601 *
9602 * IOFFD = 0 specifies the main diagonal A( i, i ),
9603 * with i = 1 ... MIN( M, N ),
9604 * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9605 * with i = 1 ... MIN( M-IOFFD, N ),
9606 * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9607 * with i = 1 ... MIN( M, N+IOFFD ).
9608 *
9609 * ALPHA (input) REAL
9610 * On entry, ALPHA specifies the scalar alpha.
9611 *
9612 * A (input/output) REAL array
9613 * On entry, A is an array of dimension (LDA,N). Before entry
9614 * with UPLO = 'U' or 'u', the leading m by n part of the array
9615 * A must contain the upper trapezoidal part of the matrix as
9616 * specified by IOFFD to be scaled, and the strictly lower tra-
9617 * pezoidal part of A is not referenced; When UPLO = 'L' or 'l',
9618 * the leading m by n part of the array A must contain the lower
9619 * trapezoidal part of the matrix as specified by IOFFD to be
9620 * scaled, and the strictly upper trapezoidal part of A is not
9621 * referenced. On exit, the entries of the trapezoid part of A
9622 * determined by UPLO and IOFFD are scaled.
9623 *
9624 * LDA (input) INTEGER
9625 * On entry, LDA specifies the leading dimension of the array A.
9626 * LDA must be at least max( 1, M ).
9627 *
9628 * Notes
9629 * =====
9630 * N N
9631 * ---------------------------- -----------
9632 * | d | | |
9633 * M | d 'U' | | 'U' |
9634 * | 'L' 'D' | |d |
9635 * | d | M | d |
9636 * ---------------------------- | 'D' |
9637 * | d |
9638 * IOFFD < 0 | 'L' d |
9639 * | d|
9640 * N | |
9641 * ----------- -----------
9642 * | d 'U'|
9643 * | d | IOFFD > 0
9644 * M | 'D' |
9645 * | d| N
9646 * | 'L' | ----------------------------
9647 * | | | 'U' |
9648 * | | |d |
9649 * | | | 'D' |
9650 * | | | d |
9651 * | | |'L' d |
9652 * ----------- ----------------------------
9653 *
9654 * -- Written on April 1, 1998 by
9655 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9656 *
9657 * =====================================================================
9658 *
9659 * .. Local Scalars ..
9660  INTEGER I, J, JTMP, MN
9661 * ..
9662 * .. External Functions ..
9663  LOGICAL LSAME
9664  EXTERNAL LSAME
9665 * ..
9666 * .. Intrinsic Functions ..
9667  INTRINSIC MAX, MIN
9668 * ..
9669 * .. Executable Statements ..
9670 *
9671 * Quick return if possible
9672 *
9673  IF( M.LE.0 .OR. N.LE.0 )
9674  $ RETURN
9675 *
9676 * Start the operations
9677 *
9678  IF( LSAME( UPLO, 'L' ) ) THEN
9679 *
9680 * Scales the lower triangular part of the array by ALPHA.
9681 *
9682  MN = max( 0, -ioffd )
9683  DO 20 j = 1, min( mn, n )
9684  DO 10 i = 1, m
9685  a( i, j ) = alpha * a( i, j )
9686  10 CONTINUE
9687  20 CONTINUE
9688  DO 40 j = mn + 1, min( m - ioffd, n )
9689  DO 30 i = j + ioffd, m
9690  a( i, j ) = alpha * a( i, j )
9691  30 CONTINUE
9692  40 CONTINUE
9693 *
9694  ELSE IF( lsame( uplo, 'U' ) ) THEN
9695 *
9696 * Scales the upper triangular part of the array by ALPHA.
9697 *
9698  mn = min( m - ioffd, n )
9699  DO 60 j = max( 0, -ioffd ) + 1, mn
9700  DO 50 i = 1, j + ioffd
9701  a( i, j ) = alpha * a( i, j )
9702  50 CONTINUE
9703  60 CONTINUE
9704  DO 80 j = max( 0, mn ) + 1, n
9705  DO 70 i = 1, m
9706  a( i, j ) = alpha * a( i, j )
9707  70 CONTINUE
9708  80 CONTINUE
9709 *
9710  ELSE IF( lsame( uplo, 'D' ) ) THEN
9711 *
9712 * Scales the diagonal entries by ALPHA.
9713 *
9714  DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9715  jtmp = j + ioffd
9716  a( jtmp, j ) = alpha * a( jtmp, j )
9717  90 CONTINUE
9718 *
9719  ELSE
9720 *
9721 * Scales the entire array by ALPHA.
9722 *
9723  DO 110 j = 1, n
9724  DO 100 i = 1, m
9725  a( i, j ) = alpha * a( i, j )
9726  100 CONTINUE
9727  110 CONTINUE
9728 *
9729  END IF
9730 *
9731  RETURN
9732 *
9733 * End of PB_SLASCAL
9734 *
9735  END
9736  SUBROUTINE pb_slagen( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS,
9737  $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB,
9738  $ LNBLOC, JMP, IMULADD )
9740 * -- PBLAS test routine (version 2.0) --
9741 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9742 * and University of California, Berkeley.
9743 * April 1, 1998
9744 *
9745 * .. Scalar Arguments ..
9746  CHARACTER*1 UPLO, AFORM
9747  INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC,
9748  $ mb, mblks, nb, nblks
9749 * ..
9750 * .. Array Arguments ..
9751  INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * )
9752  REAL A( LDA, * )
9753 * ..
9754 *
9755 * Purpose
9756 * =======
9757 *
9758 * PB_SLAGEN locally initializes an array A.
9759 *
9760 * Arguments
9761 * =========
9762 *
9763 * UPLO (global input) CHARACTER*1
9764 * On entry, UPLO specifies whether the lower (UPLO='L') trape-
9765 * zoidal part or the upper (UPLO='U') trapezoidal part is to be
9766 * generated when the matrix to be generated is symmetric or
9767 * Hermitian. For all the other values of AFORM, the value of
9768 * this input argument is ignored.
9769 *
9770 * AFORM (global input) CHARACTER*1
9771 * On entry, AFORM specifies the type of submatrix to be genera-
9772 * ted as follows:
9773 * AFORM = 'S', sub( A ) is a symmetric matrix,
9774 * AFORM = 'H', sub( A ) is a Hermitian matrix,
9775 * AFORM = 'T', sub( A ) is overrwritten with the transpose
9776 * of what would normally be generated,
9777 * AFORM = 'C', sub( A ) is overwritten with the conjugate
9778 * transpose of what would normally be genera-
9779 * ted.
9780 * AFORM = 'N', a random submatrix is generated.
9781 *
9782 * A (local output) REAL array
9783 * On entry, A is an array of dimension (LLD_A, *). On exit,
9784 * this array contains the local entries of the randomly genera-
9785 * ted submatrix sub( A ).
9786 *
9787 * LDA (local input) INTEGER
9788 * On entry, LDA specifies the local leading dimension of the
9789 * array A. LDA must be at least one.
9790 *
9791 * LCMT00 (global input) INTEGER
9792 * On entry, LCMT00 is the LCM value specifying the off-diagonal
9793 * of the underlying matrix of interest. LCMT00=0 specifies the
9794 * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0
9795 * specifies superdiagonals.
9796 *
9797 * IRAN (local input) INTEGER array
9798 * On entry, IRAN is an array of dimension 2 containing respec-
9799 * tively the 16-lower and 16-higher bits of the encoding of the
9800 * entry of the random sequence corresponding locally to the
9801 * first local array entry to generate. Usually, this array is
9802 * computed by PB_SETLOCRAN.
9803 *
9804 * MBLKS (local input) INTEGER
9805 * On entry, MBLKS specifies the local number of blocks of rows.
9806 * MBLKS is at least zero.
9807 *
9808 * IMBLOC (local input) INTEGER
9809 * On entry, IMBLOC specifies the number of rows (size) of the
9810 * local uppest blocks. IMBLOC is at least zero.
9811 *
9812 * MB (global input) INTEGER
9813 * On entry, MB specifies the blocking factor used to partition
9814 * the rows of the matrix. MB must be at least one.
9815 *
9816 * LMBLOC (local input) INTEGER
9817 * On entry, LMBLOC specifies the number of rows (size) of the
9818 * local lowest blocks. LMBLOC is at least zero.
9819 *
9820 * NBLKS (local input) INTEGER
9821 * On entry, NBLKS specifies the local number of blocks of co-
9822 * lumns. NBLKS is at least zero.
9823 *
9824 * INBLOC (local input) INTEGER
9825 * On entry, INBLOC specifies the number of columns (size) of
9826 * the local leftmost blocks. INBLOC is at least zero.
9827 *
9828 * NB (global input) INTEGER
9829 * On entry, NB specifies the blocking factor used to partition
9830 * the the columns of the matrix. NB must be at least one.
9831 *
9832 * LNBLOC (local input) INTEGER
9833 * On entry, LNBLOC specifies the number of columns (size) of
9834 * the local rightmost blocks. LNBLOC is at least zero.
9835 *
9836 * JMP (local input) INTEGER array
9837 * On entry, JMP is an array of dimension JMP_LEN containing the
9838 * different jump values used by the random matrix generator.
9839 *
9840 * IMULADD (local input) INTEGER array
9841 * On entry, IMULADD is an array of dimension (4, JMP_LEN). The
9842 * jth column of this array contains the encoded initial cons-
9843 * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) )
9844 * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j)
9845 * contains respectively the 16-lower and 16-higher bits of the
9846 * constant a_j, and IMULADD(3:4,j) contains the 16-lower and
9847 * 16-higher bits of the constant c_j.
9848 *
9849 * -- Written on April 1, 1998 by
9850 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9851 *
9852 * =====================================================================
9853 *
9854 * .. Parameters ..
9855  INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
9856  $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
9857  $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
9858  PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
9859  $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
9860  $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
9861  $ jmp_nqnb = 10, jmp_nqinbloc = 11,
9862  $ jmp_len = 11 )
9863 * ..
9864 * .. Local Scalars ..
9865  INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK,
9866  $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP
9867  REAL DUMMY
9868 * ..
9869 * .. Local Arrays ..
9870  INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 )
9871 * ..
9872 * .. External Subroutines ..
9873  EXTERNAL PB_JUMPIT
9874 * ..
9875 * .. External Functions ..
9876  LOGICAL LSAME
9877  REAL PB_SRAND
9878  EXTERNAL LSAME, PB_SRAND
9879 * ..
9880 * .. Intrinsic Functions ..
9881  INTRINSIC max, min
9882 * ..
9883 * .. Executable Statements ..
9884 *
9885  DO 10 i = 1, 2
9886  ib1( i ) = iran( i )
9887  ib2( i ) = iran( i )
9888  ib3( i ) = iran( i )
9889  10 CONTINUE
9890 *
9891  IF( lsame( aform, 'N' ) ) THEN
9892 *
9893 * Generate random matrix
9894 *
9895  jj = 1
9896 *
9897  DO 50 jblk = 1, nblks
9898 *
9899  IF( jblk.EQ.1 ) THEN
9900  jb = inbloc
9901  ELSE IF( jblk.EQ.nblks ) THEN
9902  jb = lnbloc
9903  ELSE
9904  jb = nb
9905  END IF
9906 *
9907  DO 40 jk = jj, jj + jb - 1
9908 *
9909  ii = 1
9910 *
9911  DO 30 iblk = 1, mblks
9912 *
9913  IF( iblk.EQ.1 ) THEN
9914  ib = imbloc
9915  ELSE IF( iblk.EQ.mblks ) THEN
9916  ib = lmbloc
9917  ELSE
9918  ib = mb
9919  END IF
9920 *
9921 * Blocks are IB by JB
9922 *
9923  DO 20 ik = ii, ii + ib - 1
9924  a( ik, jk ) = pb_srand( 0 )
9925  20 CONTINUE
9926 *
9927  ii = ii + ib
9928 *
9929  IF( iblk.EQ.1 ) THEN
9930 *
9931 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
9932 *
9933  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
9934  $ ib0 )
9935 *
9936  ELSE
9937 *
9938 * Jump NPROW * MB rows
9939 *
9940  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1, ib0 )
9941 *
9942  END IF
9943 *
9944  ib1( 1 ) = ib0( 1 )
9945  ib1( 2 ) = ib0( 2 )
9946 *
9947  30 CONTINUE
9948 *
9949 * Jump one column
9950 *
9951  CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
9952 *
9953  ib1( 1 ) = ib0( 1 )
9954  ib1( 2 ) = ib0( 2 )
9955  ib2( 1 ) = ib0( 1 )
9956  ib2( 2 ) = ib0( 2 )
9957 *
9958  40 CONTINUE
9959 *
9960  jj = jj + jb
9961 *
9962  IF( jblk.EQ.1 ) THEN
9963 *
9964 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
9965 *
9966  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
9967 *
9968  ELSE
9969 *
9970 * Jump NPCOL * NB columns
9971 *
9972  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
9973 *
9974  END IF
9975 *
9976  ib1( 1 ) = ib0( 1 )
9977  ib1( 2 ) = ib0( 2 )
9978  ib2( 1 ) = ib0( 1 )
9979  ib2( 2 ) = ib0( 2 )
9980  ib3( 1 ) = ib0( 1 )
9981  ib3( 2 ) = ib0( 2 )
9982 *
9983  50 CONTINUE
9984 *
9985  ELSE IF( lsame( aform, 'T' ) .OR. lsame( aform, 'C' ) ) THEN
9986 *
9987 * Generate the transpose of the matrix that would be normally
9988 * generated.
9989 *
9990  ii = 1
9991 *
9992  DO 90 iblk = 1, mblks
9993 *
9994  IF( iblk.EQ.1 ) THEN
9995  ib = imbloc
9996  ELSE IF( iblk.EQ.mblks ) THEN
9997  ib = lmbloc
9998  ELSE
9999  ib = mb
10000  END IF
10001 *
10002  DO 80 ik = ii, ii + ib - 1
10003 *
10004  jj = 1
10005 *
10006  DO 70 jblk = 1, nblks
10007 *
10008  IF( jblk.EQ.1 ) THEN
10009  jb = inbloc
10010  ELSE IF( jblk.EQ.nblks ) THEN
10011  jb = lnbloc
10012  ELSE
10013  jb = nb
10014  END IF
10015 *
10016 * Blocks are IB by JB
10017 *
10018  DO 60 jk = jj, jj + jb - 1
10019  a( ik, jk ) = pb_srand( 0 )
10020  60 CONTINUE
10021 *
10022  jj = jj + jb
10023 *
10024  IF( jblk.EQ.1 ) THEN
10025 *
10026 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10027 *
10028  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10029  $ ib0 )
10030 *
10031  ELSE
10032 *
10033 * Jump NPCOL * NB columns
10034 *
10035  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1, ib0 )
10036 *
10037  END IF
10038 *
10039  ib1( 1 ) = ib0( 1 )
10040  ib1( 2 ) = ib0( 2 )
10041 *
10042  70 CONTINUE
10043 *
10044 * Jump one row
10045 *
10046  CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10047 *
10048  ib1( 1 ) = ib0( 1 )
10049  ib1( 2 ) = ib0( 2 )
10050  ib2( 1 ) = ib0( 1 )
10051  ib2( 2 ) = ib0( 2 )
10052 *
10053  80 CONTINUE
10054 *
10055  ii = ii + ib
10056 *
10057  IF( iblk.EQ.1 ) THEN
10058 *
10059 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10060 *
10061  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10062 *
10063  ELSE
10064 *
10065 * Jump NPROW * MB rows
10066 *
10067  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10068 *
10069  END IF
10070 *
10071  ib1( 1 ) = ib0( 1 )
10072  ib1( 2 ) = ib0( 2 )
10073  ib2( 1 ) = ib0( 1 )
10074  ib2( 2 ) = ib0( 2 )
10075  ib3( 1 ) = ib0( 1 )
10076  ib3( 2 ) = ib0( 2 )
10077 *
10078  90 CONTINUE
10079 *
10080  ELSE IF( ( lsame( aform, 'S' ) ).OR.( lsame( aform, 'H' ) ) ) THEN
10081 *
10082 * Generate a symmetric matrix
10083 *
10084  IF( lsame( uplo, 'L' ) ) THEN
10085 *
10086 * generate lower trapezoidal part
10087 *
10088  jj = 1
10089  lcmtc = lcmt00
10090 *
10091  DO 170 jblk = 1, nblks
10092 *
10093  IF( jblk.EQ.1 ) THEN
10094  jb = inbloc
10095  low = 1 - inbloc
10096  ELSE IF( jblk.EQ.nblks ) THEN
10097  jb = lnbloc
10098  low = 1 - nb
10099  ELSE
10100  jb = nb
10101  low = 1 - nb
10102  END IF
10103 *
10104  DO 160 jk = jj, jj + jb - 1
10105 *
10106  ii = 1
10107  lcmtr = lcmtc
10108 *
10109  DO 150 iblk = 1, mblks
10110 *
10111  IF( iblk.EQ.1 ) THEN
10112  ib = imbloc
10113  upp = imbloc - 1
10114  ELSE IF( iblk.EQ.mblks ) THEN
10115  ib = lmbloc
10116  upp = mb - 1
10117  ELSE
10118  ib = mb
10119  upp = mb - 1
10120  END IF
10121 *
10122 * Blocks are IB by JB
10123 *
10124  IF( lcmtr.GT.upp ) THEN
10125 *
10126  DO 100 ik = ii, ii + ib - 1
10127  dummy = pb_srand( 0 )
10128  100 CONTINUE
10129 *
10130  ELSE IF( lcmtr.GE.low ) THEN
10131 *
10132  jtmp = jk - jj + 1
10133  mnb = max( 0, -lcmtr )
10134 *
10135  IF( jtmp.LE.min( mnb, jb ) ) THEN
10136 *
10137  DO 110 ik = ii, ii + ib - 1
10138  a( ik, jk ) = pb_srand( 0 )
10139  110 CONTINUE
10140 *
10141  ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
10142  $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
10143 *
10144  itmp = ii + jtmp + lcmtr - 1
10145 *
10146  DO 120 ik = ii, itmp - 1
10147  dummy = pb_srand( 0 )
10148  120 CONTINUE
10149 *
10150  DO 130 ik = itmp, ii + ib - 1
10151  a( ik, jk ) = pb_srand( 0 )
10152  130 CONTINUE
10153 *
10154  END IF
10155 *
10156  ELSE
10157 *
10158  DO 140 ik = ii, ii + ib - 1
10159  a( ik, jk ) = pb_srand( 0 )
10160  140 CONTINUE
10161 *
10162  END IF
10163 *
10164  ii = ii + ib
10165 *
10166  IF( iblk.EQ.1 ) THEN
10167 *
10168 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10169 *
10170  lcmtr = lcmtr - jmp( jmp_npimbloc )
10171  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10172  $ ib0 )
10173 *
10174  ELSE
10175 *
10176 * Jump NPROW * MB rows
10177 *
10178  lcmtr = lcmtr - jmp( jmp_npmb )
10179  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
10180  $ ib0 )
10181 *
10182  END IF
10183 *
10184  ib1( 1 ) = ib0( 1 )
10185  ib1( 2 ) = ib0( 2 )
10186 *
10187  150 CONTINUE
10188 *
10189 * Jump one column
10190 *
10191  CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10192 *
10193  ib1( 1 ) = ib0( 1 )
10194  ib1( 2 ) = ib0( 2 )
10195  ib2( 1 ) = ib0( 1 )
10196  ib2( 2 ) = ib0( 2 )
10197 *
10198  160 CONTINUE
10199 *
10200  jj = jj + jb
10201 *
10202  IF( jblk.EQ.1 ) THEN
10203 *
10204 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10205 *
10206  lcmtc = lcmtc + jmp( jmp_nqinbloc )
10207  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10208 *
10209  ELSE
10210 *
10211 * Jump NPCOL * NB columns
10212 *
10213  lcmtc = lcmtc + jmp( jmp_nqnb )
10214  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10215 *
10216  END IF
10217 *
10218  ib1( 1 ) = ib0( 1 )
10219  ib1( 2 ) = ib0( 2 )
10220  ib2( 1 ) = ib0( 1 )
10221  ib2( 2 ) = ib0( 2 )
10222  ib3( 1 ) = ib0( 1 )
10223  ib3( 2 ) = ib0( 2 )
10224 *
10225  170 CONTINUE
10226 *
10227  ELSE
10228 *
10229 * generate upper trapezoidal part
10230 *
10231  ii = 1
10232  lcmtr = lcmt00
10233 *
10234  DO 250 iblk = 1, mblks
10235 *
10236  IF( iblk.EQ.1 ) THEN
10237  ib = imbloc
10238  upp = imbloc - 1
10239  ELSE IF( iblk.EQ.mblks ) THEN
10240  ib = lmbloc
10241  upp = mb - 1
10242  ELSE
10243  ib = mb
10244  upp = mb - 1
10245  END IF
10246 *
10247  DO 240 ik = ii, ii + ib - 1
10248 *
10249  jj = 1
10250  lcmtc = lcmtr
10251 *
10252  DO 230 jblk = 1, nblks
10253 *
10254  IF( jblk.EQ.1 ) THEN
10255  jb = inbloc
10256  low = 1 - inbloc
10257  ELSE IF( jblk.EQ.nblks ) THEN
10258  jb = lnbloc
10259  low = 1 - nb
10260  ELSE
10261  jb = nb
10262  low = 1 - nb
10263  END IF
10264 *
10265 * Blocks are IB by JB
10266 *
10267  IF( lcmtc.LT.low ) THEN
10268 *
10269  DO 180 jk = jj, jj + jb - 1
10270  dummy = pb_srand( 0 )
10271  180 CONTINUE
10272 *
10273  ELSE IF( lcmtc.LE.upp ) THEN
10274 *
10275  itmp = ik - ii + 1
10276  mnb = max( 0, lcmtc )
10277 *
10278  IF( itmp.LE.min( mnb, ib ) ) THEN
10279 *
10280  DO 190 jk = jj, jj + jb - 1
10281  a( ik, jk ) = pb_srand( 0 )
10282  190 CONTINUE
10283 *
10284  ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
10285  $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
10286 *
10287  jtmp = jj + itmp - lcmtc - 1
10288 *
10289  DO 200 jk = jj, jtmp - 1
10290  dummy = pb_srand( 0 )
10291  200 CONTINUE
10292 *
10293  DO 210 jk = jtmp, jj + jb - 1
10294  a( ik, jk ) = pb_srand( 0 )
10295  210 CONTINUE
10296 *
10297  END IF
10298 *
10299  ELSE
10300 *
10301  DO 220 jk = jj, jj + jb - 1
10302  a( ik, jk ) = pb_srand( 0 )
10303  220 CONTINUE
10304 *
10305  END IF
10306 *
10307  jj = jj + jb
10308 *
10309  IF( jblk.EQ.1 ) THEN
10310 *
10311 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10312 *
10313  lcmtc = lcmtc + jmp( jmp_nqinbloc )
10314  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10315  $ ib0 )
10316 *
10317  ELSE
10318 *
10319 * Jump NPCOL * NB columns
10320 *
10321  lcmtc = lcmtc + jmp( jmp_nqnb )
10322  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
10323  $ ib0 )
10324 *
10325  END IF
10326 *
10327  ib1( 1 ) = ib0( 1 )
10328  ib1( 2 ) = ib0( 2 )
10329 *
10330  230 CONTINUE
10331 *
10332 * Jump one row
10333 *
10334  CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10335 *
10336  ib1( 1 ) = ib0( 1 )
10337  ib1( 2 ) = ib0( 2 )
10338  ib2( 1 ) = ib0( 1 )
10339  ib2( 2 ) = ib0( 2 )
10340 *
10341  240 CONTINUE
10342 *
10343  ii = ii + ib
10344 *
10345  IF( iblk.EQ.1 ) THEN
10346 *
10347 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10348 *
10349  lcmtr = lcmtr - jmp( jmp_npimbloc )
10350  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10351 *
10352  ELSE
10353 *
10354 * Jump NPROW * MB rows
10355 *
10356  lcmtr = lcmtr - jmp( jmp_npmb )
10357  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10358 *
10359  END IF
10360 *
10361  ib1( 1 ) = ib0( 1 )
10362  ib1( 2 ) = ib0( 2 )
10363  ib2( 1 ) = ib0( 1 )
10364  ib2( 2 ) = ib0( 2 )
10365  ib3( 1 ) = ib0( 1 )
10366  ib3( 2 ) = ib0( 2 )
10367 *
10368  250 CONTINUE
10369 *
10370  END IF
10371 *
10372  END IF
10373 *
10374  RETURN
10375 *
10376 * End of PB_SLAGEN
10377 *
10378  END
10379  REAL FUNCTION PB_SRAND( IDUMM )
10381 * -- PBLAS test routine (version 2.0) --
10382 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10383 * and University of California, Berkeley.
10384 * April 1, 1998
10385 *
10386 * .. Scalar Arguments ..
10387  INTEGER idumm
10388 * ..
10389 *
10390 * Purpose
10391 * =======
10392 *
10393 * PB_SRAND generates the next number in the random sequence. This func-
10394 * tion ensures that this number will be in the interval ( -1.0, 1.0 ).
10395 *
10396 * Arguments
10397 * =========
10398 *
10399 * IDUMM (local input) INTEGER
10400 * This argument is ignored, but necessary to a FORTRAN 77 func-
10401 * tion.
10402 *
10403 * Further Details
10404 * ===============
10405 *
10406 * On entry, the array IRAND stored in the common block RANCOM contains
10407 * the information (2 integers) required to generate the next number in
10408 * the sequence X( n ). This number is computed as
10409 *
10410 * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10411 *
10412 * where the constant d is the largest 32 bit positive integer. The
10413 * array IRAND is then updated for the generation of the next number
10414 * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10415 * The constants a and c should have been preliminarily stored in the
10416 * array IACS as 2 pairs of integers. The initial set up of IRAND and
10417 * IACS is performed by the routine PB_SETRAN.
10418 *
10419 * -- Written on April 1, 1998 by
10420 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10421 *
10422 * =====================================================================
10423 *
10424 * .. Parameters ..
10425  REAL one, two
10426  PARAMETER ( one = 1.0e+0, two = 2.0e+0 )
10427 * ..
10428 * .. External Functions ..
10429  REAL pb_sran
10430  EXTERNAL pb_sran
10431 * ..
10432 * .. Executable Statements ..
10433 *
10434  pb_srand = one - two * pb_sran( idumm )
10435 *
10436  RETURN
10437 *
10438 * End of PB_SRAND
10439 *
10440  END
10441  REAL function pb_sran( idumm )
10443 * -- PBLAS test routine (version 2.0) --
10444 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10445 * and University of California, Berkeley.
10446 * April 1, 1998
10447 *
10448 * .. Scalar Arguments ..
10449  INTEGER idumm
10450 * ..
10451 *
10452 * Purpose
10453 * =======
10454 *
10455 * PB_SRAN generates the next number in the random sequence.
10456 *
10457 * Arguments
10458 * =========
10459 *
10460 * IDUMM (local input) INTEGER
10461 * This argument is ignored, but necessary to a FORTRAN 77 func-
10462 * tion.
10463 *
10464 * Further Details
10465 * ===============
10466 *
10467 * On entry, the array IRAND stored in the common block RANCOM contains
10468 * the information (2 integers) required to generate the next number in
10469 * the sequence X( n ). This number is computed as
10470 *
10471 * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10472 *
10473 * where the constant d is the largest 32 bit positive integer. The
10474 * array IRAND is then updated for the generation of the next number
10475 * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10476 * The constants a and c should have been preliminarily stored in the
10477 * array IACS as 2 pairs of integers. The initial set up of IRAND and
10478 * IACS is performed by the routine PB_SETRAN.
10479 *
10480 * -- Written on April 1, 1998 by
10481 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10482 *
10483 * =====================================================================
10484 *
10485 * .. Parameters ..
10486  REAL divfac, pow16
10487  PARAMETER ( divfac = 2.147483648e+9,
10488  $ pow16 = 6.5536e+4 )
10489 * ..
10490 * .. Local Arrays ..
10491  INTEGER j( 2 )
10492 * ..
10493 * .. External Subroutines ..
10494  EXTERNAL pb_ladd, pb_lmul
10495 * ..
10496 * .. Intrinsic Functions ..
10497  INTRINSIC real
10498 * ..
10499 * .. Common Blocks ..
10500  INTEGER iacs( 4 ), irand( 2 )
10501  common /rancom/ irand, iacs
10502 * ..
10503 * .. Save Statements ..
10504  SAVE /rancom/
10505 * ..
10506 * .. Executable Statements ..
10507 *
10508  pb_sran = ( real( irand( 1 ) ) + pow16 * real( irand( 2 ) ) ) /
10509  $ divfac
10510 *
10511  CALL pb_lmul( irand, iacs, j )
10512  CALL pb_ladd( j, iacs( 3 ), irand )
10513 *
10514  RETURN
10515 *
10516 * End of PB_SRAN
10517 *
10518  END
pb_ladd
subroutine pb_ladd(J, K, I)
Definition: pblastst.f:4480
pslamch
real function pslamch(ICTXT, CMACH)
Definition: pcblastst.f:7455
psmprnt
subroutine psmprnt(ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, CMATNM)
Definition: psblastst.f:3949
psmvch
subroutine psmvch(ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, INCY, G, ERR, INFO)
Definition: psblastst.f:4157
max
#define max(A, B)
Definition: pcgemr.c:180
pssetpblas
subroutine pssetpblas(ICTXT)
Definition: psblastst.f:1478
pschkdim
subroutine pschkdim(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: psblastst.f:759
pb_setlocran
subroutine pb_setlocran(SEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, MYRDIST, MYCDIST, NPROW, NPCOL, JMP, IMULADD, IRAN)
Definition: pblastst.f:4302
pb_setran
subroutine pb_setran(IRAN, IAC)
Definition: pblastst.f:4759
pb_pslaprn2
subroutine pb_pslaprn2(M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, NOUT, PROW, PCOL, WORK)
Definition: psblastst.f:8850
pb_descset2
subroutine pb_descset2(DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, CTXT, LLD)
Definition: pblastst.f:3172
pb_sfillpad
subroutine pb_sfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psblastst.f:9081
pserrset
subroutine pserrset(ERR, ERRMAX, XTRUE, X)
Definition: psblastst.f:2456
pb_slaset
subroutine pb_slaset(UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA)
Definition: psblastst.f:9361
pb_lmul
subroutine pb_lmul(K, J, I)
Definition: pblastst.f:4559
pschkvout
subroutine pschkvout(N, X, PX, IX, JX, DESCX, INCX, INFO)
Definition: psblastst.f:2870
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pschkmat
subroutine pschkmat(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: psblastst.f:1674
psdimee
subroutine psdimee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: psblastst.f:455
psladom
subroutine psladom(INPLACE, N, ALPHA, A, IA, JA, DESCA)
Definition: psblastst.f:8244
pslagen
subroutine pslagen(INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, DESCA, IASEED, A, LDA)
Definition: psblastst.f:7846
pb_desctrans
subroutine pb_desctrans(DESCIN, DESCOUT)
Definition: pblastst.f:2964
slamch
real function slamch(CMACH)
Definition: tools.f:867
pb_slagen
subroutine pb_slagen(UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, LNBLOC, JMP, IMULADD)
Definition: psblastst.f:9739
psmmch1
subroutine psmmch1(ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: psblastst.f:5649
psoptee
subroutine psoptee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: psblastst.f:2
psmmch
subroutine psmmch(ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: psblastst.f:5272
psvmch2
subroutine psvmch2(ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, G, ERR, INFO)
Definition: psblastst.f:4919
pb_jumpit
subroutine pb_jumpit(MULADD, IRANN, IRANM)
Definition: pblastst.f:4822
pschkvin
subroutine pschkvin(ERRMAX, N, X, PX, IX, JX, DESCX, INCX, INFO)
Definition: psblastst.f:2576
pb_infog2l
subroutine pb_infog2l(I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ, PROW, PCOL)
Definition: pblastst.f:1673
pschkopt
subroutine pschkopt(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: psblastst.f:266
pb_sran
real function pb_sran(IDUMM)
Definition: pcblastst.f:11552
pb_initmuladd
subroutine pb_initmuladd(MULADD0, JMP, IMULADD)
Definition: pblastst.f:4196
pb_ainfog2l
subroutine pb_ainfog2l(M, N, I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, PCOL, RPROW, RPCOL)
Definition: pblastst.f:2023
pb_initjmp
subroutine pb_initjmp(COLMAJ, NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, MB, NB, RSRC, CSRC, NPROW, NPCOL, STRIDE, JMP)
Definition: pblastst.f:4045
psvecee
subroutine psvecee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: psblastst.f:936
pb_schekpad
subroutine pb_schekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psblastst.f:9194
pchkpbe
subroutine pchkpbe(ICTXT, NOUT, SNAME, INFOT)
Definition: pblastst.f:1084
psvmch
subroutine psvmch(ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, G, ERR, INFO)
Definition: psblastst.f:4570
psmmch3
subroutine psmmch3(UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, ERR, INFO)
Definition: psblastst.f:6372
pslascal
subroutine pslascal(TYPE, M, N, ALPHA, A, IA, JA, DESCA)
Definition: psblastst.f:7338
psvprnt
subroutine psvprnt(ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, CVECNM)
Definition: psblastst.f:4056
pb_jump
subroutine pb_jump(K, MULADD, IRANN, IRANM, IMA)
Definition: pblastst.f:4648
psmatee
subroutine psmatee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: psblastst.f:1190
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pb_binfo
subroutine pb_binfo(OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP)
Definition: pblastst.f:3577
pscallsub
subroutine pscallsub(SUBPTR, SCODE)
Definition: psblastst.f:2180
pb_chkmat
subroutine pb_chkmat(ICTXT, M, MPOS0, N, NPOS0, IA, JA, DESCA, DPOS0, INFO)
Definition: pblastst.f:2742
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
pschkmout
subroutine pschkmout(M, N, A, PA, IA, JA, DESCA, INFO)
Definition: psblastst.f:3627
psmmch2
subroutine psmmch2(ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: psblastst.f:5996
pschkmin
subroutine pschkmin(ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO)
Definition: psblastst.f:3326
pb_slascal
subroutine pb_slascal(UPLO, M, N, IOFFD, ALPHA, A, LDA)
Definition: psblastst.f:9558
pb_pslaprnt
subroutine pb_pslaprnt(M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, NOUT, WORK)
Definition: psblastst.f:8636
pserraxpby
subroutine pserraxpby(ERRBND, ALPHA, X, BETA, Y, PREC)
Definition: psblastst.f:6684
pb_locinfo
subroutine pb_locinfo(I, INB, NB, MYROC, SRCPROC, NPROCS, ILOCBLK, ILOCOFF, MYDIST)
Definition: pblastst.f:3910
min
#define min(A, B)
Definition: pcgemr.c:181