SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
psblastst.f
Go to the documentation of this file.
1 SUBROUTINE psoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2*
3* -- PBLAS test routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* .. Scalar Arguments ..
9 INTEGER ICTXT, NOUT, SCODE
10* ..
11* .. Array Arguments ..
12 CHARACTER*(*) SNAME
13* ..
14* .. Subroutine Arguments ..
15 EXTERNAL subptr
16* ..
17*
18* Purpose
19* =======
20*
21* PSOPTEE tests whether the PBLAS respond correctly to a bad option
22* argument.
23*
24* Notes
25* =====
26*
27* A description vector is associated with each 2D block-cyclicly dis-
28* tributed matrix. This vector stores the information required to
29* establish the mapping between a matrix entry and its corresponding
30* process and memory location.
31*
32* In the following comments, the character _ should be read as
33* "of the distributed matrix". Let A be a generic term for any 2D
34* block cyclicly distributed matrix. Its description vector is DESCA:
35*
36* NOTATION STORED IN EXPLANATION
37* ---------------- --------------- ------------------------------------
38* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40* the NPROW x NPCOL BLACS process grid
41* A is distributed over. The context
42* itself is global, but the handle
43* (the integer value) may vary.
44* M_A (global) DESCA( M_ ) The number of rows in the distribu-
45* ted matrix A, M_A >= 0.
46* N_A (global) DESCA( N_ ) The number of columns in the distri-
47* buted matrix A, N_A >= 0.
48* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49* block of the matrix A, IMB_A > 0.
50* INB_A (global) DESCA( INB_ ) The number of columns of the upper
51* left block of the matrix A,
52* INB_A > 0.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54* bute the last M_A-IMB_A rows of A,
55* MB_A > 0.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57* bute the last N_A-INB_A columns of
58* A, NB_A > 0.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the matrix A is distributed,
61* NPROW > RSRC_A >= 0.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of A is distributed.
64* NPCOL > CSRC_A >= 0.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array storing the local blocks of
67* the distributed matrix A,
68* IF( Lc( 1, N_A ) > 0 )
69* LLD_A >= MAX( 1, Lr( 1, M_A ) )
70* ELSE
71* LLD_A >= 1.
72*
73* Let K be the number of rows of a matrix A starting at the global in-
74* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76* receive if these K rows were distributed over NPROW processes. If K
77* is the number of columns of a matrix A starting at the global index
78* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80* these K columns were distributed over NPCOL processes.
81*
82* The values of Lr() and Lc() may be determined via a call to the func-
83* tion PB_NUMROC:
84* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86*
87* Arguments
88* =========
89*
90* ICTXT (local input) INTEGER
91* On entry, ICTXT specifies the BLACS context handle, indica-
92* ting the global context of the operation. The context itself
93* is global, but the value of ICTXT is local.
94*
95* NOUT (global input) INTEGER
96* On entry, NOUT specifies the unit number for the output file.
97* When NOUT is 6, output to screen, when NOUT is 0, output to
98* stderr. NOUT is only defined for process 0.
99*
100* SUBPTR (global input) SUBROUTINE
101* On entry, SUBPTR is a subroutine. SUBPTR must be declared
102* EXTERNAL in the calling subroutine.
103*
104* SCODE (global input) INTEGER
105* On entry, SCODE specifies the calling sequence code.
106*
107* SNAME (global input) CHARACTER*(*)
108* On entry, SNAME specifies the subroutine name calling this
109* subprogram.
110*
111* Calling sequence encodings
112* ==========================
113*
114* code Formal argument list Examples
115*
116* 11 (n, v1,v2) _SWAP, _COPY
117* 12 (n,s1, v1 ) _SCAL, _SCAL
118* 13 (n,s1, v1,v2) _AXPY, _DOT_
119* 14 (n,s1,i1,v1 ) _AMAX
120* 15 (n,u1, v1 ) _ASUM, _NRM2
121*
122* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125* 24 ( m,n,s1,v1,v2,m1) _GER_
126* 25 (uplo, n,s1,v1, m1) _SYR
127* 26 (uplo, n,u1,v1, m1) _HER
128* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129*
130* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136* 37 ( m,n, s1,m1, s2,m3) _TRAN_
137* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140*
141* -- Written on April 1, 1998 by
142* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143*
144* =====================================================================
145*
146* .. Local Scalars ..
147 INTEGER APOS
148* ..
149* .. External Subroutines ..
150 EXTERNAL pschkopt
151* ..
152* .. Executable Statements ..
153*
154* Level 2 PBLAS
155*
156 IF( scode.EQ.21 ) THEN
157*
158* Check 1st (and only) option
159*
160 apos = 1
161 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162*
163 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164 $ scode.EQ.27 ) THEN
165*
166* Check 1st (and only) option
167*
168 apos = 1
169 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170*
171 ELSE IF( scode.EQ.23 ) THEN
172*
173* Check 1st option
174*
175 apos = 1
176 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177*
178* Check 2nd option
179*
180 apos = 2
181 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182*
183* Check 3rd option
184*
185 apos = 3
186 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187*
188* Level 3 PBLAS
189*
190 ELSE IF( scode.EQ.31 ) THEN
191*
192* Check 1st option
193*
194 apos = 1
195 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196*
197* Check 2'nd option
198*
199 apos = 2
200 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201*
202 ELSE IF( scode.EQ.32 ) THEN
203*
204* Check 1st option
205*
206 apos = 1
207 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208*
209* Check 2nd option
210*
211 apos = 2
212 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213*
214 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215 $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216*
217* Check 1st option
218*
219 apos = 1
220 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221*
222* Check 2'nd option
223*
224 apos = 2
225 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226*
227 ELSE IF( scode.EQ.38 ) THEN
228*
229* Check 1st option
230*
231 apos = 1
232 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233*
234* Check 2nd option
235*
236 apos = 2
237 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238*
239* Check 3rd option
240*
241 apos = 3
242 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243*
244* Check 4th option
245*
246 apos = 4
247 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248*
249*
250 ELSE IF( scode.EQ.39 ) THEN
251*
252* Check 1st option
253*
254 apos = 1
255 CALL pschkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256*
257 END IF
258*
259 RETURN
260*
261* End of PSOPTEE
262*
263 END
264 SUBROUTINE pschkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265 $ ARGPOS )
266*
267* -- PBLAS test routine (version 2.0) --
268* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269* and University of California, Berkeley.
270* April 1, 1998
271*
272* .. Scalar Arguments ..
273 CHARACTER*1 ARGNAM
274 INTEGER ARGPOS, ICTXT, NOUT, SCODE
275* ..
276* .. Array Arguments ..
277 CHARACTER*(*) SNAME
278* ..
279* .. Subroutine Arguments ..
280 EXTERNAL subptr
281* ..
282*
283* Purpose
284* =======
285*
286* PSCHKOPT tests the option ARGNAM in any PBLAS routine.
287*
288* Notes
289* =====
290*
291* A description vector is associated with each 2D block-cyclicly dis-
292* tributed matrix. This vector stores the information required to
293* establish the mapping between a matrix entry and its corresponding
294* process and memory location.
295*
296* In the following comments, the character _ should be read as
297* "of the distributed matrix". Let A be a generic term for any 2D
298* block cyclicly distributed matrix. Its description vector is DESCA:
299*
300* NOTATION STORED IN EXPLANATION
301* ---------------- --------------- ------------------------------------
302* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304* the NPROW x NPCOL BLACS process grid
305* A is distributed over. The context
306* itself is global, but the handle
307* (the integer value) may vary.
308* M_A (global) DESCA( M_ ) The number of rows in the distribu-
309* ted matrix A, M_A >= 0.
310* N_A (global) DESCA( N_ ) The number of columns in the distri-
311* buted matrix A, N_A >= 0.
312* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313* block of the matrix A, IMB_A > 0.
314* INB_A (global) DESCA( INB_ ) The number of columns of the upper
315* left block of the matrix A,
316* INB_A > 0.
317* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318* bute the last M_A-IMB_A rows of A,
319* MB_A > 0.
320* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321* bute the last N_A-INB_A columns of
322* A, NB_A > 0.
323* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324* row of the matrix A is distributed,
325* NPROW > RSRC_A >= 0.
326* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327* first column of A is distributed.
328* NPCOL > CSRC_A >= 0.
329* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330* array storing the local blocks of
331* the distributed matrix A,
332* IF( Lc( 1, N_A ) > 0 )
333* LLD_A >= MAX( 1, Lr( 1, M_A ) )
334* ELSE
335* LLD_A >= 1.
336*
337* Let K be the number of rows of a matrix A starting at the global in-
338* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340* receive if these K rows were distributed over NPROW processes. If K
341* is the number of columns of a matrix A starting at the global index
342* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344* these K columns were distributed over NPCOL processes.
345*
346* The values of Lr() and Lc() may be determined via a call to the func-
347* tion PB_NUMROC:
348* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350*
351* Arguments
352* =========
353*
354* ICTXT (local input) INTEGER
355* On entry, ICTXT specifies the BLACS context handle, indica-
356* ting the global context of the operation. The context itself
357* is global, but the value of ICTXT is local.
358*
359* NOUT (global input) INTEGER
360* On entry, NOUT specifies the unit number for the output file.
361* When NOUT is 6, output to screen, when NOUT is 0, output to
362* stderr. NOUT is only defined for process 0.
363*
364* SUBPTR (global input) SUBROUTINE
365* On entry, SUBPTR is a subroutine. SUBPTR must be declared
366* EXTERNAL in the calling subroutine.
367*
368* SCODE (global input) INTEGER
369* On entry, SCODE specifies the calling sequence code.
370*
371* SNAME (global input) CHARACTER*(*)
372* On entry, SNAME specifies the subroutine name calling this
373* subprogram.
374*
375* ARGNAM (global input) CHARACTER*(*)
376* On entry, ARGNAM specifies the name of the option to be
377* checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378*
379* ARGPOS (global input) INTEGER
380* On entry, ARGPOS indicates the position of the option ARGNAM
381* to be tested.
382*
383* -- Written on April 1, 1998 by
384* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385*
386* =====================================================================
387*
388* .. Local Scalars ..
389 INTEGER INFOT
390* ..
391* .. External Subroutines ..
392 EXTERNAL pchkpbe, pscallsub, pssetpblas
393* ..
394* .. External Functions ..
395 LOGICAL LSAME
396 EXTERNAL lsame
397* ..
398* .. Common Blocks ..
399 CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400 COMMON /pblasc/diag, side, transa, transb, uplo
401* ..
402* .. Executable Statements ..
403*
404* Reiniatilize the dummy arguments to correct values
405*
406 CALL pssetpblas( ictxt )
407*
408 IF( lsame( argnam, 'D' ) ) THEN
409*
410* Generate bad DIAG option
411*
412 diag = '/'
413*
414 ELSE IF( lsame( argnam, 'S' ) ) THEN
415*
416* Generate bad SIDE option
417*
418 side = '/'
419*
420 ELSE IF( lsame( argnam, 'A' ) ) THEN
421*
422* Generate bad TRANSA option
423*
424 transa = '/'
425*
426 ELSE IF( lsame( argnam, 'B' ) ) THEN
427*
428* Generate bad TRANSB option
429*
430 transb = '/'
431*
432 ELSE IF( lsame( argnam, 'U' ) ) THEN
433*
434* Generate bad UPLO option
435*
436 uplo = '/'
437*
438 END IF
439*
440* Set INFOT to the position of the bad dimension argument
441*
442 infot = argpos
443*
444* Call the PBLAS routine
445*
446 CALL pscallsub( subptr, scode )
447 CALL pchkpbe( ictxt, nout, sname, infot )
448*
449 RETURN
450*
451* End of PSCHKOPT
452*
453 END
454 SUBROUTINE psdimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455*
456* -- PBLAS test routine (version 2.0) --
457* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458* and University of California, Berkeley.
459* April 1, 1998
460*
461* .. Scalar Arguments ..
462 INTEGER ICTXT, NOUT, SCODE
463* ..
464* .. Array Arguments ..
465 CHARACTER*(*) SNAME
466* ..
467* .. Subroutine Arguments ..
468 EXTERNAL subptr
469* ..
470*
471* Purpose
472* =======
473*
474* PSDIMEE tests whether the PBLAS respond correctly to a bad dimension
475* argument.
476*
477* Notes
478* =====
479*
480* A description vector is associated with each 2D block-cyclicly dis-
481* tributed matrix. This vector stores the information required to
482* establish the mapping between a matrix entry and its corresponding
483* process and memory location.
484*
485* In the following comments, the character _ should be read as
486* "of the distributed matrix". Let A be a generic term for any 2D
487* block cyclicly distributed matrix. Its description vector is DESCA:
488*
489* NOTATION STORED IN EXPLANATION
490* ---------------- --------------- ------------------------------------
491* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493* the NPROW x NPCOL BLACS process grid
494* A is distributed over. The context
495* itself is global, but the handle
496* (the integer value) may vary.
497* M_A (global) DESCA( M_ ) The number of rows in the distribu-
498* ted matrix A, M_A >= 0.
499* N_A (global) DESCA( N_ ) The number of columns in the distri-
500* buted matrix A, N_A >= 0.
501* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502* block of the matrix A, IMB_A > 0.
503* INB_A (global) DESCA( INB_ ) The number of columns of the upper
504* left block of the matrix A,
505* INB_A > 0.
506* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507* bute the last M_A-IMB_A rows of A,
508* MB_A > 0.
509* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510* bute the last N_A-INB_A columns of
511* A, NB_A > 0.
512* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513* row of the matrix A is distributed,
514* NPROW > RSRC_A >= 0.
515* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516* first column of A is distributed.
517* NPCOL > CSRC_A >= 0.
518* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519* array storing the local blocks of
520* the distributed matrix A,
521* IF( Lc( 1, N_A ) > 0 )
522* LLD_A >= MAX( 1, Lr( 1, M_A ) )
523* ELSE
524* LLD_A >= 1.
525*
526* Let K be the number of rows of a matrix A starting at the global in-
527* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529* receive if these K rows were distributed over NPROW processes. If K
530* is the number of columns of a matrix A starting at the global index
531* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533* these K columns were distributed over NPCOL processes.
534*
535* The values of Lr() and Lc() may be determined via a call to the func-
536* tion PB_NUMROC:
537* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539*
540* Arguments
541* =========
542*
543* ICTXT (local input) INTEGER
544* On entry, ICTXT specifies the BLACS context handle, indica-
545* ting the global context of the operation. The context itself
546* is global, but the value of ICTXT is local.
547*
548* NOUT (global input) INTEGER
549* On entry, NOUT specifies the unit number for the output file.
550* When NOUT is 6, output to screen, when NOUT is 0, output to
551* stderr. NOUT is only defined for process 0.
552*
553* SUBPTR (global input) SUBROUTINE
554* On entry, SUBPTR is a subroutine. SUBPTR must be declared
555* EXTERNAL in the calling subroutine.
556*
557* SCODE (global input) INTEGER
558* On entry, SCODE specifies the calling sequence code.
559*
560* SNAME (global input) CHARACTER*(*)
561* On entry, SNAME specifies the subroutine name calling this
562* subprogram.
563*
564* Calling sequence encodings
565* ==========================
566*
567* code Formal argument list Examples
568*
569* 11 (n, v1,v2) _SWAP, _COPY
570* 12 (n,s1, v1 ) _SCAL, _SCAL
571* 13 (n,s1, v1,v2) _AXPY, _DOT_
572* 14 (n,s1,i1,v1 ) _AMAX
573* 15 (n,u1, v1 ) _ASUM, _NRM2
574*
575* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578* 24 ( m,n,s1,v1,v2,m1) _GER_
579* 25 (uplo, n,s1,v1, m1) _SYR
580* 26 (uplo, n,u1,v1, m1) _HER
581* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582*
583* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589* 37 ( m,n, s1,m1, s2,m3) _TRAN_
590* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593*
594* -- Written on April 1, 1998 by
595* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596*
597* =====================================================================
598*
599* .. Local Scalars ..
600 INTEGER APOS
601* ..
602* .. External Subroutines ..
603 EXTERNAL pschkdim
604* ..
605* .. Executable Statements ..
606*
607* Level 1 PBLAS
608*
609 IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610 $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611*
612* Check 1st (and only) dimension
613*
614 apos = 1
615 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616*
617* Level 2 PBLAS
618*
619 ELSE IF( scode.EQ.21 ) THEN
620*
621* Check 1st dimension
622*
623 apos = 2
624 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625*
626* Check 2nd dimension
627*
628 apos = 3
629 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630*
631 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632 $ scode.EQ.27 ) THEN
633*
634* Check 1st (and only) dimension
635*
636 apos = 2
637 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638*
639 ELSE IF( scode.EQ.23 ) THEN
640*
641* Check 1st (and only) dimension
642*
643 apos = 4
644 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645*
646 ELSE IF( scode.EQ.24 ) THEN
647*
648* Check 1st dimension
649*
650 apos = 1
651 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652*
653* Check 2nd dimension
654*
655 apos = 2
656 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657*
658* Level 3 PBLAS
659*
660 ELSE IF( scode.EQ.31 ) THEN
661*
662* Check 1st dimension
663*
664 apos = 3
665 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666*
667* Check 2nd dimension
668*
669 apos = 4
670 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671*
672* Check 3rd dimension
673*
674 apos = 5
675 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676*
677 ELSE IF( scode.EQ.32 ) THEN
678*
679* Check 1st dimension
680*
681 apos = 3
682 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683*
684* Check 2nd dimension
685*
686 apos = 4
687 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688*
689 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690 $ scode.EQ.36 ) THEN
691*
692* Check 1st dimension
693*
694 apos = 3
695 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696*
697* Check 2nd dimension
698*
699 apos = 4
700 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701*
702 ELSE IF( scode.EQ.37 ) THEN
703*
704* Check 1st dimension
705*
706 apos = 1
707 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708*
709* Check 2nd dimension
710*
711 apos = 2
712 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713*
714 ELSE IF( scode.EQ.38 ) THEN
715*
716* Check 1st dimension
717*
718 apos = 5
719 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720*
721* Check 2nd dimension
722*
723 apos = 6
724 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725*
726 ELSE IF( scode.EQ.39 ) THEN
727*
728* Check 1st dimension
729*
730 apos = 2
731 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732*
733* Check 2nd dimension
734*
735 apos = 3
736 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737*
738 ELSE IF( scode.EQ.40 ) THEN
739*
740* Check 1st dimension
741*
742 apos = 3
743 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744*
745* Check 2nd dimension
746*
747 apos = 4
748 CALL pschkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749*
750 END IF
751*
752 RETURN
753*
754* End of PSDIMEE
755*
756 END
757 SUBROUTINE pschkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758 $ ARGPOS )
759*
760* -- PBLAS test routine (version 2.0) --
761* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762* and University of California, Berkeley.
763* April 1, 1998
764*
765* .. Scalar Arguments ..
766 CHARACTER*1 ARGNAM
767 INTEGER ARGPOS, ICTXT, NOUT, SCODE
768* ..
769* .. Array Arguments ..
770 CHARACTER*(*) SNAME
771* ..
772* .. Subroutine Arguments ..
773 EXTERNAL subptr
774* ..
775*
776* Purpose
777* =======
778*
779* PSCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780*
781* Notes
782* =====
783*
784* A description vector is associated with each 2D block-cyclicly dis-
785* tributed matrix. This vector stores the information required to
786* establish the mapping between a matrix entry and its corresponding
787* process and memory location.
788*
789* In the following comments, the character _ should be read as
790* "of the distributed matrix". Let A be a generic term for any 2D
791* block cyclicly distributed matrix. Its description vector is DESCA:
792*
793* NOTATION STORED IN EXPLANATION
794* ---------------- --------------- ------------------------------------
795* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797* the NPROW x NPCOL BLACS process grid
798* A is distributed over. The context
799* itself is global, but the handle
800* (the integer value) may vary.
801* M_A (global) DESCA( M_ ) The number of rows in the distribu-
802* ted matrix A, M_A >= 0.
803* N_A (global) DESCA( N_ ) The number of columns in the distri-
804* buted matrix A, N_A >= 0.
805* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806* block of the matrix A, IMB_A > 0.
807* INB_A (global) DESCA( INB_ ) The number of columns of the upper
808* left block of the matrix A,
809* INB_A > 0.
810* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811* bute the last M_A-IMB_A rows of A,
812* MB_A > 0.
813* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814* bute the last N_A-INB_A columns of
815* A, NB_A > 0.
816* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817* row of the matrix A is distributed,
818* NPROW > RSRC_A >= 0.
819* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820* first column of A is distributed.
821* NPCOL > CSRC_A >= 0.
822* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823* array storing the local blocks of
824* the distributed matrix A,
825* IF( Lc( 1, N_A ) > 0 )
826* LLD_A >= MAX( 1, Lr( 1, M_A ) )
827* ELSE
828* LLD_A >= 1.
829*
830* Let K be the number of rows of a matrix A starting at the global in-
831* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833* receive if these K rows were distributed over NPROW processes. If K
834* is the number of columns of a matrix A starting at the global index
835* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837* these K columns were distributed over NPCOL processes.
838*
839* The values of Lr() and Lc() may be determined via a call to the func-
840* tion PB_NUMROC:
841* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843*
844* Arguments
845* =========
846*
847* ICTXT (local input) INTEGER
848* On entry, ICTXT specifies the BLACS context handle, indica-
849* ting the global context of the operation. The context itself
850* is global, but the value of ICTXT is local.
851*
852* NOUT (global input) INTEGER
853* On entry, NOUT specifies the unit number for the output file.
854* When NOUT is 6, output to screen, when NOUT is 0, output to
855* stderr. NOUT is only defined for process 0.
856*
857* SUBPTR (global input) SUBROUTINE
858* On entry, SUBPTR is a subroutine. SUBPTR must be declared
859* EXTERNAL in the calling subroutine.
860*
861* SCODE (global input) INTEGER
862* On entry, SCODE specifies the calling sequence code.
863*
864* SNAME (global input) CHARACTER*(*)
865* On entry, SNAME specifies the subroutine name calling this
866* subprogram.
867*
868* ARGNAM (global input) CHARACTER*(*)
869* On entry, ARGNAM specifies the name of the dimension to be
870* checked. ARGNAM can either be 'M', 'N' or 'K'.
871*
872* ARGPOS (global input) INTEGER
873* On entry, ARGPOS indicates the position of the option ARGNAM
874* to be tested.
875*
876* -- Written on April 1, 1998 by
877* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878*
879* =====================================================================
880*
881* .. Local Scalars ..
882 INTEGER INFOT
883* ..
884* .. External Subroutines ..
885 EXTERNAL pchkpbe, pscallsub, pssetpblas
886* ..
887* .. External Functions ..
888 LOGICAL LSAME
889 EXTERNAL LSAME
890* ..
891* .. Common Blocks ..
892 INTEGER KDIM, MDIM, NDIM
893 COMMON /PBLASN/KDIM, MDIM, NDIM
894* ..
895* .. Executable Statements ..
896*
897* Reiniatilize the dummy arguments to correct values
898*
899 CALL pssetpblas( ictxt )
900*
901 IF( lsame( argnam, 'M' ) ) THEN
902*
903* Generate bad MDIM
904*
905 mdim = -1
906*
907 ELSE IF( lsame( argnam, 'N' ) ) THEN
908*
909* Generate bad NDIM
910*
911 ndim = -1
912*
913 ELSE
914*
915* Generate bad KDIM
916*
917 kdim = -1
918*
919 END IF
920*
921* Set INFOT to the position of the bad dimension argument
922*
923 infot = argpos
924*
925* Call the PBLAS routine
926*
927 CALL pscallsub( subptr, scode )
928 CALL pchkpbe( ictxt, nout, sname, infot )
929*
930 RETURN
931*
932* End of PSCHKDIM
933*
934 END
935 SUBROUTINE psvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936*
937* -- PBLAS test routine (version 2.0) --
938* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939* and University of California, Berkeley.
940* April 1, 1998
941*
942* .. Scalar Arguments ..
943 INTEGER ICTXT, NOUT, SCODE
944* ..
945* .. Array Arguments ..
946 CHARACTER*7 SNAME
947* ..
948* .. Subroutine Arguments ..
949 EXTERNAL subptr
950* ..
951*
952* Purpose
953* =======
954*
955* PSVECEE tests whether the PBLAS respond correctly to a bad vector
956* argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957* DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958* DESC<vec>, and INC<vec> can be tested.
959*
960* Notes
961* =====
962*
963* A description vector is associated with each 2D block-cyclicly dis-
964* tributed matrix. This vector stores the information required to
965* establish the mapping between a matrix entry and its corresponding
966* process and memory location.
967*
968* In the following comments, the character _ should be read as
969* "of the distributed matrix". Let A be a generic term for any 2D
970* block cyclicly distributed matrix. Its description vector is DESCA:
971*
972* NOTATION STORED IN EXPLANATION
973* ---------------- --------------- ------------------------------------
974* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976* the NPROW x NPCOL BLACS process grid
977* A is distributed over. The context
978* itself is global, but the handle
979* (the integer value) may vary.
980* M_A (global) DESCA( M_ ) The number of rows in the distribu-
981* ted matrix A, M_A >= 0.
982* N_A (global) DESCA( N_ ) The number of columns in the distri-
983* buted matrix A, N_A >= 0.
984* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985* block of the matrix A, IMB_A > 0.
986* INB_A (global) DESCA( INB_ ) The number of columns of the upper
987* left block of the matrix A,
988* INB_A > 0.
989* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990* bute the last M_A-IMB_A rows of A,
991* MB_A > 0.
992* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993* bute the last N_A-INB_A columns of
994* A, NB_A > 0.
995* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996* row of the matrix A is distributed,
997* NPROW > RSRC_A >= 0.
998* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999* first column of A is distributed.
1000* NPCOL > CSRC_A >= 0.
1001* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002* array storing the local blocks of
1003* the distributed matrix A,
1004* IF( Lc( 1, N_A ) > 0 )
1005* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006* ELSE
1007* LLD_A >= 1.
1008*
1009* Let K be the number of rows of a matrix A starting at the global in-
1010* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012* receive if these K rows were distributed over NPROW processes. If K
1013* is the number of columns of a matrix A starting at the global index
1014* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016* these K columns were distributed over NPCOL processes.
1017*
1018* The values of Lr() and Lc() may be determined via a call to the func-
1019* tion PB_NUMROC:
1020* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022*
1023* Arguments
1024* =========
1025*
1026* ICTXT (local input) INTEGER
1027* On entry, ICTXT specifies the BLACS context handle, indica-
1028* ting the global context of the operation. The context itself
1029* is global, but the value of ICTXT is local.
1030*
1031* NOUT (global input) INTEGER
1032* On entry, NOUT specifies the unit number for the output file.
1033* When NOUT is 6, output to screen, when NOUT is 0, output to
1034* stderr. NOUT is only defined for process 0.
1035*
1036* SUBPTR (global input) SUBROUTINE
1037* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038* EXTERNAL in the calling subroutine.
1039*
1040* SCODE (global input) INTEGER
1041* On entry, SCODE specifies the calling sequence code.
1042*
1043* SNAME (global input) CHARACTER*(*)
1044* On entry, SNAME specifies the subroutine name calling this
1045* subprogram.
1046*
1047* Calling sequence encodings
1048* ==========================
1049*
1050* code Formal argument list Examples
1051*
1052* 11 (n, v1,v2) _SWAP, _COPY
1053* 12 (n,s1, v1 ) _SCAL, _SCAL
1054* 13 (n,s1, v1,v2) _AXPY, _DOT_
1055* 14 (n,s1,i1,v1 ) _AMAX
1056* 15 (n,u1, v1 ) _ASUM, _NRM2
1057*
1058* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061* 24 ( m,n,s1,v1,v2,m1) _GER_
1062* 25 (uplo, n,s1,v1, m1) _SYR
1063* 26 (uplo, n,u1,v1, m1) _HER
1064* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065*
1066* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076*
1077* -- Written on April 1, 1998 by
1078* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079*
1080* =====================================================================
1081*
1082* .. Local Scalars ..
1083 INTEGER APOS
1084* ..
1085* .. External Subroutines ..
1086 EXTERNAL pschkmat
1087* ..
1088* .. Executable Statements ..
1089*
1090* Level 1 PBLAS
1091*
1092 IF( scode.EQ.11 ) THEN
1093*
1094* Check 1st vector
1095*
1096 apos = 2
1097 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098*
1099* Check 2nd vector
1100*
1101 apos = 7
1102 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103*
1104 ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105*
1106* Check 1st (and only) vector
1107*
1108 apos = 3
1109 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110*
1111 ELSE IF( scode.EQ.13 ) THEN
1112*
1113* Check 1st vector
1114*
1115 apos = 3
1116 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117*
1118* Check 2nd vector
1119*
1120 apos = 8
1121 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122*
1123 ELSE IF( scode.EQ.14 ) THEN
1124*
1125* Check 1st (and only) vector
1126*
1127 apos = 4
1128 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129*
1130* Level 2 PBLAS
1131*
1132 ELSE IF( scode.EQ.21 ) THEN
1133*
1134* Check 1st vector
1135*
1136 apos = 9
1137 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138*
1139* Check 2nd vector
1140*
1141 apos = 15
1142 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143*
1144 ELSE IF( scode.EQ.22 ) THEN
1145*
1146* Check 1st vector
1147*
1148 apos = 8
1149 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150*
1151* Check 2nd vector
1152*
1153 apos = 14
1154 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155*
1156 ELSE IF( scode.EQ.23 ) THEN
1157*
1158* Check 1st (and only) vector
1159*
1160 apos = 9
1161 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162*
1163 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164*
1165* Check 1st vector
1166*
1167 apos = 4
1168 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169*
1170* Check 2nd vector
1171*
1172 apos = 9
1173 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174*
1175 ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176*
1177* Check 1'st (and only) vector
1178*
1179 apos = 4
1180 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181*
1182 END IF
1183*
1184 RETURN
1185*
1186* End of PSVECEE
1187*
1188 END
1189 SUBROUTINE psmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1190*
1191* -- PBLAS test routine (version 2.0) --
1192* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193* and University of California, Berkeley.
1194* April 1, 1998
1195*
1196* .. Scalar Arguments ..
1197 INTEGER ICTXT, NOUT, SCODE
1198* ..
1199* .. Array Arguments ..
1200 CHARACTER*7 SNAME
1201* ..
1202* .. Subroutine Arguments ..
1203 EXTERNAL subptr
1204* ..
1205*
1206* Purpose
1207* =======
1208*
1209* PSMATEE tests whether the PBLAS respond correctly to a bad matrix
1210* argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211* and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212* can be tested.
1213*
1214* Notes
1215* =====
1216*
1217* A description vector is associated with each 2D block-cyclicly dis-
1218* tributed matrix. This vector stores the information required to
1219* establish the mapping between a matrix entry and its corresponding
1220* process and memory location.
1221*
1222* In the following comments, the character _ should be read as
1223* "of the distributed matrix". Let A be a generic term for any 2D
1224* block cyclicly distributed matrix. Its description vector is DESCA:
1225*
1226* NOTATION STORED IN EXPLANATION
1227* ---------------- --------------- ------------------------------------
1228* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230* the NPROW x NPCOL BLACS process grid
1231* A is distributed over. The context
1232* itself is global, but the handle
1233* (the integer value) may vary.
1234* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235* ted matrix A, M_A >= 0.
1236* N_A (global) DESCA( N_ ) The number of columns in the distri-
1237* buted matrix A, N_A >= 0.
1238* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239* block of the matrix A, IMB_A > 0.
1240* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241* left block of the matrix A,
1242* INB_A > 0.
1243* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244* bute the last M_A-IMB_A rows of A,
1245* MB_A > 0.
1246* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247* bute the last N_A-INB_A columns of
1248* A, NB_A > 0.
1249* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250* row of the matrix A is distributed,
1251* NPROW > RSRC_A >= 0.
1252* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253* first column of A is distributed.
1254* NPCOL > CSRC_A >= 0.
1255* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256* array storing the local blocks of
1257* the distributed matrix A,
1258* IF( Lc( 1, N_A ) > 0 )
1259* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260* ELSE
1261* LLD_A >= 1.
1262*
1263* Let K be the number of rows of a matrix A starting at the global in-
1264* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266* receive if these K rows were distributed over NPROW processes. If K
1267* is the number of columns of a matrix A starting at the global index
1268* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270* these K columns were distributed over NPCOL processes.
1271*
1272* The values of Lr() and Lc() may be determined via a call to the func-
1273* tion PB_NUMROC:
1274* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276*
1277* Arguments
1278* =========
1279*
1280* ICTXT (local input) INTEGER
1281* On entry, ICTXT specifies the BLACS context handle, indica-
1282* ting the global context of the operation. The context itself
1283* is global, but the value of ICTXT is local.
1284*
1285* NOUT (global input) INTEGER
1286* On entry, NOUT specifies the unit number for the output file.
1287* When NOUT is 6, output to screen, when NOUT is 0, output to
1288* stderr. NOUT is only defined for process 0.
1289*
1290* SUBPTR (global input) SUBROUTINE
1291* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292* EXTERNAL in the calling subroutine.
1293*
1294* SCODE (global input) INTEGER
1295* On entry, SCODE specifies the calling sequence code.
1296*
1297* SNAME (global input) CHARACTER*(*)
1298* On entry, SNAME specifies the subroutine name calling this
1299* subprogram.
1300*
1301* Calling sequence encodings
1302* ==========================
1303*
1304* code Formal argument list Examples
1305*
1306* 11 (n, v1,v2) _SWAP, _COPY
1307* 12 (n,s1, v1 ) _SCAL, _SCAL
1308* 13 (n,s1, v1,v2) _AXPY, _DOT_
1309* 14 (n,s1,i1,v1 ) _AMAX
1310* 15 (n,u1, v1 ) _ASUM, _NRM2
1311*
1312* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315* 24 ( m,n,s1,v1,v2,m1) _GER_
1316* 25 (uplo, n,s1,v1, m1) _SYR
1317* 26 (uplo, n,u1,v1, m1) _HER
1318* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319*
1320* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330*
1331* -- Written on April 1, 1998 by
1332* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333*
1334* =====================================================================
1335*
1336* .. Local Scalars ..
1337 INTEGER APOS
1338* ..
1339* .. External Subroutines ..
1340 EXTERNAL pschkmat
1341* ..
1342* .. Executable Statements ..
1343*
1344* Level 2 PBLAS
1345*
1346 IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347*
1348* Check 1st (and only) matrix
1349*
1350 apos = 5
1351 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352*
1353 ELSE IF( scode.EQ.22 ) THEN
1354*
1355* Check 1st (and only) matrix
1356*
1357 apos = 4
1358 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359*
1360 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361*
1362* Check 1st (and only) matrix
1363*
1364 apos = 14
1365 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366*
1367 ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368*
1369* Check 1st (and only) matrix
1370*
1371 apos = 9
1372 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373*
1374* Level 3 PBLAS
1375*
1376 ELSE IF( scode.EQ.31 ) THEN
1377*
1378* Check 1st matrix
1379*
1380 apos = 7
1381 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382*
1383* Check 2nd matrix
1384*
1385 apos = 11
1386 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387*
1388* Check 3nd matrix
1389*
1390 apos = 16
1391 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392*
1393 ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394*
1395* Check 1st matrix
1396*
1397 apos = 6
1398 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399*
1400* Check 2nd matrix
1401*
1402 apos = 10
1403 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404*
1405* Check 3nd matrix
1406*
1407 apos = 15
1408 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409*
1410 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411*
1412* Check 1st matrix
1413*
1414 apos = 6
1415 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416*
1417* Check 2nd matrix
1418*
1419 apos = 11
1420 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421*
1422 ELSE IF( scode.EQ.37 ) THEN
1423*
1424* Check 1st matrix
1425*
1426 apos = 4
1427 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428*
1429* Check 2nd matrix
1430*
1431 apos = 9
1432 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433*
1434 ELSE IF( scode.EQ.38 ) THEN
1435*
1436* Check 1st matrix
1437*
1438 apos = 8
1439 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440*
1441* Check 2nd matrix
1442*
1443 apos = 12
1444 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445*
1446 ELSE IF( scode.EQ.39 ) THEN
1447*
1448* Check 1st matrix
1449*
1450 apos = 5
1451 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452*
1453* Check 2nd matrix
1454*
1455 apos = 10
1456 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457*
1458 ELSE IF( scode.EQ.40 ) THEN
1459*
1460* Check 1st matrix
1461*
1462 apos = 6
1463 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464*
1465* Check 2nd matrix
1466*
1467 apos = 11
1468 CALL pschkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469*
1470 END IF
1471*
1472 RETURN
1473*
1474* End of PSMATEE
1475*
1476 END
1477 SUBROUTINE pssetpblas( ICTXT )
1478*
1479* -- PBLAS test routine (version 2.0) --
1480* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481* and University of California, Berkeley.
1482* April 1, 1998
1483*
1484* .. Scalar Arguments ..
1485 INTEGER ICTXT
1486* ..
1487*
1488* Purpose
1489* =======
1490*
1491* PSSETPBLAS initializes *all* the dummy arguments to correct values.
1492*
1493* Notes
1494* =====
1495*
1496* A description vector is associated with each 2D block-cyclicly dis-
1497* tributed matrix. This vector stores the information required to
1498* establish the mapping between a matrix entry and its corresponding
1499* process and memory location.
1500*
1501* In the following comments, the character _ should be read as
1502* "of the distributed matrix". Let A be a generic term for any 2D
1503* block cyclicly distributed matrix. Its description vector is DESCA:
1504*
1505* NOTATION STORED IN EXPLANATION
1506* ---------------- --------------- ------------------------------------
1507* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509* the NPROW x NPCOL BLACS process grid
1510* A is distributed over. The context
1511* itself is global, but the handle
1512* (the integer value) may vary.
1513* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514* ted matrix A, M_A >= 0.
1515* N_A (global) DESCA( N_ ) The number of columns in the distri-
1516* buted matrix A, N_A >= 0.
1517* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518* block of the matrix A, IMB_A > 0.
1519* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520* left block of the matrix A,
1521* INB_A > 0.
1522* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523* bute the last M_A-IMB_A rows of A,
1524* MB_A > 0.
1525* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526* bute the last N_A-INB_A columns of
1527* A, NB_A > 0.
1528* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529* row of the matrix A is distributed,
1530* NPROW > RSRC_A >= 0.
1531* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532* first column of A is distributed.
1533* NPCOL > CSRC_A >= 0.
1534* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535* array storing the local blocks of
1536* the distributed matrix A,
1537* IF( Lc( 1, N_A ) > 0 )
1538* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539* ELSE
1540* LLD_A >= 1.
1541*
1542* Let K be the number of rows of a matrix A starting at the global in-
1543* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545* receive if these K rows were distributed over NPROW processes. If K
1546* is the number of columns of a matrix A starting at the global index
1547* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549* these K columns were distributed over NPCOL processes.
1550*
1551* The values of Lr() and Lc() may be determined via a call to the func-
1552* tion PB_NUMROC:
1553* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555*
1556* Arguments
1557* =========
1558*
1559* ICTXT (local input) INTEGER
1560* On entry, ICTXT specifies the BLACS context handle, indica-
1561* ting the global context of the operation. The context itself
1562* is global, but the value of ICTXT is local.
1563*
1564* -- Written on April 1, 1998 by
1565* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566*
1567* =====================================================================
1568*
1569* .. Parameters ..
1570 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572 $ rsrc_
1573 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577 REAL ONE
1578 PARAMETER ( ONE = 1.0e+0 )
1579* ..
1580* .. External Subroutines ..
1581 EXTERNAL pb_descset2
1582* ..
1583* .. Common Blocks ..
1584 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1585 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1586 $ jc, jx, jy, kdim, mdim, ndim
1587 REAL USCLR, SCLR
1588 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1589 $ descx( dlen_ ), descy( dlen_ )
1590 REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1591 COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1592 COMMON /pblasd/desca, descb, descc, descx, descy
1593 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1594 $ ja, jb, jc, jx, jy
1595 COMMON /pblasm/a, b, c
1596 COMMON /pblasn/kdim, mdim, ndim
1597 COMMON /pblass/sclr, usclr
1598 COMMON /pblasv/x, y
1599* ..
1600* .. Executable Statements ..
1601*
1602* Set default values for options
1603*
1604 diag = 'N'
1605 side = 'L'
1606 transa = 'N'
1607 transb = 'N'
1608 uplo = 'U'
1609*
1610* Set default values for scalars
1611*
1612 kdim = 1
1613 mdim = 1
1614 ndim = 1
1615 isclr = 1
1616 sclr = one
1617 usclr = one
1618*
1619* Set default values for distributed matrix A
1620*
1621 a( 1, 1 ) = one
1622 a( 2, 1 ) = one
1623 a( 1, 2 ) = one
1624 a( 2, 2 ) = one
1625 ia = 1
1626 ja = 1
1627 CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1628*
1629* Set default values for distributed matrix B
1630*
1631 b( 1, 1 ) = one
1632 b( 2, 1 ) = one
1633 b( 1, 2 ) = one
1634 b( 2, 2 ) = one
1635 ib = 1
1636 jb = 1
1637 CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1638*
1639* Set default values for distributed matrix C
1640*
1641 c( 1, 1 ) = one
1642 c( 2, 1 ) = one
1643 c( 1, 2 ) = one
1644 c( 2, 2 ) = one
1645 ic = 1
1646 jc = 1
1647 CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1648*
1649* Set default values for distributed matrix X
1650*
1651 x( 1 ) = one
1652 x( 2 ) = one
1653 ix = 1
1654 jx = 1
1655 CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1656 incx = 1
1657*
1658* Set default values for distributed matrix Y
1659*
1660 y( 1 ) = one
1661 y( 2 ) = one
1662 iy = 1
1663 jy = 1
1664 CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1665 incy = 1
1666*
1667 RETURN
1668*
1669* End of PSSETPBLAS
1670*
1671 END
1672 SUBROUTINE pschkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1673 $ ARGPOS )
1674*
1675* -- PBLAS test routine (version 2.0) --
1676* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1677* and University of California, Berkeley.
1678* April 1, 1998
1679*
1680* .. Scalar Arguments ..
1681 CHARACTER*1 ARGNAM
1682 INTEGER ARGPOS, ICTXT, NOUT, SCODE
1683* ..
1684* .. Array Arguments ..
1685 CHARACTER*(*) SNAME
1686* ..
1687* .. Subroutine Arguments ..
1688 EXTERNAL subptr
1689* ..
1690*
1691* Purpose
1692* =======
1693*
1694* PSCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1695*
1696* Notes
1697* =====
1698*
1699* A description vector is associated with each 2D block-cyclicly dis-
1700* tributed matrix. This vector stores the information required to
1701* establish the mapping between a matrix entry and its corresponding
1702* process and memory location.
1703*
1704* In the following comments, the character _ should be read as
1705* "of the distributed matrix". Let A be a generic term for any 2D
1706* block cyclicly distributed matrix. Its description vector is DESCA:
1707*
1708* NOTATION STORED IN EXPLANATION
1709* ---------------- --------------- ------------------------------------
1710* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1711* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1712* the NPROW x NPCOL BLACS process grid
1713* A is distributed over. The context
1714* itself is global, but the handle
1715* (the integer value) may vary.
1716* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1717* ted matrix A, M_A >= 0.
1718* N_A (global) DESCA( N_ ) The number of columns in the distri-
1719* buted matrix A, N_A >= 0.
1720* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1721* block of the matrix A, IMB_A > 0.
1722* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1723* left block of the matrix A,
1724* INB_A > 0.
1725* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1726* bute the last M_A-IMB_A rows of A,
1727* MB_A > 0.
1728* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1729* bute the last N_A-INB_A columns of
1730* A, NB_A > 0.
1731* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1732* row of the matrix A is distributed,
1733* NPROW > RSRC_A >= 0.
1734* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1735* first column of A is distributed.
1736* NPCOL > CSRC_A >= 0.
1737* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1738* array storing the local blocks of
1739* the distributed matrix A,
1740* IF( Lc( 1, N_A ) > 0 )
1741* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1742* ELSE
1743* LLD_A >= 1.
1744*
1745* Let K be the number of rows of a matrix A starting at the global in-
1746* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1747* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1748* receive if these K rows were distributed over NPROW processes. If K
1749* is the number of columns of a matrix A starting at the global index
1750* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1751* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1752* these K columns were distributed over NPCOL processes.
1753*
1754* The values of Lr() and Lc() may be determined via a call to the func-
1755* tion PB_NUMROC:
1756* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1757* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1758*
1759* Arguments
1760* =========
1761*
1762* ICTXT (local input) INTEGER
1763* On entry, ICTXT specifies the BLACS context handle, indica-
1764* ting the global context of the operation. The context itself
1765* is global, but the value of ICTXT is local.
1766*
1767* NOUT (global input) INTEGER
1768* On entry, NOUT specifies the unit number for the output file.
1769* When NOUT is 6, output to screen, when NOUT is 0, output to
1770* stderr. NOUT is only defined for process 0.
1771*
1772* SUBPTR (global input) SUBROUTINE
1773* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1774* EXTERNAL in the calling subroutine.
1775*
1776* SCODE (global input) INTEGER
1777* On entry, SCODE specifies the calling sequence code.
1778*
1779* SNAME (global input) CHARACTER*(*)
1780* On entry, SNAME specifies the subroutine name calling this
1781* subprogram.
1782*
1783* ARGNAM (global input) CHARACTER*(*)
1784* On entry, ARGNAM specifies the name of the matrix or vector
1785* to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1786* wants to check a matrix, and 'X' or 'Y' for a vector.
1787*
1788* ARGPOS (global input) INTEGER
1789* On entry, ARGPOS indicates the position of the first argument
1790* of the matrix (or vector) ARGNAM.
1791*
1792* -- Written on April 1, 1998 by
1793* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1794*
1795* =====================================================================
1796*
1797* .. Parameters ..
1798 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1799 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1800 $ RSRC_
1801 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1802 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1803 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1804 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1805 INTEGER DESCMULT
1806 PARAMETER ( DESCMULT = 100 )
1807* ..
1808* .. Local Scalars ..
1809 INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1810* ..
1811* .. External Subroutines ..
1812 EXTERNAL blacs_gridinfo, pchkpbe, pscallsub, pssetpblas
1813* ..
1814* .. External Functions ..
1815 LOGICAL LSAME
1816 EXTERNAL LSAME
1817* ..
1818* .. Common Blocks ..
1819 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1820 $ JC, JX, JY
1821 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1822 $ descx( dlen_ ), descy( dlen_ )
1823 COMMON /pblasd/desca, descb, descc, descx, descy
1824 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1825 $ ja, jb, jc, jx, jy
1826* ..
1827* .. Executable Statements ..
1828*
1829 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1830*
1831 IF( lsame( argnam, 'A' ) ) THEN
1832*
1833* Check IA. Set all other OK, bad IA
1834*
1835 CALL pssetpblas( ictxt )
1836 ia = -1
1837 infot = argpos + 1
1838 CALL pscallsub( subptr, scode )
1839 CALL pchkpbe( ictxt, nout, sname, infot )
1840*
1841* Check JA. Set all other OK, bad JA
1842*
1843 CALL pssetpblas( ictxt )
1844 ja = -1
1845 infot = argpos + 2
1846 CALL pscallsub( subptr, scode )
1847 CALL pchkpbe( ictxt, nout, sname, infot )
1848*
1849* Check DESCA. Set all other OK, bad DESCA
1850*
1851 DO 10 i = 1, dlen_
1852*
1853* Set I'th entry of DESCA to incorrect value, rest ok.
1854*
1855 CALL pssetpblas( ictxt )
1856 desca( i ) = -2
1857 infot = ( ( argpos + 3 ) * descmult ) + i
1858 CALL pscallsub( subptr, scode )
1859 CALL pchkpbe( ictxt, nout, sname, infot )
1860*
1861* Extra tests for RSRCA, CSRCA, LDA
1862*
1863 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1864 $ ( i.EQ.lld_ ) ) THEN
1865*
1866 CALL pssetpblas( ictxt )
1867*
1868* Test RSRCA >= NPROW
1869*
1870 IF( i.EQ.rsrc_ )
1871 $ desca( i ) = nprow
1872*
1873* Test CSRCA >= NPCOL
1874*
1875 IF( i.EQ.csrc_ )
1876 $ desca( i ) = npcol
1877*
1878* Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1879*
1880 IF( i.EQ.lld_ ) THEN
1881 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1882 desca( i ) = 1
1883 ELSE
1884 desca( i ) = 0
1885 END IF
1886 END IF
1887*
1888 infot = ( ( argpos + 3 ) * descmult ) + i
1889 CALL pscallsub( subptr, scode )
1890 CALL pchkpbe( ictxt, nout, sname, infot )
1891*
1892 END IF
1893*
1894 10 CONTINUE
1895*
1896 ELSE IF( lsame( argnam, 'B' ) ) THEN
1897*
1898* Check IB. Set all other OK, bad IB
1899*
1900 CALL pssetpblas( ictxt )
1901 ib = -1
1902 infot = argpos + 1
1903 CALL pscallsub( subptr, scode )
1904 CALL pchkpbe( ictxt, nout, sname, infot )
1905*
1906* Check JB. Set all other OK, bad JB
1907*
1908 CALL pssetpblas( ictxt )
1909 jb = -1
1910 infot = argpos + 2
1911 CALL pscallsub( subptr, scode )
1912 CALL pchkpbe( ictxt, nout, sname, infot )
1913*
1914* Check DESCB. Set all other OK, bad DESCB
1915*
1916 DO 20 i = 1, dlen_
1917*
1918* Set I'th entry of DESCB to incorrect value, rest ok.
1919*
1920 CALL pssetpblas( ictxt )
1921 descb( i ) = -2
1922 infot = ( ( argpos + 3 ) * descmult ) + i
1923 CALL pscallsub( subptr, scode )
1924 CALL pchkpbe( ictxt, nout, sname, infot )
1925*
1926* Extra tests for RSRCB, CSRCB, LDB
1927*
1928 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1929 $ ( i.EQ.lld_ ) ) THEN
1930*
1931 CALL pssetpblas( ictxt )
1932*
1933* Test RSRCB >= NPROW
1934*
1935 IF( i.EQ.rsrc_ )
1936 $ descb( i ) = nprow
1937*
1938* Test CSRCB >= NPCOL
1939*
1940 IF( i.EQ.csrc_ )
1941 $ descb( i ) = npcol
1942*
1943* Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1944*
1945 IF( i.EQ.lld_ ) THEN
1946 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1947 descb( i ) = 1
1948 ELSE
1949 descb( i ) = 0
1950 END IF
1951 END IF
1952*
1953 infot = ( ( argpos + 3 ) * descmult ) + i
1954 CALL pscallsub( subptr, scode )
1955 CALL pchkpbe( ictxt, nout, sname, infot )
1956*
1957 END IF
1958*
1959 20 CONTINUE
1960*
1961 ELSE IF( lsame( argnam, 'C' ) ) THEN
1962*
1963* Check IC. Set all other OK, bad IC
1964*
1965 CALL pssetpblas( ictxt )
1966 ic = -1
1967 infot = argpos + 1
1968 CALL pscallsub( subptr, scode )
1969 CALL pchkpbe( ictxt, nout, sname, infot )
1970*
1971* Check JC. Set all other OK, bad JC
1972*
1973 CALL pssetpblas( ictxt )
1974 jc = -1
1975 infot = argpos + 2
1976 CALL pscallsub( subptr, scode )
1977 CALL pchkpbe( ictxt, nout, sname, infot )
1978*
1979* Check DESCC. Set all other OK, bad DESCC
1980*
1981 DO 30 i = 1, dlen_
1982*
1983* Set I'th entry of DESCC to incorrect value, rest ok.
1984*
1985 CALL pssetpblas( ictxt )
1986 descc( i ) = -2
1987 infot = ( ( argpos + 3 ) * descmult ) + i
1988 CALL pscallsub( subptr, scode )
1989 CALL pchkpbe( ictxt, nout, sname, infot )
1990*
1991* Extra tests for RSRCC, CSRCC, LDC
1992*
1993 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1994 $ ( i.EQ.lld_ ) ) THEN
1995*
1996 CALL pssetpblas( ictxt )
1997*
1998* Test RSRCC >= NPROW
1999*
2000 IF( i.EQ.rsrc_ )
2001 $ descc( i ) = nprow
2002*
2003* Test CSRCC >= NPCOL
2004*
2005 IF( i.EQ.csrc_ )
2006 $ descc( i ) = npcol
2007*
2008* Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2009*
2010 IF( i.EQ.lld_ ) THEN
2011 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2012 descc( i ) = 1
2013 ELSE
2014 descc( i ) = 0
2015 END IF
2016 END IF
2017*
2018 infot = ( ( argpos + 3 ) * descmult ) + i
2019 CALL pscallsub( subptr, scode )
2020 CALL pchkpbe( ictxt, nout, sname, infot )
2021*
2022 END IF
2023*
2024 30 CONTINUE
2025*
2026 ELSE IF( lsame( argnam, 'X' ) ) THEN
2027*
2028* Check IX. Set all other OK, bad IX
2029*
2030 CALL pssetpblas( ictxt )
2031 ix = -1
2032 infot = argpos + 1
2033 CALL pscallsub( subptr, scode )
2034 CALL pchkpbe( ictxt, nout, sname, infot )
2035*
2036* Check JX. Set all other OK, bad JX
2037*
2038 CALL pssetpblas( ictxt )
2039 jx = -1
2040 infot = argpos + 2
2041 CALL pscallsub( subptr, scode )
2042 CALL pchkpbe( ictxt, nout, sname, infot )
2043*
2044* Check DESCX. Set all other OK, bad DESCX
2045*
2046 DO 40 i = 1, dlen_
2047*
2048* Set I'th entry of DESCX to incorrect value, rest ok.
2049*
2050 CALL pssetpblas( ictxt )
2051 descx( i ) = -2
2052 infot = ( ( argpos + 3 ) * descmult ) + i
2053 CALL pscallsub( subptr, scode )
2054 CALL pchkpbe( ictxt, nout, sname, infot )
2055*
2056* Extra tests for RSRCX, CSRCX, LDX
2057*
2058 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2059 $ ( i.EQ.lld_ ) ) THEN
2060*
2061 CALL pssetpblas( ictxt )
2062*
2063* Test RSRCX >= NPROW
2064*
2065 IF( i.EQ.rsrc_ )
2066 $ descx( i ) = nprow
2067*
2068* Test CSRCX >= NPCOL
2069*
2070 IF( i.EQ.csrc_ )
2071 $ descx( i ) = npcol
2072*
2073* Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2074*
2075 IF( i.EQ.lld_ ) THEN
2076 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2077 descx( i ) = 1
2078 ELSE
2079 descx( i ) = 0
2080 END IF
2081 END IF
2082*
2083 infot = ( ( argpos + 3 ) * descmult ) + i
2084 CALL pscallsub( subptr, scode )
2085 CALL pchkpbe( ictxt, nout, sname, infot )
2086*
2087 END IF
2088*
2089 40 CONTINUE
2090*
2091* Check INCX. Set all other OK, bad INCX
2092*
2093 CALL pssetpblas( ictxt )
2094 incx = -1
2095 infot = argpos + 4
2096 CALL pscallsub( subptr, scode )
2097 CALL pchkpbe( ictxt, nout, sname, infot )
2098*
2099 ELSE
2100*
2101* Check IY. Set all other OK, bad IY
2102*
2103 CALL pssetpblas( ictxt )
2104 iy = -1
2105 infot = argpos + 1
2106 CALL pscallsub( subptr, scode )
2107 CALL pchkpbe( ictxt, nout, sname, infot )
2108*
2109* Check JY. Set all other OK, bad JY
2110*
2111 CALL pssetpblas( ictxt )
2112 jy = -1
2113 infot = argpos + 2
2114 CALL pscallsub( subptr, scode )
2115 CALL pchkpbe( ictxt, nout, sname, infot )
2116*
2117* Check DESCY. Set all other OK, bad DESCY
2118*
2119 DO 50 i = 1, dlen_
2120*
2121* Set I'th entry of DESCY to incorrect value, rest ok.
2122*
2123 CALL pssetpblas( ictxt )
2124 descy( i ) = -2
2125 infot = ( ( argpos + 3 ) * descmult ) + i
2126 CALL pscallsub( subptr, scode )
2127 CALL pchkpbe( ictxt, nout, sname, infot )
2128*
2129* Extra tests for RSRCY, CSRCY, LDY
2130*
2131 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2132 $ ( i.EQ.lld_ ) ) THEN
2133*
2134 CALL pssetpblas( ictxt )
2135*
2136* Test RSRCY >= NPROW
2137*
2138 IF( i.EQ.rsrc_ )
2139 $ descy( i ) = nprow
2140*
2141* Test CSRCY >= NPCOL
2142*
2143 IF( i.EQ.csrc_ )
2144 $ descy( i ) = npcol
2145*
2146* Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2147*
2148 IF( i.EQ.lld_ ) THEN
2149 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2150 descy( i ) = 1
2151 ELSE
2152 descy( i ) = 0
2153 END IF
2154 END IF
2155*
2156 infot = ( ( argpos + 3 ) * descmult ) + i
2157 CALL pscallsub( subptr, scode )
2158 CALL pchkpbe( ictxt, nout, sname, infot )
2159*
2160 END IF
2161*
2162 50 CONTINUE
2163*
2164* Check INCY. Set all other OK, bad INCY
2165*
2166 CALL pssetpblas( ictxt )
2167 incy = -1
2168 infot = argpos + 4
2169 CALL pscallsub( subptr, scode )
2170 CALL pchkpbe( ictxt, nout, sname, infot )
2171*
2172 END IF
2173*
2174 RETURN
2175*
2176* End of PSCHKMAT
2177*
2178 END
2179 SUBROUTINE pscallsub( SUBPTR, SCODE )
2180*
2181* -- PBLAS test routine (version 2.0) --
2182* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2183* and University of California, Berkeley.
2184* April 1, 1998
2185*
2186* .. Scalar Arguments ..
2187 INTEGER SCODE
2188* ..
2189* .. Subroutine Arguments ..
2190 EXTERNAL subptr
2191* ..
2192*
2193* Purpose
2194* =======
2195*
2196* PSCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2197* tified by SCODE.
2198*
2199* Notes
2200* =====
2201*
2202* A description vector is associated with each 2D block-cyclicly dis-
2203* tributed matrix. This vector stores the information required to
2204* establish the mapping between a matrix entry and its corresponding
2205* process and memory location.
2206*
2207* In the following comments, the character _ should be read as
2208* "of the distributed matrix". Let A be a generic term for any 2D
2209* block cyclicly distributed matrix. Its description vector is DESCA:
2210*
2211* NOTATION STORED IN EXPLANATION
2212* ---------------- --------------- ------------------------------------
2213* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2214* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2215* the NPROW x NPCOL BLACS process grid
2216* A is distributed over. The context
2217* itself is global, but the handle
2218* (the integer value) may vary.
2219* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2220* ted matrix A, M_A >= 0.
2221* N_A (global) DESCA( N_ ) The number of columns in the distri-
2222* buted matrix A, N_A >= 0.
2223* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2224* block of the matrix A, IMB_A > 0.
2225* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2226* left block of the matrix A,
2227* INB_A > 0.
2228* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2229* bute the last M_A-IMB_A rows of A,
2230* MB_A > 0.
2231* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2232* bute the last N_A-INB_A columns of
2233* A, NB_A > 0.
2234* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2235* row of the matrix A is distributed,
2236* NPROW > RSRC_A >= 0.
2237* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2238* first column of A is distributed.
2239* NPCOL > CSRC_A >= 0.
2240* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2241* array storing the local blocks of
2242* the distributed matrix A,
2243* IF( Lc( 1, N_A ) > 0 )
2244* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2245* ELSE
2246* LLD_A >= 1.
2247*
2248* Let K be the number of rows of a matrix A starting at the global in-
2249* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2250* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2251* receive if these K rows were distributed over NPROW processes. If K
2252* is the number of columns of a matrix A starting at the global index
2253* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2254* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2255* these K columns were distributed over NPCOL processes.
2256*
2257* The values of Lr() and Lc() may be determined via a call to the func-
2258* tion PB_NUMROC:
2259* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2260* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2261*
2262* Arguments
2263* =========
2264*
2265* SUBPTR (global input) SUBROUTINE
2266* On entry, SUBPTR is a subroutine. SUBPTR must be declared
2267* EXTERNAL in the calling subroutine.
2268*
2269* SCODE (global input) INTEGER
2270* On entry, SCODE specifies the calling sequence code.
2271*
2272* Calling sequence encodings
2273* ==========================
2274*
2275* code Formal argument list Examples
2276*
2277* 11 (n, v1,v2) _SWAP, _COPY
2278* 12 (n,s1, v1 ) _SCAL, _SCAL
2279* 13 (n,s1, v1,v2) _AXPY, _DOT_
2280* 14 (n,s1,i1,v1 ) _AMAX
2281* 15 (n,u1, v1 ) _ASUM, _NRM2
2282*
2283* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2284* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2285* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2286* 24 ( m,n,s1,v1,v2,m1) _GER_
2287* 25 (uplo, n,s1,v1, m1) _SYR
2288* 26 (uplo, n,u1,v1, m1) _HER
2289* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2290*
2291* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2292* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2293* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2294* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2295* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2296* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2297* 37 ( m,n, s1,m1, s2,m3) _TRAN_
2298* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2299* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2300* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2301*
2302* -- Written on April 1, 1998 by
2303* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2304*
2305* =====================================================================
2306*
2307* .. Parameters ..
2308 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2309 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2310 $ RSRC_
2311 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2312 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2313 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2314 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2315* ..
2316* .. Common Blocks ..
2317 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2318 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2319 $ JC, JX, JY, KDIM, MDIM, NDIM
2320 REAL USCLR, SCLR
2321 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2322 $ DESCX( DLEN_ ), DESCY( DLEN_ )
2323 REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2324 COMMON /pblasc/diag, side, transa, transb, uplo
2325 COMMON /pblasd/desca, descb, descc, descx, descy
2326 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2327 $ ja, jb, jc, jx, jy
2328 COMMON /pblasm/a, b, c
2329 COMMON /pblasn/kdim, mdim, ndim
2330 COMMON /pblass/sclr, usclr
2331 COMMON /pblasv/x, y
2332* ..
2333* .. Executable Statements ..
2334*
2335* Level 1 PBLAS
2336*
2337 IF( scode.EQ.11 ) THEN
2338*
2339 CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2340 $ incy )
2341*
2342 ELSE IF( scode.EQ.12 ) THEN
2343*
2344 CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2345*
2346 ELSE IF( scode.EQ.13 ) THEN
2347*
2348 CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2349 $ descy, incy )
2350*
2351 ELSE IF( scode.EQ.14 ) THEN
2352*
2353 CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2354*
2355 ELSE IF( scode.EQ.15 ) THEN
2356*
2357 CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2358*
2359* Level 2 PBLAS
2360*
2361 ELSE IF( scode.EQ.21 ) THEN
2362*
2363 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2364 $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2365*
2366 ELSE IF( scode.EQ.22 ) THEN
2367*
2368 CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2369 $ descx, incx, sclr, y, iy, jy, descy, incy )
2370*
2371 ELSE IF( scode.EQ.23 ) THEN
2372*
2373 CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2374 $ jx, descx, incx )
2375*
2376 ELSE IF( scode.EQ.24 ) THEN
2377*
2378 CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2379 $ jy, descy, incy, a, ia, ja, desca )
2380*
2381 ELSE IF( scode.EQ.25 ) THEN
2382*
2383 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2384 $ ja, desca )
2385*
2386 ELSE IF( scode.EQ.26 ) THEN
2387*
2388 CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2389 $ ja, desca )
2390*
2391 ELSE IF( scode.EQ.27 ) THEN
2392*
2393 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2394 $ jy, descy, incy, a, ia, ja, desca )
2395*
2396* Level 3 PBLAS
2397*
2398 ELSE IF( scode.EQ.31 ) THEN
2399*
2400 CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2401 $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2402*
2403 ELSE IF( scode.EQ.32 ) THEN
2404*
2405 CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2406 $ ib, jb, descb, sclr, c, ic, jc, descc )
2407*
2408 ELSE IF( scode.EQ.33 ) THEN
2409*
2410 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2411 $ sclr, c, ic, jc, descc )
2412*
2413 ELSE IF( scode.EQ.34 ) THEN
2414*
2415 CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2416 $ usclr, c, ic, jc, descc )
2417*
2418 ELSE IF( scode.EQ.35 ) THEN
2419*
2420 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2421 $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2422*
2423 ELSE IF( scode.EQ.36 ) THEN
2424*
2425 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2426 $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2427*
2428 ELSE IF( scode.EQ.37 ) THEN
2429*
2430 CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2431 $ jc, descc )
2432*
2433 ELSE IF( scode.EQ.38 ) THEN
2434*
2435 CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2436 $ ja, desca, b, ib, jb, descb )
2437*
2438 ELSE IF( scode.EQ.39 ) THEN
2439*
2440 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2441 $ c, ic, jc, descc )
2442*
2443 ELSE IF( scode.EQ.40 ) THEN
2444*
2445 CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2446 $ sclr, c, ic, jc, descc )
2447*
2448 END IF
2449*
2450 RETURN
2451*
2452* End of PSCALLSUB
2453*
2454 END
2455 SUBROUTINE pserrset( ERR, ERRMAX, XTRUE, X )
2456*
2457* -- PBLAS test routine (version 2.0) --
2458* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2459* and University of California, Berkeley.
2460* April 1, 1998
2461*
2462* .. Scalar Arguments ..
2463 REAL ERR, ERRMAX, X, XTRUE
2464* ..
2465*
2466* Purpose
2467* =======
2468*
2469* PSERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2470* pares it with zero. ERRMAX accumulates the absolute error difference.
2471*
2472* Notes
2473* =====
2474*
2475* A description vector is associated with each 2D block-cyclicly dis-
2476* tributed matrix. This vector stores the information required to
2477* establish the mapping between a matrix entry and its corresponding
2478* process and memory location.
2479*
2480* In the following comments, the character _ should be read as
2481* "of the distributed matrix". Let A be a generic term for any 2D
2482* block cyclicly distributed matrix. Its description vector is DESCA:
2483*
2484* NOTATION STORED IN EXPLANATION
2485* ---------------- --------------- ------------------------------------
2486* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2487* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2488* the NPROW x NPCOL BLACS process grid
2489* A is distributed over. The context
2490* itself is global, but the handle
2491* (the integer value) may vary.
2492* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2493* ted matrix A, M_A >= 0.
2494* N_A (global) DESCA( N_ ) The number of columns in the distri-
2495* buted matrix A, N_A >= 0.
2496* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2497* block of the matrix A, IMB_A > 0.
2498* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2499* left block of the matrix A,
2500* INB_A > 0.
2501* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2502* bute the last M_A-IMB_A rows of A,
2503* MB_A > 0.
2504* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2505* bute the last N_A-INB_A columns of
2506* A, NB_A > 0.
2507* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2508* row of the matrix A is distributed,
2509* NPROW > RSRC_A >= 0.
2510* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2511* first column of A is distributed.
2512* NPCOL > CSRC_A >= 0.
2513* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2514* array storing the local blocks of
2515* the distributed matrix A,
2516* IF( Lc( 1, N_A ) > 0 )
2517* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2518* ELSE
2519* LLD_A >= 1.
2520*
2521* Let K be the number of rows of a matrix A starting at the global in-
2522* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2523* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2524* receive if these K rows were distributed over NPROW processes. If K
2525* is the number of columns of a matrix A starting at the global index
2526* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2527* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2528* these K columns were distributed over NPCOL processes.
2529*
2530* The values of Lr() and Lc() may be determined via a call to the func-
2531* tion PB_NUMROC:
2532* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2533* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2534*
2535* Arguments
2536* =========
2537*
2538* ERR (local output) REAL
2539* On exit, ERR specifies the absolute difference |XTRUE - X|.
2540*
2541* ERRMAX (local input/local output) REAL
2542* On entry, ERRMAX specifies a previously computed error. On
2543* exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2544*
2545* XTRUE (local input) REAL
2546* On entry, XTRUE specifies the true value.
2547*
2548* X (local input) REAL
2549* On entry, X specifies the value to be compared to XTRUE.
2550*
2551* -- Written on April 1, 1998 by
2552* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2553*
2554* =====================================================================
2555*
2556* .. External Functions ..
2557 REAL PSDIFF
2558 EXTERNAL PSDIFF
2559* ..
2560* .. Intrinsic Functions ..
2561 INTRINSIC abs, max
2562* ..
2563* .. Executable Statements ..
2564*
2565 err = abs( psdiff( xtrue, x ) )
2566*
2567 errmax = max( errmax, err )
2568*
2569 RETURN
2570*
2571* End of PSERRSET
2572*
2573 END
2574 SUBROUTINE pschkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2575 $ INFO )
2576*
2577* -- PBLAS test routine (version 2.0) --
2578* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2579* and University of California, Berkeley.
2580* April 1, 1998
2581*
2582* .. Scalar Arguments ..
2583 INTEGER INCX, INFO, IX, JX, N
2584 REAL ERRMAX
2585* ..
2586* .. Array Arguments ..
2587 INTEGER DESCX( * )
2588 REAL PX( * ), X( * )
2589* ..
2590*
2591* Purpose
2592* =======
2593*
2594* PSCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2595* local array entries are compared element by element, and their dif-
2596* ference is tested against 0.0 as well as the epsilon machine. Notice
2597* that this difference should be numerically exactly the zero machine,
2598* but because of the possible fluctuation of some of the data we flag-
2599* ged differently a difference less than twice the epsilon machine. The
2600* largest error is also returned.
2601*
2602* Notes
2603* =====
2604*
2605* A description vector is associated with each 2D block-cyclicly dis-
2606* tributed matrix. This vector stores the information required to
2607* establish the mapping between a matrix entry and its corresponding
2608* process and memory location.
2609*
2610* In the following comments, the character _ should be read as
2611* "of the distributed matrix". Let A be a generic term for any 2D
2612* block cyclicly distributed matrix. Its description vector is DESCA:
2613*
2614* NOTATION STORED IN EXPLANATION
2615* ---------------- --------------- ------------------------------------
2616* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2617* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2618* the NPROW x NPCOL BLACS process grid
2619* A is distributed over. The context
2620* itself is global, but the handle
2621* (the integer value) may vary.
2622* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2623* ted matrix A, M_A >= 0.
2624* N_A (global) DESCA( N_ ) The number of columns in the distri-
2625* buted matrix A, N_A >= 0.
2626* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2627* block of the matrix A, IMB_A > 0.
2628* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2629* left block of the matrix A,
2630* INB_A > 0.
2631* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2632* bute the last M_A-IMB_A rows of A,
2633* MB_A > 0.
2634* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2635* bute the last N_A-INB_A columns of
2636* A, NB_A > 0.
2637* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2638* row of the matrix A is distributed,
2639* NPROW > RSRC_A >= 0.
2640* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2641* first column of A is distributed.
2642* NPCOL > CSRC_A >= 0.
2643* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2644* array storing the local blocks of
2645* the distributed matrix A,
2646* IF( Lc( 1, N_A ) > 0 )
2647* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2648* ELSE
2649* LLD_A >= 1.
2650*
2651* Let K be the number of rows of a matrix A starting at the global in-
2652* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2653* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2654* receive if these K rows were distributed over NPROW processes. If K
2655* is the number of columns of a matrix A starting at the global index
2656* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2657* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2658* these K columns were distributed over NPCOL processes.
2659*
2660* The values of Lr() and Lc() may be determined via a call to the func-
2661* tion PB_NUMROC:
2662* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2663* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2664*
2665* Arguments
2666* =========
2667*
2668* ERRMAX (global output) REAL
2669* On exit, ERRMAX specifies the largest absolute element-wise
2670* difference between sub( X ) and sub( PX ).
2671*
2672* N (global input) INTEGER
2673* On entry, N specifies the length of the subvector operand
2674* sub( X ). N must be at least zero.
2675*
2676* X (local input) REAL array
2677* On entry, X is an array of dimension (DESCX( M_ ),*). This
2678* array contains a local copy of the initial entire matrix PX.
2679*
2680* PX (local input) REAL array
2681* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2682* array contains the local entries of the matrix PX.
2683*
2684* IX (global input) INTEGER
2685* On entry, IX specifies X's global row index, which points to
2686* the beginning of the submatrix sub( X ).
2687*
2688* JX (global input) INTEGER
2689* On entry, JX specifies X's global column index, which points
2690* to the beginning of the submatrix sub( X ).
2691*
2692* DESCX (global and local input) INTEGER array
2693* On entry, DESCX is an integer array of dimension DLEN_. This
2694* is the array descriptor for the matrix X.
2695*
2696* INCX (global input) INTEGER
2697* On entry, INCX specifies the global increment for the
2698* elements of X. Only two values of INCX are supported in
2699* this version, namely 1 and M_X. INCX must not be zero.
2700*
2701* INFO (global output) INTEGER
2702* On exit, if INFO = 0, no error has been found,
2703* If INFO > 0, the maximum abolute error found is in (0,eps],
2704* If INFO < 0, the maximum abolute error found is in (eps,+oo).
2705*
2706* -- Written on April 1, 1998 by
2707* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2708*
2709* =====================================================================
2710*
2711* .. Parameters ..
2712 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2713 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2714 $ RSRC_
2715 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2716 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2717 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2718 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2719 REAL ZERO
2720 PARAMETER ( ZERO = 0.0e+0 )
2721* ..
2722* .. Local Scalars ..
2723 LOGICAL COLREP, ROWREP
2724 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2725 $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2726 $ MYCOL, MYROW, NPCOL, NPROW
2727 REAL ERR, EPS
2728* ..
2729* .. External Subroutines ..
2730 EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
2731* ..
2732* .. External Functions ..
2733 REAL PSLAMCH
2734 EXTERNAL pslamch
2735* ..
2736* .. Intrinsic Functions ..
2737 INTRINSIC abs, max, min, mod
2738* ..
2739* .. Executable Statements ..
2740*
2741 info = 0
2742 errmax = zero
2743*
2744* Quick return if possible
2745*
2746 IF( n.LE.0 )
2747 $ RETURN
2748*
2749 ictxt = descx( ctxt_ )
2750 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2751*
2752 eps = pslamch( ictxt, 'eps' )
2753*
2754 CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2755 $ jjx, ixrow, ixcol )
2756*
2757 ldx = descx( m_ )
2758 ldpx = descx( lld_ )
2759 rowrep = ( ixrow.EQ.-1 )
2760 colrep = ( ixcol.EQ.-1 )
2761*
2762 IF( n.EQ.1 ) THEN
2763*
2764 IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2765 $ ( mycol.EQ.ixcol .OR. colrep ) )
2766 $ CALL pserrset( err, errmax, x( ix+(jx-1)*ldx ),
2767 $ px( iix+(jjx-1)*ldpx ) )
2768*
2769 ELSE IF( incx.EQ.descx( m_ ) ) THEN
2770*
2771* sub( X ) is a row vector
2772*
2773 jb = descx( inb_ ) - jx + 1
2774 IF( jb.LE.0 )
2775 $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2776 jb = min( jb, n )
2777 jn = jx + jb - 1
2778*
2779 IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2780*
2781 icurcol = ixcol
2782 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2783 DO 10 j = jx, jn
2784 CALL pserrset( err, errmax, x( ix+(j-1)*ldx ),
2785 $ px( iix+(jjx-1)*ldpx ) )
2786 jjx = jjx + 1
2787 10 CONTINUE
2788 END IF
2789 icurcol = mod( icurcol+1, npcol )
2790*
2791 DO 30 j = jn+1, jx+n-1, descx( nb_ )
2792 jb = min( jx+n-j, descx( nb_ ) )
2793*
2794 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2795*
2796 DO 20 kk = 0, jb-1
2797 CALL pserrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2798 $ px( iix+(jjx+kk-1)*ldpx ) )
2799 20 CONTINUE
2800*
2801 jjx = jjx + jb
2802*
2803 END IF
2804*
2805 icurcol = mod( icurcol+1, npcol )
2806*
2807 30 CONTINUE
2808*
2809 END IF
2810*
2811 ELSE
2812*
2813* sub( X ) is a column vector
2814*
2815 ib = descx( imb_ ) - ix + 1
2816 IF( ib.LE.0 )
2817 $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2818 ib = min( ib, n )
2819 in = ix + ib - 1
2820*
2821 IF( mycol.EQ.ixcol .OR. colrep ) THEN
2822*
2823 icurrow = ixrow
2824 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2825 DO 40 i = ix, in
2826 CALL pserrset( err, errmax, x( i+(jx-1)*ldx ),
2827 $ px( iix+(jjx-1)*ldpx ) )
2828 iix = iix + 1
2829 40 CONTINUE
2830 END IF
2831 icurrow = mod( icurrow+1, nprow )
2832*
2833 DO 60 i = in+1, ix+n-1, descx( mb_ )
2834 ib = min( ix+n-i, descx( mb_ ) )
2835*
2836 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2837*
2838 DO 50 kk = 0, ib-1
2839 CALL pserrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2840 $ px( iix+kk+(jjx-1)*ldpx ) )
2841 50 CONTINUE
2842*
2843 iix = iix + ib
2844*
2845 END IF
2846*
2847 icurrow = mod( icurrow+1, nprow )
2848*
2849 60 CONTINUE
2850*
2851 END IF
2852*
2853 END IF
2854*
2855 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2856 $ -1, -1 )
2857*
2858 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2859 info = 1
2860 ELSE IF( errmax.GT.eps ) THEN
2861 info = -1
2862 END IF
2863*
2864 RETURN
2865*
2866* End of PSCHKVIN
2867*
2868 END
2869 SUBROUTINE pschkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2870*
2871* -- PBLAS test routine (version 2.0) --
2872* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2873* and University of California, Berkeley.
2874* April 1, 1998
2875*
2876* .. Scalar Arguments ..
2877 INTEGER INCX, INFO, IX, JX, N
2878* ..
2879* .. Array Arguments ..
2880 INTEGER DESCX( * )
2881 REAL PX( * ), X( * )
2882* ..
2883*
2884* Purpose
2885* =======
2886*
2887* PSCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2888* The local array entries are compared element by element, and their
2889* difference is tested against 0.0 as well as the epsilon machine. No-
2890* tice that this difference should be numerically exactly the zero ma-
2891* chine, but because of the possible movement of some of the data we
2892* flagged differently a difference less than twice the epsilon machine.
2893* The largest error is reported.
2894*
2895* Notes
2896* =====
2897*
2898* A description vector is associated with each 2D block-cyclicly dis-
2899* tributed matrix. This vector stores the information required to
2900* establish the mapping between a matrix entry and its corresponding
2901* process and memory location.
2902*
2903* In the following comments, the character _ should be read as
2904* "of the distributed matrix". Let A be a generic term for any 2D
2905* block cyclicly distributed matrix. Its description vector is DESCA:
2906*
2907* NOTATION STORED IN EXPLANATION
2908* ---------------- --------------- ------------------------------------
2909* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2910* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2911* the NPROW x NPCOL BLACS process grid
2912* A is distributed over. The context
2913* itself is global, but the handle
2914* (the integer value) may vary.
2915* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2916* ted matrix A, M_A >= 0.
2917* N_A (global) DESCA( N_ ) The number of columns in the distri-
2918* buted matrix A, N_A >= 0.
2919* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2920* block of the matrix A, IMB_A > 0.
2921* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2922* left block of the matrix A,
2923* INB_A > 0.
2924* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2925* bute the last M_A-IMB_A rows of A,
2926* MB_A > 0.
2927* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2928* bute the last N_A-INB_A columns of
2929* A, NB_A > 0.
2930* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2931* row of the matrix A is distributed,
2932* NPROW > RSRC_A >= 0.
2933* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2934* first column of A is distributed.
2935* NPCOL > CSRC_A >= 0.
2936* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2937* array storing the local blocks of
2938* the distributed matrix A,
2939* IF( Lc( 1, N_A ) > 0 )
2940* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2941* ELSE
2942* LLD_A >= 1.
2943*
2944* Let K be the number of rows of a matrix A starting at the global in-
2945* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2946* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2947* receive if these K rows were distributed over NPROW processes. If K
2948* is the number of columns of a matrix A starting at the global index
2949* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2950* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2951* these K columns were distributed over NPCOL processes.
2952*
2953* The values of Lr() and Lc() may be determined via a call to the func-
2954* tion PB_NUMROC:
2955* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2956* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2957*
2958* Arguments
2959* =========
2960*
2961* N (global input) INTEGER
2962* On entry, N specifies the length of the subvector operand
2963* sub( X ). N must be at least zero.
2964*
2965* X (local input) REAL array
2966* On entry, X is an array of dimension (DESCX( M_ ),*). This
2967* array contains a local copy of the initial entire matrix PX.
2968*
2969* PX (local input) REAL array
2970* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2971* array contains the local entries of the matrix PX.
2972*
2973* IX (global input) INTEGER
2974* On entry, IX specifies X's global row index, which points to
2975* the beginning of the submatrix sub( X ).
2976*
2977* JX (global input) INTEGER
2978* On entry, JX specifies X's global column index, which points
2979* to the beginning of the submatrix sub( X ).
2980*
2981* DESCX (global and local input) INTEGER array
2982* On entry, DESCX is an integer array of dimension DLEN_. This
2983* is the array descriptor for the matrix X.
2984*
2985* INCX (global input) INTEGER
2986* On entry, INCX specifies the global increment for the
2987* elements of X. Only two values of INCX are supported in
2988* this version, namely 1 and M_X. INCX must not be zero.
2989*
2990* INFO (global output) INTEGER
2991* On exit, if INFO = 0, no error has been found,
2992* If INFO > 0, the maximum abolute error found is in (0,eps],
2993* If INFO < 0, the maximum abolute error found is in (eps,+oo).
2994*
2995* -- Written on April 1, 1998 by
2996* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2997*
2998* =====================================================================
2999*
3000* .. Parameters ..
3001 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3002 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3003 $ RSRC_
3004 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3005 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3006 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3007 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3008 REAL ZERO
3009 PARAMETER ( ZERO = 0.0e+0 )
3010* ..
3011* .. Local Scalars ..
3012 LOGICAL COLREP, ROWREP
3013 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3014 $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3015 $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3016 $ nprow, nqall
3017 REAL EPS, ERR, ERRMAX
3018* ..
3019* .. External Subroutines ..
3020 EXTERNAL BLACS_GRIDINFO, PSERRSET, SGAMX2D
3021* ..
3022* .. External Functions ..
3023 INTEGER PB_NUMROC
3024 REAL PSLAMCH
3025 EXTERNAL PSLAMCH, PB_NUMROC
3026* ..
3027* .. Intrinsic Functions ..
3028 INTRINSIC abs, max, min, mod
3029* ..
3030* .. Executable Statements ..
3031*
3032 info = 0
3033 errmax = zero
3034*
3035* Quick return if possible
3036*
3037 IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3038 $ RETURN
3039*
3040* Start the operations
3041*
3042 ictxt = descx( ctxt_ )
3043 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3044*
3045 eps = pslamch( ictxt, 'eps' )
3046*
3047 mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3048 $ myrow, descx( rsrc_ ), nprow )
3049 nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3050 $ mycol, descx( csrc_ ), npcol )
3051*
3052 mbx = descx( mb_ )
3053 nbx = descx( nb_ )
3054 ldx = descx( m_ )
3055 ldpx = descx( lld_ )
3056 icurrow = descx( rsrc_ )
3057 icurcol = descx( csrc_ )
3058 rowrep = ( icurrow.EQ.-1 )
3059 colrep = ( icurcol.EQ.-1 )
3060 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3061 imbx = descx( imb_ )
3062 ELSE
3063 imbx = mbx
3064 END IF
3065 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3066 inbx = descx( inb_ )
3067 ELSE
3068 inbx = nbx
3069 END IF
3070 IF( rowrep ) THEN
3071 myrowdist = 0
3072 ELSE
3073 myrowdist = mod( myrow - icurrow + nprow, nprow )
3074 END IF
3075 IF( colrep ) THEN
3076 mycoldist = 0
3077 ELSE
3078 mycoldist = mod( mycol - icurcol + npcol, npcol )
3079 END IF
3080 ii = 1
3081 jj = 1
3082*
3083 IF( incx.EQ.descx( m_ ) ) THEN
3084*
3085* sub( X ) is a row vector
3086*
3087 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3088*
3089 i = 1
3090 IF( mycoldist.EQ.0 ) THEN
3091 j = 1
3092 ELSE
3093 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3094 END IF
3095 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3096 ib = min( descx( m_ ), descx( imb_ ) )
3097*
3098 DO 20 kk = 0, jb-1
3099 DO 10 ll = 0, ib-1
3100 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3101 $ CALL pserrset( err, errmax,
3102 $ x( i+ll+(j+kk-1)*ldx ),
3103 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3104 10 CONTINUE
3105 20 CONTINUE
3106 IF( colrep ) THEN
3107 j = j + inbx
3108 ELSE
3109 j = j + inbx + ( npcol - 1 ) * nbx
3110 END IF
3111*
3112 DO 50 jj = inbx+1, nqall, nbx
3113 jb = min( nqall-jj+1, nbx )
3114*
3115 DO 40 kk = 0, jb-1
3116 DO 30 ll = 0, ib-1
3117 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3118 $ j+kk.GT.jx+n-1 )
3119 $ CALL pserrset( err, errmax,
3120 $ x( i+ll+(j+kk-1)*ldx ),
3121 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3122 30 CONTINUE
3123 40 CONTINUE
3124*
3125 IF( colrep ) THEN
3126 j = j + nbx
3127 ELSE
3128 j = j + npcol * nbx
3129 END IF
3130*
3131 50 CONTINUE
3132*
3133 ii = ii + ib
3134*
3135 END IF
3136*
3137 icurrow = mod( icurrow + 1, nprow )
3138*
3139 DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3140 ib = min( descx( m_ ) - i + 1, mbx )
3141*
3142 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3143*
3144 IF( mycoldist.EQ.0 ) THEN
3145 j = 1
3146 ELSE
3147 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3148 END IF
3149*
3150 jj = 1
3151 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3152 DO 70 kk = 0, jb-1
3153 DO 60 ll = 0, ib-1
3154 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3155 $ j+kk.GT.jx+n-1 )
3156 $ CALL pserrset( err, errmax,
3157 $ x( i+ll+(j+kk-1)*ldx ),
3158 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3159 60 CONTINUE
3160 70 CONTINUE
3161 IF( colrep ) THEN
3162 j = j + inbx
3163 ELSE
3164 j = j + inbx + ( npcol - 1 ) * nbx
3165 END IF
3166*
3167 DO 100 jj = inbx+1, nqall, nbx
3168 jb = min( nqall-jj+1, nbx )
3169*
3170 DO 90 kk = 0, jb-1
3171 DO 80 ll = 0, ib-1
3172 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3173 $ j+kk.GT.jx+n-1 )
3174 $ CALL pserrset( err, errmax,
3175 $ x( i+ll+(j+kk-1)*ldx ),
3176 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3177 80 CONTINUE
3178 90 CONTINUE
3179*
3180 IF( colrep ) THEN
3181 j = j + nbx
3182 ELSE
3183 j = j + npcol * nbx
3184 END IF
3185*
3186 100 CONTINUE
3187*
3188 ii = ii + ib
3189*
3190 END IF
3191*
3192 icurrow = mod( icurrow + 1, nprow )
3193*
3194 110 CONTINUE
3195*
3196 ELSE
3197*
3198* sub( X ) is a column vector
3199*
3200 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3201*
3202 j = 1
3203 IF( myrowdist.EQ.0 ) THEN
3204 i = 1
3205 ELSE
3206 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3207 END IF
3208 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3209 jb = min( descx( n_ ), descx( inb_ ) )
3210*
3211 DO 130 kk = 0, jb-1
3212 DO 120 ll = 0, ib-1
3213 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3214 $ CALL pserrset( err, errmax,
3215 $ x( i+ll+(j+kk-1)*ldx ),
3216 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3217 120 CONTINUE
3218 130 CONTINUE
3219 IF( rowrep ) THEN
3220 i = i + imbx
3221 ELSE
3222 i = i + imbx + ( nprow - 1 ) * mbx
3223 END IF
3224*
3225 DO 160 ii = imbx+1, mpall, mbx
3226 ib = min( mpall-ii+1, mbx )
3227*
3228 DO 150 kk = 0, jb-1
3229 DO 140 ll = 0, ib-1
3230 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3231 $ i+ll.GT.ix+n-1 )
3232 $ CALL pserrset( err, errmax,
3233 $ x( i+ll+(j+kk-1)*ldx ),
3234 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3235 140 CONTINUE
3236 150 CONTINUE
3237*
3238 IF( rowrep ) THEN
3239 i = i + mbx
3240 ELSE
3241 i = i + nprow * mbx
3242 END IF
3243*
3244 160 CONTINUE
3245*
3246 jj = jj + jb
3247*
3248 END IF
3249*
3250 icurcol = mod( icurcol + 1, npcol )
3251*
3252 DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3253 jb = min( descx( n_ ) - j + 1, nbx )
3254*
3255 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3256*
3257 IF( myrowdist.EQ.0 ) THEN
3258 i = 1
3259 ELSE
3260 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3261 END IF
3262*
3263 ii = 1
3264 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3265 DO 180 kk = 0, jb-1
3266 DO 170 ll = 0, ib-1
3267 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3268 $ i+ll.GT.ix+n-1 )
3269 $ CALL pserrset( err, errmax,
3270 $ x( i+ll+(j+kk-1)*ldx ),
3271 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3272 170 CONTINUE
3273 180 CONTINUE
3274 IF( rowrep ) THEN
3275 i = i + imbx
3276 ELSE
3277 i = i + imbx + ( nprow - 1 ) * mbx
3278 END IF
3279*
3280 DO 210 ii = imbx+1, mpall, mbx
3281 ib = min( mpall-ii+1, mbx )
3282*
3283 DO 200 kk = 0, jb-1
3284 DO 190 ll = 0, ib-1
3285 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3286 $ i+ll.GT.ix+n-1 )
3287 $ CALL pserrset( err, errmax,
3288 $ x( i+ll+(j+kk-1)*ldx ),
3289 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3290 190 CONTINUE
3291 200 CONTINUE
3292*
3293 IF( rowrep ) THEN
3294 i = i + mbx
3295 ELSE
3296 i = i + nprow * mbx
3297 END IF
3298*
3299 210 CONTINUE
3300*
3301 jj = jj + jb
3302*
3303 END IF
3304*
3305 icurcol = mod( icurcol + 1, npcol )
3306*
3307 220 CONTINUE
3308*
3309 END IF
3310*
3311 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3312 $ -1, -1 )
3313*
3314 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3315 info = 1
3316 ELSE IF( errmax.GT.eps ) THEN
3317 info = -1
3318 END IF
3319*
3320 RETURN
3321*
3322* End of PSCHKVOUT
3323*
3324 END
3325 SUBROUTINE pschkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3326*
3327* -- PBLAS test routine (version 2.0) --
3328* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3329* and University of California, Berkeley.
3330* April 1, 1998
3331*
3332* .. Scalar Arguments ..
3333 INTEGER IA, INFO, JA, M, N
3334 REAL ERRMAX
3335* ..
3336* .. Array Arguments ..
3337 INTEGER DESCA( * )
3338 REAL PA( * ), A( * )
3339* ..
3340*
3341* Purpose
3342* =======
3343*
3344* PSCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3345* local array entries are compared element by element, and their dif-
3346* ference is tested against 0.0 as well as the epsilon machine. Notice
3347* that this difference should be numerically exactly the zero machine,
3348* but because of the possible fluctuation of some of the data we flag-
3349* ged differently a difference less than twice the epsilon machine. The
3350* largest error is also returned.
3351*
3352* Notes
3353* =====
3354*
3355* A description vector is associated with each 2D block-cyclicly dis-
3356* tributed matrix. This vector stores the information required to
3357* establish the mapping between a matrix entry and its corresponding
3358* process and memory location.
3359*
3360* In the following comments, the character _ should be read as
3361* "of the distributed matrix". Let A be a generic term for any 2D
3362* block cyclicly distributed matrix. Its description vector is DESCA:
3363*
3364* NOTATION STORED IN EXPLANATION
3365* ---------------- --------------- ------------------------------------
3366* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3367* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3368* the NPROW x NPCOL BLACS process grid
3369* A is distributed over. The context
3370* itself is global, but the handle
3371* (the integer value) may vary.
3372* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3373* ted matrix A, M_A >= 0.
3374* N_A (global) DESCA( N_ ) The number of columns in the distri-
3375* buted matrix A, N_A >= 0.
3376* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3377* block of the matrix A, IMB_A > 0.
3378* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3379* left block of the matrix A,
3380* INB_A > 0.
3381* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3382* bute the last M_A-IMB_A rows of A,
3383* MB_A > 0.
3384* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3385* bute the last N_A-INB_A columns of
3386* A, NB_A > 0.
3387* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3388* row of the matrix A is distributed,
3389* NPROW > RSRC_A >= 0.
3390* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3391* first column of A is distributed.
3392* NPCOL > CSRC_A >= 0.
3393* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3394* array storing the local blocks of
3395* the distributed matrix A,
3396* IF( Lc( 1, N_A ) > 0 )
3397* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3398* ELSE
3399* LLD_A >= 1.
3400*
3401* Let K be the number of rows of a matrix A starting at the global in-
3402* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3403* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3404* receive if these K rows were distributed over NPROW processes. If K
3405* is the number of columns of a matrix A starting at the global index
3406* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3407* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3408* these K columns were distributed over NPCOL processes.
3409*
3410* The values of Lr() and Lc() may be determined via a call to the func-
3411* tion PB_NUMROC:
3412* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3413* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3414*
3415* Arguments
3416* =========
3417*
3418* ERRMAX (global output) REAL
3419* On exit, ERRMAX specifies the largest absolute element-wise
3420* difference between sub( A ) and sub( PA ).
3421*
3422* M (global input) INTEGER
3423* On entry, M specifies the number of rows of the submatrix
3424* operand sub( A ). M must be at least zero.
3425*
3426* N (global input) INTEGER
3427* On entry, N specifies the number of columns of the submatrix
3428* operand sub( A ). N must be at least zero.
3429*
3430* A (local input) REAL array
3431* On entry, A is an array of dimension (DESCA( M_ ),*). This
3432* array contains a local copy of the initial entire matrix PA.
3433*
3434* PA (local input) REAL array
3435* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3436* array contains the local entries of the matrix PA.
3437*
3438* IA (global input) INTEGER
3439* On entry, IA specifies A's global row index, which points to
3440* the beginning of the submatrix sub( A ).
3441*
3442* JA (global input) INTEGER
3443* On entry, JA specifies A's global column index, which points
3444* to the beginning of the submatrix sub( A ).
3445*
3446* DESCA (global and local input) INTEGER array
3447* On entry, DESCA is an integer array of dimension DLEN_. This
3448* is the array descriptor for the matrix A.
3449*
3450* INFO (global output) INTEGER
3451* On exit, if INFO = 0, no error has been found,
3452* If INFO > 0, the maximum abolute error found is in (0,eps],
3453* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3454*
3455* -- Written on April 1, 1998 by
3456* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3457*
3458* =====================================================================
3459*
3460* .. Parameters ..
3461 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3462 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3463 $ RSRC_
3464 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3465 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3466 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3467 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3468 REAL ZERO
3469 PARAMETER ( ZERO = 0.0e+0 )
3470* ..
3471* .. Local Scalars ..
3472 LOGICAL COLREP, ROWREP
3473 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3474 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3475 $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3476 REAL ERR, EPS
3477* ..
3478* .. External Subroutines ..
3479 EXTERNAL blacs_gridinfo, pb_infog2l, pserrset, sgamx2d
3480* ..
3481* .. External Functions ..
3482 REAL PSLAMCH
3483 EXTERNAL pslamch
3484* ..
3485* .. Intrinsic Functions ..
3486 INTRINSIC abs, max, min, mod
3487* ..
3488* .. Executable Statements ..
3489*
3490 info = 0
3491 errmax = zero
3492*
3493* Quick return if posssible
3494*
3495 IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3496 $ RETURN
3497*
3498* Start the operations
3499*
3500 ictxt = desca( ctxt_ )
3501 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3502*
3503 eps = pslamch( ictxt, 'eps' )
3504*
3505 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3506 $ jja, iarow, iacol )
3507*
3508 ii = iia
3509 jj = jja
3510 lda = desca( m_ )
3511 ldpa = desca( lld_ )
3512 icurrow = iarow
3513 icurcol = iacol
3514 rowrep = ( iarow.EQ.-1 )
3515 colrep = ( iacol.EQ.-1 )
3516*
3517* Handle the first block of column separately
3518*
3519 jb = desca( inb_ ) - ja + 1
3520 IF( jb.LE.0 )
3521 $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3522 jb = min( jb, n )
3523 jn = ja + jb - 1
3524*
3525 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3526*
3527 DO 40 h = 0, jb-1
3528 ib = desca( imb_ ) - ia + 1
3529 IF( ib.LE.0 )
3530 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3531 ib = min( ib, m )
3532 in = ia + ib - 1
3533 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3534 DO 10 k = 0, ib-1
3535 CALL pserrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3536 $ pa( ii+k+(jj+h-1)*ldpa ) )
3537 10 CONTINUE
3538 ii = ii + ib
3539 END IF
3540 icurrow = mod( icurrow+1, nprow )
3541*
3542* Loop over remaining block of rows
3543*
3544 DO 30 i = in+1, ia+m-1, desca( mb_ )
3545 ib = min( desca( mb_ ), ia+m-i )
3546 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3547 DO 20 k = 0, ib-1
3548 CALL pserrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3549 $ pa( ii+k+(jj+h-1)*ldpa ) )
3550 20 CONTINUE
3551 ii = ii + ib
3552 END IF
3553 icurrow = mod( icurrow+1, nprow )
3554 30 CONTINUE
3555*
3556 ii = iia
3557 icurrow = iarow
3558 40 CONTINUE
3559*
3560 jj = jj + jb
3561*
3562 END IF
3563*
3564 icurcol = mod( icurcol+1, npcol )
3565*
3566* Loop over remaining column blocks
3567*
3568 DO 90 j = jn+1, ja+n-1, desca( nb_ )
3569 jb = min( desca( nb_ ), ja+n-j )
3570 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3571 DO 80 h = 0, jb-1
3572 ib = desca( imb_ ) - ia + 1
3573 IF( ib.LE.0 )
3574 $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3575 ib = min( ib, m )
3576 in = ia + ib - 1
3577 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3578 DO 50 k = 0, ib-1
3579 CALL pserrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3580 $ pa( ii+k+(jj+h-1)*ldpa ) )
3581 50 CONTINUE
3582 ii = ii + ib
3583 END IF
3584 icurrow = mod( icurrow+1, nprow )
3585*
3586* Loop over remaining block of rows
3587*
3588 DO 70 i = in+1, ia+m-1, desca( mb_ )
3589 ib = min( desca( mb_ ), ia+m-i )
3590 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3591 DO 60 k = 0, ib-1
3592 CALL pserrset( err, errmax,
3593 $ a( i+k+(j+h-1)*lda ),
3594 $ pa( ii+k+(jj+h-1)*ldpa ) )
3595 60 CONTINUE
3596 ii = ii + ib
3597 END IF
3598 icurrow = mod( icurrow+1, nprow )
3599 70 CONTINUE
3600*
3601 ii = iia
3602 icurrow = iarow
3603 80 CONTINUE
3604*
3605 jj = jj + jb
3606 END IF
3607*
3608 icurcol = mod( icurcol+1, npcol )
3609*
3610 90 CONTINUE
3611*
3612 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3613 $ -1, -1 )
3614*
3615 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3616 info = 1
3617 ELSE IF( errmax.GT.eps ) THEN
3618 info = -1
3619 END IF
3620*
3621 RETURN
3622*
3623* End of PSCHKMIN
3624*
3625 END
3626 SUBROUTINE pschkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3627*
3628* -- PBLAS test routine (version 2.0) --
3629* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3630* and University of California, Berkeley.
3631* April 1, 1998
3632*
3633* .. Scalar Arguments ..
3634 INTEGER IA, INFO, JA, M, N
3635* ..
3636* .. Array Arguments ..
3637 INTEGER DESCA( * )
3638 REAL A( * ), PA( * )
3639* ..
3640*
3641* Purpose
3642* =======
3643*
3644* PSCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3645* The local array entries are compared element by element, and their
3646* difference is tested against 0.0 as well as the epsilon machine. No-
3647* tice that this difference should be numerically exactly the zero ma-
3648* chine, but because of the possible movement of some of the data we
3649* flagged differently a difference less than twice the epsilon machine.
3650* The largest error is reported.
3651*
3652* Notes
3653* =====
3654*
3655* A description vector is associated with each 2D block-cyclicly dis-
3656* tributed matrix. This vector stores the information required to
3657* establish the mapping between a matrix entry and its corresponding
3658* process and memory location.
3659*
3660* In the following comments, the character _ should be read as
3661* "of the distributed matrix". Let A be a generic term for any 2D
3662* block cyclicly distributed matrix. Its description vector is DESCA:
3663*
3664* NOTATION STORED IN EXPLANATION
3665* ---------------- --------------- ------------------------------------
3666* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3667* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3668* the NPROW x NPCOL BLACS process grid
3669* A is distributed over. The context
3670* itself is global, but the handle
3671* (the integer value) may vary.
3672* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3673* ted matrix A, M_A >= 0.
3674* N_A (global) DESCA( N_ ) The number of columns in the distri-
3675* buted matrix A, N_A >= 0.
3676* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3677* block of the matrix A, IMB_A > 0.
3678* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3679* left block of the matrix A,
3680* INB_A > 0.
3681* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3682* bute the last M_A-IMB_A rows of A,
3683* MB_A > 0.
3684* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3685* bute the last N_A-INB_A columns of
3686* A, NB_A > 0.
3687* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3688* row of the matrix A is distributed,
3689* NPROW > RSRC_A >= 0.
3690* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3691* first column of A is distributed.
3692* NPCOL > CSRC_A >= 0.
3693* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3694* array storing the local blocks of
3695* the distributed matrix A,
3696* IF( Lc( 1, N_A ) > 0 )
3697* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3698* ELSE
3699* LLD_A >= 1.
3700*
3701* Let K be the number of rows of a matrix A starting at the global in-
3702* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3703* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3704* receive if these K rows were distributed over NPROW processes. If K
3705* is the number of columns of a matrix A starting at the global index
3706* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3707* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3708* these K columns were distributed over NPCOL processes.
3709*
3710* The values of Lr() and Lc() may be determined via a call to the func-
3711* tion PB_NUMROC:
3712* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3713* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3714*
3715* Arguments
3716* =========
3717*
3718* M (global input) INTEGER
3719* On entry, M specifies the number of rows of the submatrix
3720* sub( PA ). M must be at least zero.
3721*
3722* N (global input) INTEGER
3723* On entry, N specifies the number of columns of the submatrix
3724* sub( PA ). N must be at least zero.
3725*
3726* A (local input) REAL array
3727* On entry, A is an array of dimension (DESCA( M_ ),*). This
3728* array contains a local copy of the initial entire matrix PA.
3729*
3730* PA (local input) REAL array
3731* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3732* array contains the local entries of the matrix PA.
3733*
3734* IA (global input) INTEGER
3735* On entry, IA specifies A's global row index, which points to
3736* the beginning of the submatrix sub( A ).
3737*
3738* JA (global input) INTEGER
3739* On entry, JA specifies A's global column index, which points
3740* to the beginning of the submatrix sub( A ).
3741*
3742* DESCA (global and local input) INTEGER array
3743* On entry, DESCA is an integer array of dimension DLEN_. This
3744* is the array descriptor for the matrix A.
3745*
3746* INFO (global output) INTEGER
3747* On exit, if INFO = 0, no error has been found,
3748* If INFO > 0, the maximum abolute error found is in (0,eps],
3749* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3750*
3751* -- Written on April 1, 1998 by
3752* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3753*
3754* =====================================================================
3755*
3756* .. Parameters ..
3757 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3758 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3759 $ RSRC_
3760 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3761 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3762 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3763 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3764 REAL ZERO
3765 PARAMETER ( ZERO = 0.0e+0 )
3766* ..
3767* .. Local Scalars ..
3768 LOGICAL COLREP, ROWREP
3769 INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3770 $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3771 $ NPCOL, NPROW
3772 REAL EPS, ERR, ERRMAX
3773* ..
3774* .. External Subroutines ..
3775 EXTERNAL blacs_gridinfo, pserrset, sgamx2d
3776* ..
3777* .. External Functions ..
3778 INTEGER PB_NUMROC
3779 REAL PSLAMCH
3780 EXTERNAL PSLAMCH, PB_NUMROC
3781* ..
3782* .. Intrinsic Functions ..
3783 INTRINSIC max, min, mod
3784* ..
3785* .. Executable Statements ..
3786*
3787 info = 0
3788 errmax = zero
3789*
3790* Quick return if possible
3791*
3792 IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3793 $ RETURN
3794*
3795* Start the operations
3796*
3797 ictxt = desca( ctxt_ )
3798 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3799*
3800 eps = pslamch( ictxt, 'eps' )
3801*
3802 mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3803 $ myrow, desca( rsrc_ ), nprow )
3804*
3805 lda = desca( m_ )
3806 ldpa = desca( lld_ )
3807*
3808 ii = 1
3809 jj = 1
3810 rowrep = ( desca( rsrc_ ).EQ.-1 )
3811 colrep = ( desca( csrc_ ).EQ.-1 )
3812 icurcol = desca( csrc_ )
3813 IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3814 imba = desca( imb_ )
3815 ELSE
3816 imba = desca( mb_ )
3817 END IF
3818 IF( rowrep ) THEN
3819 myrowdist = 0
3820 ELSE
3821 myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3822 END IF
3823*
3824 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3825*
3826 j = 1
3827 IF( myrowdist.EQ.0 ) THEN
3828 i = 1
3829 ELSE
3830 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3831 END IF
3832 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3833 jb = min( desca( n_ ), desca( inb_ ) )
3834*
3835 DO 20 kk = 0, jb-1
3836 DO 10 ll = 0, ib-1
3837 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3838 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3839 $ CALL pserrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3840 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3841 10 CONTINUE
3842 20 CONTINUE
3843 IF( rowrep ) THEN
3844 i = i + imba
3845 ELSE
3846 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3847 END IF
3848*
3849 DO 50 ii = imba + 1, mpall, desca( mb_ )
3850 ib = min( mpall-ii+1, desca( mb_ ) )
3851*
3852 DO 40 kk = 0, jb-1
3853 DO 30 ll = 0, ib-1
3854 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3855 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3856 $ CALL pserrset( err, errmax,
3857 $ a( i+ll+(j+kk-1)*lda ),
3858 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3859 30 CONTINUE
3860 40 CONTINUE
3861*
3862 IF( rowrep ) THEN
3863 i = i + desca( mb_ )
3864 ELSE
3865 i = i + nprow * desca( mb_ )
3866 END IF
3867*
3868 50 CONTINUE
3869*
3870 jj = jj + jb
3871*
3872 END IF
3873*
3874 icurcol = mod( icurcol + 1, npcol )
3875*
3876 DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3877 jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3878*
3879 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3880*
3881 IF( myrowdist.EQ.0 ) THEN
3882 i = 1
3883 ELSE
3884 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3885 END IF
3886*
3887 ii = 1
3888 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3889 DO 70 kk = 0, jb-1
3890 DO 60 ll = 0, ib-1
3891 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3892 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3893 $ CALL pserrset( err, errmax,
3894 $ a( i+ll+(j+kk-1)*lda ),
3895 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3896 60 CONTINUE
3897 70 CONTINUE
3898 IF( rowrep ) THEN
3899 i = i + imba
3900 ELSE
3901 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3902 END IF
3903*
3904 DO 100 ii = imba+1, mpall, desca( mb_ )
3905 ib = min( mpall-ii+1, desca( mb_ ) )
3906*
3907 DO 90 kk = 0, jb-1
3908 DO 80 ll = 0, ib-1
3909 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3910 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3911 $ CALL pserrset( err, errmax,
3912 $ a( i+ll+(j+kk-1)*lda ),
3913 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3914 80 CONTINUE
3915 90 CONTINUE
3916*
3917 IF( rowrep ) THEN
3918 i = i + desca( mb_ )
3919 ELSE
3920 i = i + nprow * desca( mb_ )
3921 END IF
3922*
3923 100 CONTINUE
3924*
3925 jj = jj + jb
3926*
3927 END IF
3928*
3929 icurcol = mod( icurcol + 1, npcol )
3930* INSERT MODE
3931 110 CONTINUE
3932*
3933 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3934 $ -1, -1 )
3935*
3936 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3937 info = 1
3938 ELSE IF( errmax.GT.eps ) THEN
3939 info = -1
3940 END IF
3941*
3942 RETURN
3943*
3944* End of PSCHKMOUT
3945*
3946 END
3947 SUBROUTINE psmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3948 $ CMATNM )
3949*
3950* -- PBLAS test routine (version 2.0) --
3951* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3952* and University of California, Berkeley.
3953* April 1, 1998
3954*
3955* .. Scalar Arguments ..
3956 INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3957* ..
3958* .. Array Arguments ..
3959 CHARACTER*(*) CMATNM
3960 REAL A( LDA, * )
3961* ..
3962*
3963* Purpose
3964* =======
3965*
3966* PSMPRNT prints to the standard output an array A of size m by n. Only
3967* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3968*
3969* Arguments
3970* =========
3971*
3972* ICTXT (local input) INTEGER
3973* On entry, ICTXT specifies the BLACS context handle, indica-
3974* ting the global context of the operation. The context itself
3975* is global, but the value of ICTXT is local.
3976*
3977* NOUT (global input) INTEGER
3978* On entry, NOUT specifies the unit number for the output file.
3979* When NOUT is 6, output to screen, when NOUT is 0, output to
3980* stderr. NOUT is only defined for process 0.
3981*
3982* M (global input) INTEGER
3983* On entry, M specifies the number of rows of the matrix A. M
3984* must be at least zero.
3985*
3986* N (global input) INTEGER
3987* On entry, N specifies the number of columns of the matrix A.
3988* N must be at least zero.
3989*
3990* A (local input) REAL array
3991* On entry, A is an array of dimension (LDA,N). The leading m
3992* by n part of this array is printed.
3993*
3994* LDA (local input) INTEGER
3995* On entry, LDA specifies the leading dimension of the local
3996* array A to be printed. LDA must be at least MAX( 1, M ).
3997*
3998* IRPRNT (global input) INTEGER
3999* On entry, IRPRNT specifies the process row coordinate of the
4000* printing process.
4001*
4002* ICPRNT (global input) INTEGER
4003* On entry, ICPRNT specifies the process column coordinate of
4004* the printing process.
4005*
4006* CMATNM (global input) CHARACTER*(*)
4007* On entry, CMATNM specifies the identifier of the matrix to be
4008* printed.
4009*
4010* -- Written on April 1, 1998 by
4011* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4012*
4013* =====================================================================
4014*
4015* .. Local Scalars ..
4016 INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4017* ..
4018* .. External Subroutines ..
4019 EXTERNAL BLACS_GRIDINFO
4020* ..
4021* .. Executable Statements ..
4022*
4023* Quick return if possible
4024*
4025 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4026 $ RETURN
4027*
4028* Get grid parameters
4029*
4030 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4031*
4032 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4033*
4034 WRITE( nout, fmt = * )
4035 DO 20 j = 1, n
4036*
4037 DO 10 i = 1, m
4038*
4039 WRITE( nout, fmt = 9999 ) cmatnm, i, j, a( i, j )
4040*
4041 10 CONTINUE
4042*
4043 20 CONTINUE
4044*
4045 END IF
4046*
4047 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8 )
4048*
4049 RETURN
4050*
4051* End of PSMPRNT
4052*
4053 END
4054 SUBROUTINE psvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4055 $ CVECNM )
4056*
4057* -- PBLAS test routine (version 2.0) --
4058* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4059* and University of California, Berkeley.
4060* April 1, 1998
4061*
4062* .. Scalar Arguments ..
4063 INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4064* ..
4065* .. Array Arguments ..
4066 CHARACTER*(*) CVECNM
4067 REAL X( * )
4068* ..
4069*
4070* Purpose
4071* =======
4072*
4073* PSVPRNT prints to the standard output an vector x of length n. Only
4074* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4075*
4076* Arguments
4077* =========
4078*
4079* ICTXT (local input) INTEGER
4080* On entry, ICTXT specifies the BLACS context handle, indica-
4081* ting the global context of the operation. The context itself
4082* is global, but the value of ICTXT is local.
4083*
4084* NOUT (global input) INTEGER
4085* On entry, NOUT specifies the unit number for the output file.
4086* When NOUT is 6, output to screen, when NOUT is 0, output to
4087* stderr. NOUT is only defined for process 0.
4088*
4089* N (global input) INTEGER
4090* On entry, N specifies the length of the vector X. N must be
4091* at least zero.
4092*
4093* X (global input) REAL array
4094* On entry, X is an array of dimension at least
4095* ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4096* ted array X must contain the vector x.
4097*
4098* INCX (global input) INTEGER.
4099* On entry, INCX specifies the increment for the elements of X.
4100* INCX must not be zero.
4101*
4102* IRPRNT (global input) INTEGER
4103* On entry, IRPRNT specifies the process row coordinate of the
4104* printing process.
4105*
4106* ICPRNT (global input) INTEGER
4107* On entry, ICPRNT specifies the process column coordinate of
4108* the printing process.
4109*
4110* CVECNM (global input) CHARACTER*(*)
4111* On entry, CVECNM specifies the identifier of the vector to be
4112* printed.
4113*
4114* -- Written on April 1, 1998 by
4115* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4116*
4117* =====================================================================
4118*
4119* .. Local Scalars ..
4120 INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4121* ..
4122* .. External Subroutines ..
4123 EXTERNAL BLACS_GRIDINFO
4124* ..
4125* .. Executable Statements ..
4126*
4127* Quick return if possible
4128*
4129 IF( n.LE.0 )
4130 $ RETURN
4131*
4132* Get grid parameters
4133*
4134 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4135*
4136 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4137*
4138 WRITE( nout, fmt = * )
4139 DO 10 i = 1, 1 + ( n-1 )*incx, incx
4140*
4141 WRITE( nout, fmt = 9999 ) cvecnm, i, x( i )
4142*
4143 10 CONTINUE
4144*
4145 END IF
4146*
4147 9999 FORMAT( 1x, a, '(', i6, ')=', e16.8 )
4148*
4149 RETURN
4150*
4151* End of PSVPRNT
4152*
4153 END
4154 SUBROUTINE psmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4155 $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4156 $ DESCY, INCY, G, ERR, INFO )
4157*
4158* -- PBLAS test routine (version 2.0) --
4159* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4160* and University of California, Berkeley.
4161* April 1, 1998
4162*
4163* .. Scalar Arguments ..
4164 CHARACTER*1 TRANS
4165 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4166 $ JY, M, N
4167 REAL ALPHA, BETA, ERR
4168* ..
4169* .. Array Arguments ..
4170 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4171 REAL A( * ), G( * ), PY( * ), X( * ), Y( * )
4172* ..
4173*
4174* Purpose
4175* =======
4176*
4177* PSMVCH checks the results of the computational tests.
4178*
4179* Notes
4180* =====
4181*
4182* A description vector is associated with each 2D block-cyclicly dis-
4183* tributed matrix. This vector stores the information required to
4184* establish the mapping between a matrix entry and its corresponding
4185* process and memory location.
4186*
4187* In the following comments, the character _ should be read as
4188* "of the distributed matrix". Let A be a generic term for any 2D
4189* block cyclicly distributed matrix. Its description vector is DESCA:
4190*
4191* NOTATION STORED IN EXPLANATION
4192* ---------------- --------------- ------------------------------------
4193* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4194* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4195* the NPROW x NPCOL BLACS process grid
4196* A is distributed over. The context
4197* itself is global, but the handle
4198* (the integer value) may vary.
4199* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4200* ted matrix A, M_A >= 0.
4201* N_A (global) DESCA( N_ ) The number of columns in the distri-
4202* buted matrix A, N_A >= 0.
4203* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4204* block of the matrix A, IMB_A > 0.
4205* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4206* left block of the matrix A,
4207* INB_A > 0.
4208* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4209* bute the last M_A-IMB_A rows of A,
4210* MB_A > 0.
4211* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4212* bute the last N_A-INB_A columns of
4213* A, NB_A > 0.
4214* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4215* row of the matrix A is distributed,
4216* NPROW > RSRC_A >= 0.
4217* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4218* first column of A is distributed.
4219* NPCOL > CSRC_A >= 0.
4220* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4221* array storing the local blocks of
4222* the distributed matrix A,
4223* IF( Lc( 1, N_A ) > 0 )
4224* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4225* ELSE
4226* LLD_A >= 1.
4227*
4228* Let K be the number of rows of a matrix A starting at the global in-
4229* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4230* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4231* receive if these K rows were distributed over NPROW processes. If K
4232* is the number of columns of a matrix A starting at the global index
4233* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4234* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4235* these K columns were distributed over NPCOL processes.
4236*
4237* The values of Lr() and Lc() may be determined via a call to the func-
4238* tion PB_NUMROC:
4239* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4240* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4241*
4242* Arguments
4243* =========
4244*
4245* ICTXT (local input) INTEGER
4246* On entry, ICTXT specifies the BLACS context handle, indica-
4247* ting the global context of the operation. The context itself
4248* is global, but the value of ICTXT is local.
4249*
4250* TRANS (global input) CHARACTER*1
4251* On entry, TRANS specifies which matrix-vector product is to
4252* be computed as follows:
4253* If TRANS = 'N',
4254* sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ),
4255* otherwise
4256* sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ).
4257*
4258* M (global input) INTEGER
4259* On entry, M specifies the number of rows of the submatrix
4260* operand matrix A. M must be at least zero.
4261*
4262* N (global input) INTEGER
4263* On entry, N specifies the number of columns of the subma-
4264* trix operand matrix A. N must be at least zero.
4265*
4266* ALPHA (global input) REAL
4267* On entry, ALPHA specifies the scalar alpha.
4268*
4269* A (local input) REAL array
4270* On entry, A is an array of dimension (DESCA( M_ ),*). This
4271* array contains a local copy of the initial entire matrix PA.
4272*
4273* IA (global input) INTEGER
4274* On entry, IA specifies A's global row index, which points to
4275* the beginning of the submatrix sub( A ).
4276*
4277* JA (global input) INTEGER
4278* On entry, JA specifies A's global column index, which points
4279* to the beginning of the submatrix sub( A ).
4280*
4281* DESCA (global and local input) INTEGER array
4282* On entry, DESCA is an integer array of dimension DLEN_. This
4283* is the array descriptor for the matrix A.
4284*
4285* X (local input) REAL array
4286* On entry, X is an array of dimension (DESCX( M_ ),*). This
4287* array contains a local copy of the initial entire matrix PX.
4288*
4289* IX (global input) INTEGER
4290* On entry, IX specifies X's global row index, which points to
4291* the beginning of the submatrix sub( X ).
4292*
4293* JX (global input) INTEGER
4294* On entry, JX specifies X's global column index, which points
4295* to the beginning of the submatrix sub( X ).
4296*
4297* DESCX (global and local input) INTEGER array
4298* On entry, DESCX is an integer array of dimension DLEN_. This
4299* is the array descriptor for the matrix X.
4300*
4301* INCX (global input) INTEGER
4302* On entry, INCX specifies the global increment for the
4303* elements of X. Only two values of INCX are supported in
4304* this version, namely 1 and M_X. INCX must not be zero.
4305*
4306* BETA (global input) REAL
4307* On entry, BETA specifies the scalar beta.
4308*
4309* Y (local input/local output) REAL array
4310* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4311* array contains a local copy of the initial entire matrix PY.
4312*
4313* PY (local input) REAL array
4314* On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4315* array contains the local entries of the matrix PY.
4316*
4317* IY (global input) INTEGER
4318* On entry, IY specifies Y's global row index, which points to
4319* the beginning of the submatrix sub( Y ).
4320*
4321* JY (global input) INTEGER
4322* On entry, JY specifies Y's global column index, which points
4323* to the beginning of the submatrix sub( Y ).
4324*
4325* DESCY (global and local input) INTEGER array
4326* On entry, DESCY is an integer array of dimension DLEN_. This
4327* is the array descriptor for the matrix Y.
4328*
4329* INCY (global input) INTEGER
4330* On entry, INCY specifies the global increment for the
4331* elements of Y. Only two values of INCY are supported in
4332* this version, namely 1 and M_Y. INCY must not be zero.
4333*
4334* G (workspace) REAL array
4335* On entry, G is an array of dimension at least MAX( M, N ). G
4336* is used to compute the gauges.
4337*
4338* ERR (global output) REAL
4339* On exit, ERR specifies the largest error in absolute value.
4340*
4341* INFO (global output) INTEGER
4342* On exit, if INFO <> 0, the result is less than half accurate.
4343*
4344* -- Written on April 1, 1998 by
4345* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4346*
4347* =====================================================================
4348*
4349* .. Parameters ..
4350 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4351 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4352 $ RSRC_
4353 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4354 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4355 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4356 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4357 REAL ZERO, ONE
4358 parameter( zero = 0.0e+0, one = 1.0e+0 )
4359* ..
4360* .. Local Scalars ..
4361 LOGICAL COLREP, ROWREP, TRAN
4362 INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4363 $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA,
4364 $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL,
4365 $ nprow
4366 REAL EPS, ERRI, GTMP, TBETA, YTMP
4367* ..
4368* .. External Subroutines ..
4369 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4370* ..
4371* .. External Functions ..
4372 LOGICAL LSAME
4373 REAL PSLAMCH
4374 EXTERNAL lsame, pslamch
4375* ..
4376* .. Intrinsic Functions ..
4377 INTRINSIC abs, max, min, mod, sqrt
4378* ..
4379* .. Executable Statements ..
4380*
4381 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4382*
4383 eps = pslamch( ictxt, 'eps' )
4384*
4385 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4386 tbeta = one
4387 ELSE
4388 tbeta = beta
4389 END IF
4390*
4391 tran = lsame( trans, 'T' ).OR.lsame( trans, 'C' )
4392 IF( tran ) THEN
4393 ml = n
4394 nl = m
4395 ELSE
4396 ml = m
4397 nl = n
4398 END IF
4399*
4400 lda = max( 1, desca( m_ ) )
4401 ldx = max( 1, descx( m_ ) )
4402 ldy = max( 1, descy( m_ ) )
4403*
4404* Compute expected result in Y using data in A, X and Y.
4405* Compute gauges in G. This part of the computation is performed
4406* by every process in the grid.
4407*
4408 ioffy = iy + ( jy - 1 ) * ldy
4409 DO 30 i = 1, ml
4410 ytmp = zero
4411 gtmp = zero
4412 ioffx = ix + ( jx - 1 ) * ldx
4413 IF( tran )THEN
4414 ioffa = ia + ( ja + i - 2 ) * lda
4415 DO 10 j = 1, nl
4416 ytmp = ytmp + a( ioffa ) * x( ioffx )
4417 gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4418 ioffa = ioffa + 1
4419 ioffx = ioffx + incx
4420 10 CONTINUE
4421 ELSE
4422 ioffa = ia + i - 1 + ( ja - 1 ) * lda
4423 DO 20 j = 1, nl
4424 ytmp = ytmp + a( ioffa ) * x( ioffx )
4425 gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4426 ioffa = ioffa + lda
4427 ioffx = ioffx + incx
4428 20 CONTINUE
4429 END IF
4430 g( i ) = abs( alpha ) * gtmp + abs( tbeta * y( ioffy ) )
4431 y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4432 ioffy = ioffy + incy
4433 30 CONTINUE
4434*
4435* Compute the error ratio for this result.
4436*
4437 err = zero
4438 info = 0
4439 ldpy = descy( lld_ )
4440 ioffy = iy + ( jy - 1 ) * ldy
4441 CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4442 $ jjy, iyrow, iycol )
4443 icurrow = iyrow
4444 icurcol = iycol
4445 rowrep = ( iyrow.EQ.-1 )
4446 colrep = ( iycol.EQ.-1 )
4447*
4448 IF( incy.EQ.descy( m_ ) ) THEN
4449*
4450* sub( Y ) is a row vector
4451*
4452 jb = descy( inb_ ) - jy + 1
4453 IF( jb.LE.0 )
4454 $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4455 jb = min( jb, ml )
4456 jn = jy + jb - 1
4457*
4458 DO 50 j = jy, jn
4459*
4460 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4461 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4462 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4463 IF( g( j-jy+1 ).NE.zero )
4464 $ erri = erri / g( j-jy+1 )
4465 err = max( err, erri )
4466 IF( err*sqrt( eps ).GE.one )
4467 $ info = 1
4468 jjy = jjy + 1
4469 END IF
4470*
4471 ioffy = ioffy + incy
4472*
4473 50 CONTINUE
4474*
4475 icurcol = mod( icurcol+1, npcol )
4476*
4477 DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4478 jb = min( jy+ml-j, descy( nb_ ) )
4479*
4480 DO 60 kk = 0, jb-1
4481*
4482 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4483 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4484 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4485 IF( g( j+kk-jy+1 ).NE.zero )
4486 $ erri = erri / g( j+kk-jy+1 )
4487 err = max( err, erri )
4488 IF( err*sqrt( eps ).GE.one )
4489 $ info = 1
4490 jjy = jjy + 1
4491 END IF
4492*
4493 ioffy = ioffy + incy
4494*
4495 60 CONTINUE
4496*
4497 icurcol = mod( icurcol+1, npcol )
4498*
4499 70 CONTINUE
4500*
4501 ELSE
4502*
4503* sub( Y ) is a column vector
4504*
4505 ib = descy( imb_ ) - iy + 1
4506 IF( ib.LE.0 )
4507 $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4508 ib = min( ib, ml )
4509 in = iy + ib - 1
4510*
4511 DO 80 i = iy, in
4512*
4513 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4514 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4515 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4516 IF( g( i-iy+1 ).NE.zero )
4517 $ erri = erri / g( i-iy+1 )
4518 err = max( err, erri )
4519 IF( err*sqrt( eps ).GE.one )
4520 $ info = 1
4521 iiy = iiy + 1
4522 END IF
4523*
4524 ioffy = ioffy + incy
4525*
4526 80 CONTINUE
4527*
4528 icurrow = mod( icurrow+1, nprow )
4529*
4530 DO 100 i = in+1, iy+ml-1, descy( mb_ )
4531 ib = min( iy+ml-i, descy( mb_ ) )
4532*
4533 DO 90 kk = 0, ib-1
4534*
4535 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4536 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4537 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4538 IF( g( i+kk-iy+1 ).NE.zero )
4539 $ erri = erri / g( i+kk-iy+1 )
4540 err = max( err, erri )
4541 IF( err*sqrt( eps ).GE.one )
4542 $ info = 1
4543 iiy = iiy + 1
4544 END IF
4545*
4546 ioffy = ioffy + incy
4547*
4548 90 CONTINUE
4549*
4550 icurrow = mod( icurrow+1, nprow )
4551*
4552 100 CONTINUE
4553*
4554 END IF
4555*
4556* If INFO = 0, all results are at least half accurate.
4557*
4558 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4559 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4560 $ mycol )
4561*
4562 RETURN
4563*
4564* End of PSMVCH
4565*
4566 END
4567 SUBROUTINE psvmch( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4568 $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA,
4569 $ DESCA, G, ERR, INFO )
4570*
4571* -- PBLAS test routine (version 2.0) --
4572* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4573* and University of California, Berkeley.
4574* April 1, 1998
4575*
4576* .. Scalar Arguments ..
4577 CHARACTER*1 UPLO
4578 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4579 $ JY, M, N
4580 REAL ALPHA, ERR
4581* ..
4582* .. Array Arguments ..
4583 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4584 REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4585* ..
4586*
4587* Purpose
4588* =======
4589*
4590* PSVMCH checks the results of the computational tests.
4591*
4592* Notes
4593* =====
4594*
4595* A description vector is associated with each 2D block-cyclicly dis-
4596* tributed matrix. This vector stores the information required to
4597* establish the mapping between a matrix entry and its corresponding
4598* process and memory location.
4599*
4600* In the following comments, the character _ should be read as
4601* "of the distributed matrix". Let A be a generic term for any 2D
4602* block cyclicly distributed matrix. Its description vector is DESCA:
4603*
4604* NOTATION STORED IN EXPLANATION
4605* ---------------- --------------- ------------------------------------
4606* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4607* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4608* the NPROW x NPCOL BLACS process grid
4609* A is distributed over. The context
4610* itself is global, but the handle
4611* (the integer value) may vary.
4612* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4613* ted matrix A, M_A >= 0.
4614* N_A (global) DESCA( N_ ) The number of columns in the distri-
4615* buted matrix A, N_A >= 0.
4616* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4617* block of the matrix A, IMB_A > 0.
4618* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4619* left block of the matrix A,
4620* INB_A > 0.
4621* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4622* bute the last M_A-IMB_A rows of A,
4623* MB_A > 0.
4624* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4625* bute the last N_A-INB_A columns of
4626* A, NB_A > 0.
4627* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4628* row of the matrix A is distributed,
4629* NPROW > RSRC_A >= 0.
4630* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4631* first column of A is distributed.
4632* NPCOL > CSRC_A >= 0.
4633* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4634* array storing the local blocks of
4635* the distributed matrix A,
4636* IF( Lc( 1, N_A ) > 0 )
4637* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4638* ELSE
4639* LLD_A >= 1.
4640*
4641* Let K be the number of rows of a matrix A starting at the global in-
4642* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4643* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4644* receive if these K rows were distributed over NPROW processes. If K
4645* is the number of columns of a matrix A starting at the global index
4646* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4647* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4648* these K columns were distributed over NPCOL processes.
4649*
4650* The values of Lr() and Lc() may be determined via a call to the func-
4651* tion PB_NUMROC:
4652* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4653* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4654*
4655* Arguments
4656* =========
4657*
4658* ICTXT (local input) INTEGER
4659* On entry, ICTXT specifies the BLACS context handle, indica-
4660* ting the global context of the operation. The context itself
4661* is global, but the value of ICTXT is local.
4662*
4663* UPLO (global input) CHARACTER*1
4664* On entry, UPLO specifies which part of the submatrix sub( A )
4665* is to be referenced as follows:
4666* If UPLO = 'L', only the lower triangular part,
4667* If UPLO = 'U', only the upper triangular part,
4668* else the entire matrix is to be referenced.
4669*
4670* M (global input) INTEGER
4671* On entry, M specifies the number of rows of the submatrix
4672* operand matrix A. M must be at least zero.
4673*
4674* N (global input) INTEGER
4675* On entry, N specifies the number of columns of the subma-
4676* trix operand matrix A. N must be at least zero.
4677*
4678* ALPHA (global input) REAL
4679* On entry, ALPHA specifies the scalar alpha.
4680*
4681* X (local input) REAL array
4682* On entry, X is an array of dimension (DESCX( M_ ),*). This
4683* array contains a local copy of the initial entire matrix PX.
4684*
4685* IX (global input) INTEGER
4686* On entry, IX specifies X's global row index, which points to
4687* the beginning of the submatrix sub( X ).
4688*
4689* JX (global input) INTEGER
4690* On entry, JX specifies X's global column index, which points
4691* to the beginning of the submatrix sub( X ).
4692*
4693* DESCX (global and local input) INTEGER array
4694* On entry, DESCX is an integer array of dimension DLEN_. This
4695* is the array descriptor for the matrix X.
4696*
4697* INCX (global input) INTEGER
4698* On entry, INCX specifies the global increment for the
4699* elements of X. Only two values of INCX are supported in
4700* this version, namely 1 and M_X. INCX must not be zero.
4701*
4702* Y (local input) REAL array
4703* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4704* array contains a local copy of the initial entire matrix PY.
4705*
4706* IY (global input) INTEGER
4707* On entry, IY specifies Y's global row index, which points to
4708* the beginning of the submatrix sub( Y ).
4709*
4710* JY (global input) INTEGER
4711* On entry, JY specifies Y's global column index, which points
4712* to the beginning of the submatrix sub( Y ).
4713*
4714* DESCY (global and local input) INTEGER array
4715* On entry, DESCY is an integer array of dimension DLEN_. This
4716* is the array descriptor for the matrix Y.
4717*
4718* INCY (global input) INTEGER
4719* On entry, INCY specifies the global increment for the
4720* elements of Y. Only two values of INCY are supported in
4721* this version, namely 1 and M_Y. INCY must not be zero.
4722*
4723* A (local input/local output) REAL array
4724* On entry, A is an array of dimension (DESCA( M_ ),*). This
4725* array contains a local copy of the initial entire matrix PA.
4726*
4727* PA (local input) REAL array
4728* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4729* array contains the local entries of the matrix PA.
4730*
4731* IA (global input) INTEGER
4732* On entry, IA specifies A's global row index, which points to
4733* the beginning of the submatrix sub( A ).
4734*
4735* JA (global input) INTEGER
4736* On entry, JA specifies A's global column index, which points
4737* to the beginning of the submatrix sub( A ).
4738*
4739* DESCA (global and local input) INTEGER array
4740* On entry, DESCA is an integer array of dimension DLEN_. This
4741* is the array descriptor for the matrix A.
4742*
4743* G (workspace) REAL array
4744* On entry, G is an array of dimension at least MAX( M, N ). G
4745* is used to compute the gauges.
4746*
4747* ERR (global output) REAL
4748* On exit, ERR specifies the largest error in absolute value.
4749*
4750* INFO (global output) INTEGER
4751* On exit, if INFO <> 0, the result is less than half accurate.
4752*
4753* -- Written on April 1, 1998 by
4754* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4755*
4756* =====================================================================
4757*
4758* .. Parameters ..
4759 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4760 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4761 $ RSRC_
4762 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4763 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4764 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4765 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4766 REAL ZERO, ONE
4767 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
4768* ..
4769* .. Local Scalars ..
4770 LOGICAL COLREP, LOWER, ROWREP, UPPER
4771 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4772 $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4773 $ ldx, ldy, mycol, myrow, npcol, nprow
4774 REAL ATMP, EPS, ERRI, GTMP
4775* ..
4776* .. External Subroutines ..
4777 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4778* ..
4779* .. External Functions ..
4780 LOGICAL LSAME
4781 REAL PSLAMCH
4782 EXTERNAL LSAME, PSLAMCH
4783* ..
4784* .. Intrinsic Functions ..
4785 INTRINSIC abs, max, min, mod, sqrt
4786* ..
4787* .. Executable Statements ..
4788*
4789 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4790*
4791 eps = pslamch( ictxt, 'eps' )
4792*
4793 upper = lsame( uplo, 'U' )
4794 lower = lsame( uplo, 'L' )
4795*
4796 lda = max( 1, desca( m_ ) )
4797 ldx = max( 1, descx( m_ ) )
4798 ldy = max( 1, descy( m_ ) )
4799*
4800* Compute expected result in A using data in A, X and Y.
4801* Compute gauges in G. This part of the computation is performed
4802* by every process in the grid.
4803*
4804 DO 70 j = 1, n
4805*
4806 ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4807*
4808 IF( lower ) THEN
4809 ibeg = j
4810 iend = m
4811 DO 10 i = 1, j-1
4812 g( i ) = zero
4813 10 CONTINUE
4814 ELSE IF( upper ) THEN
4815 ibeg = 1
4816 iend = j
4817 DO 20 i = j+1, m
4818 g( i ) = zero
4819 20 CONTINUE
4820 ELSE
4821 ibeg = 1
4822 iend = m
4823 END IF
4824*
4825 DO 30 i = ibeg, iend
4826*
4827 ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4828 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4829 atmp = x( ioffx ) * y( ioffy )
4830 gtmp = abs( x( ioffx ) * y( ioffy ) )
4831 g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
4832 a( ioffa ) = alpha * atmp + a( ioffa )
4833*
4834 30 CONTINUE
4835*
4836* Compute the error ratio for this result.
4837*
4838 info = 0
4839 err = zero
4840 ldpa = desca( lld_ )
4841 ioffa = ia + ( ja + j - 2 ) * lda
4842 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4843 $ iia, jja, iarow, iacol )
4844 rowrep = ( iarow.EQ.-1 )
4845 colrep = ( iacol.EQ.-1 )
4846*
4847 IF( mycol.EQ.iacol .OR. colrep ) THEN
4848*
4849 icurrow = iarow
4850 ib = desca( imb_ ) - ia + 1
4851 IF( ib.LE.0 )
4852 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4853 ib = min( ib, m )
4854 in = ia + ib - 1
4855*
4856 DO 40 i = ia, in
4857*
4858 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4859 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4860 IF( g( i-ia+1 ).NE.zero )
4861 $ erri = erri / g( i-ia+1 )
4862 err = max( err, erri )
4863 IF( err*sqrt( eps ).GE.one )
4864 $ info = 1
4865 iia = iia + 1
4866 END IF
4867*
4868 ioffa = ioffa + 1
4869*
4870 40 CONTINUE
4871*
4872 icurrow = mod( icurrow+1, nprow )
4873*
4874 DO 60 i = in+1, ia+m-1, desca( mb_ )
4875 ib = min( ia+m-i, desca( mb_ ) )
4876*
4877 DO 50 kk = 0, ib-1
4878*
4879 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4880 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4881 IF( g( i+kk-ia+1 ).NE.zero )
4882 $ erri = erri / g( i+kk-ia+1 )
4883 err = max( err, erri )
4884 IF( err*sqrt( eps ).GE.one )
4885 $ info = 1
4886 iia = iia + 1
4887 END IF
4888*
4889 ioffa = ioffa + 1
4890*
4891 50 CONTINUE
4892*
4893 icurrow = mod( icurrow+1, nprow )
4894*
4895 60 CONTINUE
4896*
4897 END IF
4898*
4899* If INFO = 0, all results are at least half accurate.
4900*
4901 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4902 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4903 $ mycol )
4904 IF( info.NE.0 )
4905 $ GO TO 80
4906*
4907 70 CONTINUE
4908*
4909 80 CONTINUE
4910*
4911 RETURN
4912*
4913* End of PSVMCH
4914*
4915 END
4916 SUBROUTINE psvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4917 $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4918 $ JA, DESCA, G, ERR, INFO )
4919*
4920* -- PBLAS test routine (version 2.0) --
4921* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4922* and University of California, Berkeley.
4923* April 1, 1998
4924*
4925* .. Scalar Arguments ..
4926 CHARACTER*1 UPLO
4927 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4928 $ jy, m, n
4929 REAL ALPHA, ERR
4930* ..
4931* .. Array Arguments ..
4932 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4933 REAL A( * ), G( * ), PA( * ), X( * ), Y( * )
4934* ..
4935*
4936* Purpose
4937* =======
4938*
4939* PSVMCH2 checks the results of the computational tests.
4940*
4941* Notes
4942* =====
4943*
4944* A description vector is associated with each 2D block-cyclicly dis-
4945* tributed matrix. This vector stores the information required to
4946* establish the mapping between a matrix entry and its corresponding
4947* process and memory location.
4948*
4949* In the following comments, the character _ should be read as
4950* "of the distributed matrix". Let A be a generic term for any 2D
4951* block cyclicly distributed matrix. Its description vector is DESCA:
4952*
4953* NOTATION STORED IN EXPLANATION
4954* ---------------- --------------- ------------------------------------
4955* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4956* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4957* the NPROW x NPCOL BLACS process grid
4958* A is distributed over. The context
4959* itself is global, but the handle
4960* (the integer value) may vary.
4961* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4962* ted matrix A, M_A >= 0.
4963* N_A (global) DESCA( N_ ) The number of columns in the distri-
4964* buted matrix A, N_A >= 0.
4965* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4966* block of the matrix A, IMB_A > 0.
4967* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4968* left block of the matrix A,
4969* INB_A > 0.
4970* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4971* bute the last M_A-IMB_A rows of A,
4972* MB_A > 0.
4973* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4974* bute the last N_A-INB_A columns of
4975* A, NB_A > 0.
4976* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4977* row of the matrix A is distributed,
4978* NPROW > RSRC_A >= 0.
4979* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4980* first column of A is distributed.
4981* NPCOL > CSRC_A >= 0.
4982* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4983* array storing the local blocks of
4984* the distributed matrix A,
4985* IF( Lc( 1, N_A ) > 0 )
4986* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4987* ELSE
4988* LLD_A >= 1.
4989*
4990* Let K be the number of rows of a matrix A starting at the global in-
4991* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4992* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4993* receive if these K rows were distributed over NPROW processes. If K
4994* is the number of columns of a matrix A starting at the global index
4995* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4996* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4997* these K columns were distributed over NPCOL processes.
4998*
4999* The values of Lr() and Lc() may be determined via a call to the func-
5000* tion PB_NUMROC:
5001* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5002* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5003*
5004* Arguments
5005* =========
5006*
5007* ICTXT (local input) INTEGER
5008* On entry, ICTXT specifies the BLACS context handle, indica-
5009* ting the global context of the operation. The context itself
5010* is global, but the value of ICTXT is local.
5011*
5012* UPLO (global input) CHARACTER*1
5013* On entry, UPLO specifies which part of the submatrix sub( A )
5014* is to be referenced as follows:
5015* If UPLO = 'L', only the lower triangular part,
5016* If UPLO = 'U', only the upper triangular part,
5017* else the entire matrix is to be referenced.
5018*
5019* M (global input) INTEGER
5020* On entry, M specifies the number of rows of the submatrix
5021* operand matrix A. M must be at least zero.
5022*
5023* N (global input) INTEGER
5024* On entry, N specifies the number of columns of the subma-
5025* trix operand matrix A. N must be at least zero.
5026*
5027* ALPHA (global input) REAL
5028* On entry, ALPHA specifies the scalar alpha.
5029*
5030* X (local input) REAL array
5031* On entry, X is an array of dimension (DESCX( M_ ),*). This
5032* array contains a local copy of the initial entire matrix PX.
5033*
5034* IX (global input) INTEGER
5035* On entry, IX specifies X's global row index, which points to
5036* the beginning of the submatrix sub( X ).
5037*
5038* JX (global input) INTEGER
5039* On entry, JX specifies X's global column index, which points
5040* to the beginning of the submatrix sub( X ).
5041*
5042* DESCX (global and local input) INTEGER array
5043* On entry, DESCX is an integer array of dimension DLEN_. This
5044* is the array descriptor for the matrix X.
5045*
5046* INCX (global input) INTEGER
5047* On entry, INCX specifies the global increment for the
5048* elements of X. Only two values of INCX are supported in
5049* this version, namely 1 and M_X. INCX must not be zero.
5050*
5051* Y (local input) REAL array
5052* On entry, Y is an array of dimension (DESCY( M_ ),*). This
5053* array contains a local copy of the initial entire matrix PY.
5054*
5055* IY (global input) INTEGER
5056* On entry, IY specifies Y's global row index, which points to
5057* the beginning of the submatrix sub( Y ).
5058*
5059* JY (global input) INTEGER
5060* On entry, JY specifies Y's global column index, which points
5061* to the beginning of the submatrix sub( Y ).
5062*
5063* DESCY (global and local input) INTEGER array
5064* On entry, DESCY is an integer array of dimension DLEN_. This
5065* is the array descriptor for the matrix Y.
5066*
5067* INCY (global input) INTEGER
5068* On entry, INCY specifies the global increment for the
5069* elements of Y. Only two values of INCY are supported in
5070* this version, namely 1 and M_Y. INCY must not be zero.
5071*
5072* A (local input/local output) REAL array
5073* On entry, A is an array of dimension (DESCA( M_ ),*). This
5074* array contains a local copy of the initial entire matrix PA.
5075*
5076* PA (local input) REAL array
5077* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5078* array contains the local entries of the matrix PA.
5079*
5080* IA (global input) INTEGER
5081* On entry, IA specifies A's global row index, which points to
5082* the beginning of the submatrix sub( A ).
5083*
5084* JA (global input) INTEGER
5085* On entry, JA specifies A's global column index, which points
5086* to the beginning of the submatrix sub( A ).
5087*
5088* DESCA (global and local input) INTEGER array
5089* On entry, DESCA is an integer array of dimension DLEN_. This
5090* is the array descriptor for the matrix A.
5091*
5092* G (workspace) REAL array
5093* On entry, G is an array of dimension at least MAX( M, N ). G
5094* is used to compute the gauges.
5095*
5096* ERR (global output) REAL
5097* On exit, ERR specifies the largest error in absolute value.
5098*
5099* INFO (global output) INTEGER
5100* On exit, if INFO <> 0, the result is less than half accurate.
5101*
5102* -- Written on April 1, 1998 by
5103* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5104*
5105* =====================================================================
5106*
5107* .. Parameters ..
5108 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5109 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5110 $ RSRC_
5111 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5112 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5113 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5114 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5115 REAL ZERO, ONE
5116 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5117* ..
5118* .. Local Scalars ..
5119 LOGICAL COLREP, LOWER, ROWREP, UPPER
5120 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5121 $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5122 $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5123 $ npcol, nprow
5124 REAL EPS, ERRI, GTMP, ATMP
5125* ..
5126* .. External Subroutines ..
5127 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5128* ..
5129* .. External Functions ..
5130 LOGICAL LSAME
5131 REAL PSLAMCH
5132 EXTERNAL lsame, pslamch
5133* ..
5134* .. Intrinsic Functions ..
5135 INTRINSIC abs, max, min, mod, sqrt
5136* ..
5137* .. Executable Statements ..
5138*
5139 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5140*
5141 eps = pslamch( ictxt, 'eps' )
5142*
5143 upper = lsame( uplo, 'U' )
5144 lower = lsame( uplo, 'L' )
5145*
5146 lda = max( 1, desca( m_ ) )
5147 ldx = max( 1, descx( m_ ) )
5148 ldy = max( 1, descy( m_ ) )
5149*
5150* Compute expected result in A using data in A, X and Y.
5151* Compute gauges in G. This part of the computation is performed
5152* by every process in the grid.
5153*
5154 DO 70 j = 1, n
5155*
5156 ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5157 ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5158*
5159 IF( lower ) THEN
5160 ibeg = j
5161 iend = m
5162 DO 10 i = 1, j-1
5163 g( i ) = zero
5164 10 CONTINUE
5165 ELSE IF( upper ) THEN
5166 ibeg = 1
5167 iend = j
5168 DO 20 i = j+1, m
5169 g( i ) = zero
5170 20 CONTINUE
5171 ELSE
5172 ibeg = 1
5173 iend = m
5174 END IF
5175*
5176 DO 30 i = ibeg, iend
5177 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5178 ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5179 ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5180 atmp = x( ioffxi ) * y( ioffyj )
5181 atmp = atmp + y( ioffyi ) * x( ioffxj )
5182 gtmp = abs( x( ioffxi ) * y( ioffyj ) )
5183 gtmp = gtmp + abs( y( ioffyi ) * x( ioffxj ) )
5184 g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
5185 a( ioffa ) = alpha*atmp + a( ioffa )
5186*
5187 30 CONTINUE
5188*
5189* Compute the error ratio for this result.
5190*
5191 info = 0
5192 err = zero
5193 ldpa = desca( lld_ )
5194 ioffa = ia + ( ja + j - 2 ) * lda
5195 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5196 $ iia, jja, iarow, iacol )
5197 rowrep = ( iarow.EQ.-1 )
5198 colrep = ( iacol.EQ.-1 )
5199*
5200 IF( mycol.EQ.iacol .OR. colrep ) THEN
5201*
5202 icurrow = iarow
5203 ib = desca( imb_ ) - ia + 1
5204 IF( ib.LE.0 )
5205 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5206 ib = min( ib, m )
5207 in = ia + ib - 1
5208*
5209 DO 40 i = ia, in
5210*
5211 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5212 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5213 IF( g( i-ia+1 ).NE.zero )
5214 $ erri = erri / g( i-ia+1 )
5215 err = max( err, erri )
5216 IF( err*sqrt( eps ).GE.one )
5217 $ info = 1
5218 iia = iia + 1
5219 END IF
5220*
5221 ioffa = ioffa + 1
5222*
5223 40 CONTINUE
5224*
5225 icurrow = mod( icurrow+1, nprow )
5226*
5227 DO 60 i = in+1, ia+m-1, desca( mb_ )
5228 ib = min( ia+m-i, desca( mb_ ) )
5229*
5230 DO 50 kk = 0, ib-1
5231*
5232 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5233 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5234 IF( g( i+kk-ia+1 ).NE.zero )
5235 $ erri = erri / g( i+kk-ia+1 )
5236 err = max( err, erri )
5237 IF( err*sqrt( eps ).GE.one )
5238 $ info = 1
5239 iia = iia + 1
5240 END IF
5241*
5242 ioffa = ioffa + 1
5243*
5244 50 CONTINUE
5245*
5246 icurrow = mod( icurrow+1, nprow )
5247*
5248 60 CONTINUE
5249*
5250 END IF
5251*
5252* If INFO = 0, all results are at least half accurate.
5253*
5254 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5255 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5256 $ mycol )
5257 IF( info.NE.0 )
5258 $ GO TO 80
5259*
5260 70 CONTINUE
5261*
5262 80 CONTINUE
5263*
5264 RETURN
5265*
5266* End of PSVMCH2
5267*
5268 END
5269 SUBROUTINE psmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5270 $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5271 $ JC, DESCC, CT, G, ERR, INFO )
5272*
5273* -- PBLAS test routine (version 2.0) --
5274* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5275* and University of California, Berkeley.
5276* April 1, 1998
5277*
5278* .. Scalar Arguments ..
5279 CHARACTER*1 TRANSA, TRANSB
5280 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5281 REAL ALPHA, BETA, ERR
5282* ..
5283* .. Array Arguments ..
5284 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5285 REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
5286 $ PC( * )
5287* ..
5288*
5289* Purpose
5290* =======
5291*
5292* PSMMCH checks the results of the computational tests.
5293*
5294* Notes
5295* =====
5296*
5297* A description vector is associated with each 2D block-cyclicly dis-
5298* tributed matrix. This vector stores the information required to
5299* establish the mapping between a matrix entry and its corresponding
5300* process and memory location.
5301*
5302* In the following comments, the character _ should be read as
5303* "of the distributed matrix". Let A be a generic term for any 2D
5304* block cyclicly distributed matrix. Its description vector is DESCA:
5305*
5306* NOTATION STORED IN EXPLANATION
5307* ---------------- --------------- ------------------------------------
5308* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5309* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5310* the NPROW x NPCOL BLACS process grid
5311* A is distributed over. The context
5312* itself is global, but the handle
5313* (the integer value) may vary.
5314* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5315* ted matrix A, M_A >= 0.
5316* N_A (global) DESCA( N_ ) The number of columns in the distri-
5317* buted matrix A, N_A >= 0.
5318* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5319* block of the matrix A, IMB_A > 0.
5320* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5321* left block of the matrix A,
5322* INB_A > 0.
5323* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5324* bute the last M_A-IMB_A rows of A,
5325* MB_A > 0.
5326* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5327* bute the last N_A-INB_A columns of
5328* A, NB_A > 0.
5329* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5330* row of the matrix A is distributed,
5331* NPROW > RSRC_A >= 0.
5332* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5333* first column of A is distributed.
5334* NPCOL > CSRC_A >= 0.
5335* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5336* array storing the local blocks of
5337* the distributed matrix A,
5338* IF( Lc( 1, N_A ) > 0 )
5339* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5340* ELSE
5341* LLD_A >= 1.
5342*
5343* Let K be the number of rows of a matrix A starting at the global in-
5344* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5345* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5346* receive if these K rows were distributed over NPROW processes. If K
5347* is the number of columns of a matrix A starting at the global index
5348* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5349* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5350* these K columns were distributed over NPCOL processes.
5351*
5352* The values of Lr() and Lc() may be determined via a call to the func-
5353* tion PB_NUMROC:
5354* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5355* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5356*
5357* Arguments
5358* =========
5359*
5360* ICTXT (local input) INTEGER
5361* On entry, ICTXT specifies the BLACS context handle, indica-
5362* ting the global context of the operation. The context itself
5363* is global, but the value of ICTXT is local.
5364*
5365* TRANSA (global input) CHARACTER*1
5366* On entry, TRANSA specifies if the matrix operand A is to be
5367* transposed.
5368*
5369* TRANSB (global input) CHARACTER*1
5370* On entry, TRANSB specifies if the matrix operand B is to be
5371* transposed.
5372*
5373* M (global input) INTEGER
5374* On entry, M specifies the number of rows of C.
5375*
5376* N (global input) INTEGER
5377* On entry, N specifies the number of columns of C.
5378*
5379* K (global input) INTEGER
5380* On entry, K specifies the number of columns (resp. rows) of A
5381* when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5382* PxSYR2K, PxHERK and PxHER2K.
5383*
5384* ALPHA (global input) REAL
5385* On entry, ALPHA specifies the scalar alpha.
5386*
5387* A (local input) REAL array
5388* On entry, A is an array of dimension (DESCA( M_ ),*). This
5389* array contains a local copy of the initial entire matrix PA.
5390*
5391* IA (global input) INTEGER
5392* On entry, IA specifies A's global row index, which points to
5393* the beginning of the submatrix sub( A ).
5394*
5395* JA (global input) INTEGER
5396* On entry, JA specifies A's global column index, which points
5397* to the beginning of the submatrix sub( A ).
5398*
5399* DESCA (global and local input) INTEGER array
5400* On entry, DESCA is an integer array of dimension DLEN_. This
5401* is the array descriptor for the matrix A.
5402*
5403* B (local input) REAL array
5404* On entry, B is an array of dimension (DESCB( M_ ),*). This
5405* array contains a local copy of the initial entire matrix PB.
5406*
5407* IB (global input) INTEGER
5408* On entry, IB specifies B's global row index, which points to
5409* the beginning of the submatrix sub( B ).
5410*
5411* JB (global input) INTEGER
5412* On entry, JB specifies B's global column index, which points
5413* to the beginning of the submatrix sub( B ).
5414*
5415* DESCB (global and local input) INTEGER array
5416* On entry, DESCB is an integer array of dimension DLEN_. This
5417* is the array descriptor for the matrix B.
5418*
5419* BETA (global input) REAL
5420* On entry, BETA specifies the scalar beta.
5421*
5422* C (local input/local output) REAL array
5423* On entry, C is an array of dimension (DESCC( M_ ),*). This
5424* array contains a local copy of the initial entire matrix PC.
5425*
5426* PC (local input) REAL array
5427* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5428* array contains the local pieces of the matrix PC.
5429*
5430* IC (global input) INTEGER
5431* On entry, IC specifies C's global row index, which points to
5432* the beginning of the submatrix sub( C ).
5433*
5434* JC (global input) INTEGER
5435* On entry, JC specifies C's global column index, which points
5436* to the beginning of the submatrix sub( C ).
5437*
5438* DESCC (global and local input) INTEGER array
5439* On entry, DESCC is an integer array of dimension DLEN_. This
5440* is the array descriptor for the matrix C.
5441*
5442* CT (workspace) REAL array
5443* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5444* holds a copy of the current column of C.
5445*
5446* G (workspace) REAL array
5447* On entry, G is an array of dimension at least MAX(M,N,K). G
5448* is used to compute the gauges.
5449*
5450* ERR (global output) REAL
5451* On exit, ERR specifies the largest error in absolute value.
5452*
5453* INFO (global output) INTEGER
5454* On exit, if INFO <> 0, the result is less than half accurate.
5455*
5456* -- Written on April 1, 1998 by
5457* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5458*
5459* =====================================================================
5460*
5461* .. Parameters ..
5462 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5463 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5464 $ RSRC_
5465 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5466 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5467 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5468 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5469 REAL ZERO, ONE
5470 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5471* ..
5472* .. Local Scalars ..
5473 LOGICAL COLREP, ROWREP, TRANA, TRANB
5474 INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5475 $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5476 $ mycol, myrow, npcol, nprow
5477 REAL EPS, ERRI
5478* ..
5479* .. External Subroutines ..
5480 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5481* ..
5482* .. External Functions ..
5483 LOGICAL LSAME
5484 REAL PSLAMCH
5485 EXTERNAL LSAME, PSLAMCH
5486* ..
5487* .. Intrinsic Functions ..
5488 INTRINSIC abs, max, min, mod, sqrt
5489* ..
5490* .. Executable Statements ..
5491*
5492 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5493*
5494 eps = pslamch( ictxt, 'eps' )
5495*
5496 trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5497 tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5498*
5499 lda = max( 1, desca( m_ ) )
5500 ldb = max( 1, descb( m_ ) )
5501 ldc = max( 1, descc( m_ ) )
5502*
5503* Compute expected result in C using data in A, B and C.
5504* Compute gauges in G. This part of the computation is performed
5505* by every process in the grid.
5506*
5507 DO 240 j = 1, n
5508*
5509 ioffc = ic + ( jc + j - 2 ) * ldc
5510 DO 10 i = 1, m
5511 ct( i ) = zero
5512 g( i ) = zero
5513 10 CONTINUE
5514*
5515 IF( .NOT.trana .AND. .NOT.tranb ) THEN
5516 DO 30 kk = 1, k
5517 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5518 DO 20 i = 1, m
5519 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5520 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5521 g( i ) = g( i ) + abs( a( ioffa ) ) *
5522 $ abs( b( ioffb ) )
5523 20 CONTINUE
5524 30 CONTINUE
5525 ELSE IF( trana .AND. .NOT.tranb ) THEN
5526 DO 50 kk = 1, k
5527 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5528 DO 40 i = 1, m
5529 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5530 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5531 g( i ) = g( i ) + abs( a( ioffa ) ) *
5532 $ abs( b( ioffb ) )
5533 40 CONTINUE
5534 50 CONTINUE
5535 ELSE IF( .NOT.trana .AND. tranb ) THEN
5536 DO 70 kk = 1, k
5537 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5538 DO 60 i = 1, m
5539 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5540 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5541 g( i ) = g( i ) + abs( a( ioffa ) ) *
5542 $ abs( b( ioffb ) )
5543 60 CONTINUE
5544 70 CONTINUE
5545 ELSE IF( trana .AND. tranb ) THEN
5546 DO 90 kk = 1, k
5547 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5548 DO 80 i = 1, m
5549 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5550 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5551 g( i ) = g( i ) + abs( a( ioffa ) ) *
5552 $ abs( b( ioffb ) )
5553 80 CONTINUE
5554 90 CONTINUE
5555 END IF
5556*
5557 DO 200 i = 1, m
5558 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5559 g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5560 c( ioffc ) = ct( i )
5561 ioffc = ioffc + 1
5562 200 CONTINUE
5563*
5564* Compute the error ratio for this result.
5565*
5566 err = zero
5567 info = 0
5568 ldpc = descc( lld_ )
5569 ioffc = ic + ( jc + j - 2 ) * ldc
5570 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5571 $ iic, jjc, icrow, iccol )
5572 icurrow = icrow
5573 rowrep = ( icrow.EQ.-1 )
5574 colrep = ( iccol.EQ.-1 )
5575*
5576 IF( mycol.EQ.iccol .OR. colrep ) THEN
5577*
5578 ibb = descc( imb_ ) - ic + 1
5579 IF( ibb.LE.0 )
5580 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5581 ibb = min( ibb, m )
5582 in = ic + ibb - 1
5583*
5584 DO 210 i = ic, in
5585*
5586 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5587 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5588 $ c( ioffc ) ) / eps
5589 IF( g( i-ic+1 ).NE.zero )
5590 $ erri = erri / g( i-ic+1 )
5591 err = max( err, erri )
5592 IF( err*sqrt( eps ).GE.one )
5593 $ info = 1
5594 iic = iic + 1
5595 END IF
5596*
5597 ioffc = ioffc + 1
5598*
5599 210 CONTINUE
5600*
5601 icurrow = mod( icurrow+1, nprow )
5602*
5603 DO 230 i = in+1, ic+m-1, descc( mb_ )
5604 ibb = min( ic+m-i, descc( mb_ ) )
5605*
5606 DO 220 kk = 0, ibb-1
5607*
5608 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5609 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5610 $ c( ioffc ) )/eps
5611 IF( g( i+kk-ic+1 ).NE.zero )
5612 $ erri = erri / g( i+kk-ic+1 )
5613 err = max( err, erri )
5614 IF( err*sqrt( eps ).GE.one )
5615 $ info = 1
5616 iic = iic + 1
5617 END IF
5618*
5619 ioffc = ioffc + 1
5620*
5621 220 CONTINUE
5622*
5623 icurrow = mod( icurrow+1, nprow )
5624*
5625 230 CONTINUE
5626*
5627 END IF
5628*
5629* If INFO = 0, all results are at least half accurate.
5630*
5631 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5632 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5633 $ mycol )
5634 IF( info.NE.0 )
5635 $ GO TO 250
5636*
5637 240 CONTINUE
5638*
5639 250 CONTINUE
5640*
5641 RETURN
5642*
5643* End of PSMMCH
5644*
5645 END
5646 SUBROUTINE psmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5647 $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5648 $ ERR, INFO )
5649*
5650* -- PBLAS test routine (version 2.0) --
5651* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652* and University of California, Berkeley.
5653* April 1, 1998
5654*
5655* .. Scalar Arguments ..
5656 CHARACTER*1 TRANS, UPLO
5657 INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658 REAL ALPHA, BETA, ERR
5659* ..
5660* .. Array Arguments ..
5661 INTEGER DESCA( * ), DESCC( * )
5662 REAL A( * ), C( * ), CT( * ), G( * ), PC( * )
5663* ..
5664*
5665* Purpose
5666* =======
5667*
5668* PSMMCH1 checks the results of the computational tests.
5669*
5670* Notes
5671* =====
5672*
5673* A description vector is associated with each 2D block-cyclicly dis-
5674* tributed matrix. This vector stores the information required to
5675* establish the mapping between a matrix entry and its corresponding
5676* process and memory location.
5677*
5678* In the following comments, the character _ should be read as
5679* "of the distributed matrix". Let A be a generic term for any 2D
5680* block cyclicly distributed matrix. Its description vector is DESCA:
5681*
5682* NOTATION STORED IN EXPLANATION
5683* ---------------- --------------- ------------------------------------
5684* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686* the NPROW x NPCOL BLACS process grid
5687* A is distributed over. The context
5688* itself is global, but the handle
5689* (the integer value) may vary.
5690* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691* ted matrix A, M_A >= 0.
5692* N_A (global) DESCA( N_ ) The number of columns in the distri-
5693* buted matrix A, N_A >= 0.
5694* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695* block of the matrix A, IMB_A > 0.
5696* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697* left block of the matrix A,
5698* INB_A > 0.
5699* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700* bute the last M_A-IMB_A rows of A,
5701* MB_A > 0.
5702* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703* bute the last N_A-INB_A columns of
5704* A, NB_A > 0.
5705* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706* row of the matrix A is distributed,
5707* NPROW > RSRC_A >= 0.
5708* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709* first column of A is distributed.
5710* NPCOL > CSRC_A >= 0.
5711* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712* array storing the local blocks of
5713* the distributed matrix A,
5714* IF( Lc( 1, N_A ) > 0 )
5715* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716* ELSE
5717* LLD_A >= 1.
5718*
5719* Let K be the number of rows of a matrix A starting at the global in-
5720* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722* receive if these K rows were distributed over NPROW processes. If K
5723* is the number of columns of a matrix A starting at the global index
5724* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726* these K columns were distributed over NPCOL processes.
5727*
5728* The values of Lr() and Lc() may be determined via a call to the func-
5729* tion PB_NUMROC:
5730* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732*
5733* Arguments
5734* =========
5735*
5736* ICTXT (local input) INTEGER
5737* On entry, ICTXT specifies the BLACS context handle, indica-
5738* ting the global context of the operation. The context itself
5739* is global, but the value of ICTXT is local.
5740*
5741* UPLO (global input) CHARACTER*1
5742* On entry, UPLO specifies which part of C should contain the
5743* result.
5744*
5745* TRANS (global input) CHARACTER*1
5746* On entry, TRANS specifies whether the matrix A has to be
5747* transposed or not before computing the matrix-matrix product.
5748*
5749* N (global input) INTEGER
5750* On entry, N specifies the order the submatrix operand C. N
5751* must be at least zero.
5752*
5753* K (global input) INTEGER
5754* On entry, K specifies the number of columns (resp. rows) of A
5755* when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756* zero.
5757*
5758* ALPHA (global input) REAL
5759* On entry, ALPHA specifies the scalar alpha.
5760*
5761* A (local input) REAL array
5762* On entry, A is an array of dimension (DESCA( M_ ),*). This
5763* array contains a local copy of the initial entire matrix PA.
5764*
5765* IA (global input) INTEGER
5766* On entry, IA specifies A's global row index, which points to
5767* the beginning of the submatrix sub( A ).
5768*
5769* JA (global input) INTEGER
5770* On entry, JA specifies A's global column index, which points
5771* to the beginning of the submatrix sub( A ).
5772*
5773* DESCA (global and local input) INTEGER array
5774* On entry, DESCA is an integer array of dimension DLEN_. This
5775* is the array descriptor for the matrix A.
5776*
5777* BETA (global input) REAL
5778* On entry, BETA specifies the scalar beta.
5779*
5780* C (local input/local output) REAL array
5781* On entry, C is an array of dimension (DESCC( M_ ),*). This
5782* array contains a local copy of the initial entire matrix PC.
5783*
5784* PC (local input) REAL array
5785* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786* array contains the local pieces of the matrix PC.
5787*
5788* IC (global input) INTEGER
5789* On entry, IC specifies C's global row index, which points to
5790* the beginning of the submatrix sub( C ).
5791*
5792* JC (global input) INTEGER
5793* On entry, JC specifies C's global column index, which points
5794* to the beginning of the submatrix sub( C ).
5795*
5796* DESCC (global and local input) INTEGER array
5797* On entry, DESCC is an integer array of dimension DLEN_. This
5798* is the array descriptor for the matrix C.
5799*
5800* CT (workspace) REAL array
5801* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802* holds a copy of the current column of C.
5803*
5804* G (workspace) REAL array
5805* On entry, G is an array of dimension at least MAX(M,N,K). G
5806* is used to compute the gauges.
5807*
5808* ERR (global output) REAL
5809* On exit, ERR specifies the largest error in absolute value.
5810*
5811* INFO (global output) INTEGER
5812* On exit, if INFO <> 0, the result is less than half accurate.
5813*
5814* -- Written on April 1, 1998 by
5815* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816*
5817* =====================================================================
5818*
5819* .. Parameters ..
5820 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822 $ RSRC_
5823 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5824 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827 REAL ZERO, ONE
5828 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5829* ..
5830* .. Local Scalars ..
5831 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833 $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834 $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835 REAL EPS, ERRI
5836* ..
5837* .. External Subroutines ..
5838 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5839* ..
5840* .. External Functions ..
5841 LOGICAL LSAME
5842 REAL PSLAMCH
5843 EXTERNAL lsame, pslamch
5844* ..
5845* .. Intrinsic Functions ..
5846 INTRINSIC abs, max, min, mod, sqrt
5847* ..
5848* .. Executable Statements ..
5849*
5850 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851*
5852 eps = pslamch( ictxt, 'eps' )
5853*
5854 upper = lsame( uplo, 'U' )
5855 notran = lsame( trans, 'N' )
5856 tran = lsame( trans, 'T' )
5857*
5858 lda = max( 1, desca( m_ ) )
5859 ldc = max( 1, descc( m_ ) )
5860*
5861* Compute expected result in C using data in A, B and C.
5862* Compute gauges in G. This part of the computation is performed
5863* by every process in the grid.
5864*
5865 DO 140 j = 1, n
5866*
5867 IF( upper ) THEN
5868 ibeg = 1
5869 iend = j
5870 ELSE
5871 ibeg = j
5872 iend = n
5873 END IF
5874*
5875 DO 10 i = 1, n
5876 ct( i ) = zero
5877 g( i ) = zero
5878 10 CONTINUE
5879*
5880 IF( notran ) THEN
5881 DO 30 kk = 1, k
5882 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883 DO 20 i = ibeg, iend
5884 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886 g( i ) = g( i ) + abs( a( ioffak ) ) *
5887 $ abs( a( ioffan ) )
5888 20 CONTINUE
5889 30 CONTINUE
5890 ELSE IF( tran ) THEN
5891 DO 50 kk = 1, k
5892 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893 DO 40 i = ibeg, iend
5894 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896 g( i ) = g( i ) + abs( a( ioffak ) ) *
5897 $ abs( a( ioffan ) )
5898 40 CONTINUE
5899 50 CONTINUE
5900 END IF
5901*
5902 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903*
5904 DO 100 i = ibeg, iend
5905 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906 g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907 c( ioffc ) = ct( i )
5908 ioffc = ioffc + 1
5909 100 CONTINUE
5910*
5911* Compute the error ratio for this result.
5912*
5913 err = zero
5914 info = 0
5915 ldpc = descc( lld_ )
5916 ioffc = ic + ( jc + j - 2 ) * ldc
5917 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918 $ iic, jjc, icrow, iccol )
5919 icurrow = icrow
5920 rowrep = ( icrow.EQ.-1 )
5921 colrep = ( iccol.EQ.-1 )
5922*
5923 IF( mycol.EQ.iccol .OR. colrep ) THEN
5924*
5925 ibb = descc( imb_ ) - ic + 1
5926 IF( ibb.LE.0 )
5927 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928 ibb = min( ibb, n )
5929 in = ic + ibb - 1
5930*
5931 DO 110 i = ic, in
5932*
5933 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935 $ c( ioffc ) ) / eps
5936 IF( g( i-ic+1 ).NE.zero )
5937 $ erri = erri / g( i-ic+1 )
5938 err = max( err, erri )
5939 IF( err*sqrt( eps ).GE.one )
5940 $ info = 1
5941 iic = iic + 1
5942 END IF
5943*
5944 ioffc = ioffc + 1
5945*
5946 110 CONTINUE
5947*
5948 icurrow = mod( icurrow+1, nprow )
5949*
5950 DO 130 i = in+1, ic+n-1, descc( mb_ )
5951 ibb = min( ic+n-i, descc( mb_ ) )
5952*
5953 DO 120 kk = 0, ibb-1
5954*
5955 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957 $ c( ioffc ) )/eps
5958 IF( g( i+kk-ic+1 ).NE.zero )
5959 $ erri = erri / g( i+kk-ic+1 )
5960 err = max( err, erri )
5961 IF( err*sqrt( eps ).GE.one )
5962 $ info = 1
5963 iic = iic + 1
5964 END IF
5965*
5966 ioffc = ioffc + 1
5967*
5968 120 CONTINUE
5969*
5970 icurrow = mod( icurrow+1, nprow )
5971*
5972 130 CONTINUE
5973*
5974 END IF
5975*
5976* If INFO = 0, all results are at least half accurate.
5977*
5978 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980 $ mycol )
5981 IF( info.NE.0 )
5982 $ GO TO 150
5983*
5984 140 CONTINUE
5985*
5986 150 CONTINUE
5987*
5988 RETURN
5989*
5990* End of PSMMCH1
5991*
5992 END
5993 SUBROUTINE psmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5994 $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5995 $ JC, DESCC, CT, G, ERR, INFO )
5996*
5997* -- PBLAS test routine (version 2.0) --
5998* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999* and University of California, Berkeley.
6000* April 1, 1998
6001*
6002* .. Scalar Arguments ..
6003 CHARACTER*1 TRANS, UPLO
6004 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005 REAL ALPHA, BETA, ERR
6006* ..
6007* .. Array Arguments ..
6008 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009 REAL A( * ), B( * ), C( * ), CT( * ), G( * ),
6010 $ pc( * )
6011* ..
6012*
6013* Purpose
6014* =======
6015*
6016* PSMMCH2 checks the results of the computational tests.
6017*
6018* Notes
6019* =====
6020*
6021* A description vector is associated with each 2D block-cyclicly dis-
6022* tributed matrix. This vector stores the information required to
6023* establish the mapping between a matrix entry and its corresponding
6024* process and memory location.
6025*
6026* In the following comments, the character _ should be read as
6027* "of the distributed matrix". Let A be a generic term for any 2D
6028* block cyclicly distributed matrix. Its description vector is DESCA:
6029*
6030* NOTATION STORED IN EXPLANATION
6031* ---------------- --------------- ------------------------------------
6032* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034* the NPROW x NPCOL BLACS process grid
6035* A is distributed over. The context
6036* itself is global, but the handle
6037* (the integer value) may vary.
6038* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039* ted matrix A, M_A >= 0.
6040* N_A (global) DESCA( N_ ) The number of columns in the distri-
6041* buted matrix A, N_A >= 0.
6042* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043* block of the matrix A, IMB_A > 0.
6044* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045* left block of the matrix A,
6046* INB_A > 0.
6047* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048* bute the last M_A-IMB_A rows of A,
6049* MB_A > 0.
6050* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051* bute the last N_A-INB_A columns of
6052* A, NB_A > 0.
6053* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054* row of the matrix A is distributed,
6055* NPROW > RSRC_A >= 0.
6056* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057* first column of A is distributed.
6058* NPCOL > CSRC_A >= 0.
6059* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060* array storing the local blocks of
6061* the distributed matrix A,
6062* IF( Lc( 1, N_A ) > 0 )
6063* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064* ELSE
6065* LLD_A >= 1.
6066*
6067* Let K be the number of rows of a matrix A starting at the global in-
6068* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070* receive if these K rows were distributed over NPROW processes. If K
6071* is the number of columns of a matrix A starting at the global index
6072* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074* these K columns were distributed over NPCOL processes.
6075*
6076* The values of Lr() and Lc() may be determined via a call to the func-
6077* tion PB_NUMROC:
6078* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080*
6081* Arguments
6082* =========
6083*
6084* ICTXT (local input) INTEGER
6085* On entry, ICTXT specifies the BLACS context handle, indica-
6086* ting the global context of the operation. The context itself
6087* is global, but the value of ICTXT is local.
6088*
6089* UPLO (global input) CHARACTER*1
6090* On entry, UPLO specifies which part of C should contain the
6091* result.
6092*
6093* TRANS (global input) CHARACTER*1
6094* On entry, TRANS specifies whether the matrices A and B have
6095* to be transposed or not before computing the matrix-matrix
6096* product.
6097*
6098* N (global input) INTEGER
6099* On entry, N specifies the order the submatrix operand C. N
6100* must be at least zero.
6101*
6102* K (global input) INTEGER
6103* On entry, K specifies the number of columns (resp. rows) of A
6104* and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105* least zero.
6106*
6107* ALPHA (global input) REAL
6108* On entry, ALPHA specifies the scalar alpha.
6109*
6110* A (local input) REAL array
6111* On entry, A is an array of dimension (DESCA( M_ ),*). This
6112* array contains a local copy of the initial entire matrix PA.
6113*
6114* IA (global input) INTEGER
6115* On entry, IA specifies A's global row index, which points to
6116* the beginning of the submatrix sub( A ).
6117*
6118* JA (global input) INTEGER
6119* On entry, JA specifies A's global column index, which points
6120* to the beginning of the submatrix sub( A ).
6121*
6122* DESCA (global and local input) INTEGER array
6123* On entry, DESCA is an integer array of dimension DLEN_. This
6124* is the array descriptor for the matrix A.
6125*
6126* B (local input) REAL array
6127* On entry, B is an array of dimension (DESCB( M_ ),*). This
6128* array contains a local copy of the initial entire matrix PB.
6129*
6130* IB (global input) INTEGER
6131* On entry, IB specifies B's global row index, which points to
6132* the beginning of the submatrix sub( B ).
6133*
6134* JB (global input) INTEGER
6135* On entry, JB specifies B's global column index, which points
6136* to the beginning of the submatrix sub( B ).
6137*
6138* DESCB (global and local input) INTEGER array
6139* On entry, DESCB is an integer array of dimension DLEN_. This
6140* is the array descriptor for the matrix B.
6141*
6142* BETA (global input) REAL
6143* On entry, BETA specifies the scalar beta.
6144*
6145* C (local input/local output) REAL array
6146* On entry, C is an array of dimension (DESCC( M_ ),*). This
6147* array contains a local copy of the initial entire matrix PC.
6148*
6149* PC (local input) REAL array
6150* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151* array contains the local pieces of the matrix PC.
6152*
6153* IC (global input) INTEGER
6154* On entry, IC specifies C's global row index, which points to
6155* the beginning of the submatrix sub( C ).
6156*
6157* JC (global input) INTEGER
6158* On entry, JC specifies C's global column index, which points
6159* to the beginning of the submatrix sub( C ).
6160*
6161* DESCC (global and local input) INTEGER array
6162* On entry, DESCC is an integer array of dimension DLEN_. This
6163* is the array descriptor for the matrix C.
6164*
6165* CT (workspace) REAL array
6166* On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167* holds a copy of the current column of C.
6168*
6169* G (workspace) REAL array
6170* On entry, G is an array of dimension at least MAX(M,N,K). G
6171* is used to compute the gauges.
6172*
6173* ERR (global output) REAL
6174* On exit, ERR specifies the largest error in absolute value.
6175*
6176* INFO (global output) INTEGER
6177* On exit, if INFO <> 0, the result is less than half accurate.
6178*
6179* -- Written on April 1, 1998 by
6180* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181*
6182* =====================================================================
6183*
6184* .. Parameters ..
6185 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187 $ RSRC_
6188 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6189 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192 REAL ZERO, ONE
6193 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
6194* ..
6195* .. Local Scalars ..
6196 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198 $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199 $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200 $ NPCOL, NPROW
6201 REAL EPS, ERRI
6202* ..
6203* .. External Subroutines ..
6204 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
6205* ..
6206* .. External Functions ..
6207 LOGICAL LSAME
6208 REAL PSLAMCH
6209 EXTERNAL LSAME, PSLAMCH
6210* ..
6211* .. Intrinsic Functions ..
6212 INTRINSIC abs, max, min, mod, sqrt
6213* ..
6214* .. Executable Statements ..
6215*
6216 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217*
6218 eps = pslamch( ictxt, 'eps' )
6219*
6220 upper = lsame( uplo, 'U' )
6221 notran = lsame( trans, 'N' )
6222 tran = lsame( trans, 'T' )
6223*
6224 lda = max( 1, desca( m_ ) )
6225 ldb = max( 1, descb( m_ ) )
6226 ldc = max( 1, descc( m_ ) )
6227*
6228* Compute expected result in C using data in A, B and C.
6229* Compute gauges in G. This part of the computation is performed
6230* by every process in the grid.
6231*
6232 DO 140 j = 1, n
6233*
6234 IF( upper ) THEN
6235 ibeg = 1
6236 iend = j
6237 ELSE
6238 ibeg = j
6239 iend = n
6240 END IF
6241*
6242 DO 10 i = 1, n
6243 ct( i ) = zero
6244 g( i ) = zero
6245 10 CONTINUE
6246*
6247 IF( notran ) THEN
6248 DO 30 kk = 1, k
6249 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250 ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251 DO 20 i = ibeg, iend
6252 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253 ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254 ct( i ) = ct( i ) + alpha * (
6255 $ a( ioffan ) * b( ioffbk ) +
6256 $ b( ioffbn ) * a( ioffak ) )
6257 g( i ) = g( i ) + abs( alpha ) * (
6258 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260 20 CONTINUE
6261 30 CONTINUE
6262 ELSE IF( tran ) THEN
6263 DO 50 kk = 1, k
6264 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265 ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266 DO 40 i = ibeg, iend
6267 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268 ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269 ct( i ) = ct( i ) + alpha * (
6270 $ a( ioffan ) * b( ioffbk ) +
6271 $ b( ioffbn ) * a( ioffak ) )
6272 g( i ) = g( i ) + abs( alpha ) * (
6273 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275 40 CONTINUE
6276 50 CONTINUE
6277 END IF
6278*
6279 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280*
6281 DO 100 i = ibeg, iend
6282 ct( i ) = ct( i ) + beta * c( ioffc )
6283 g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284 c( ioffc ) = ct( i )
6285 ioffc = ioffc + 1
6286 100 CONTINUE
6287*
6288* Compute the error ratio for this result.
6289*
6290 err = zero
6291 info = 0
6292 ldpc = descc( lld_ )
6293 ioffc = ic + ( jc + j - 2 ) * ldc
6294 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295 $ iic, jjc, icrow, iccol )
6296 icurrow = icrow
6297 rowrep = ( icrow.EQ.-1 )
6298 colrep = ( iccol.EQ.-1 )
6299*
6300 IF( mycol.EQ.iccol .OR. colrep ) THEN
6301*
6302 ibb = descc( imb_ ) - ic + 1
6303 IF( ibb.LE.0 )
6304 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305 ibb = min( ibb, n )
6306 in = ic + ibb - 1
6307*
6308 DO 110 i = ic, in
6309*
6310 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312 $ c( ioffc ) ) / eps
6313 IF( g( i-ic+1 ).NE.zero )
6314 $ erri = erri / g( i-ic+1 )
6315 err = max( err, erri )
6316 IF( err*sqrt( eps ).GE.one )
6317 $ info = 1
6318 iic = iic + 1
6319 END IF
6320*
6321 ioffc = ioffc + 1
6322*
6323 110 CONTINUE
6324*
6325 icurrow = mod( icurrow+1, nprow )
6326*
6327 DO 130 i = in+1, ic+n-1, descc( mb_ )
6328 ibb = min( ic+n-i, descc( mb_ ) )
6329*
6330 DO 120 kk = 0, ibb-1
6331*
6332 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334 $ c( ioffc ) )/eps
6335 IF( g( i+kk-ic+1 ).NE.zero )
6336 $ erri = erri / g( i+kk-ic+1 )
6337 err = max( err, erri )
6338 IF( err*sqrt( eps ).GE.one )
6339 $ info = 1
6340 iic = iic + 1
6341 END IF
6342*
6343 ioffc = ioffc + 1
6344*
6345 120 CONTINUE
6346*
6347 icurrow = mod( icurrow+1, nprow )
6348*
6349 130 CONTINUE
6350*
6351 END IF
6352*
6353* If INFO = 0, all results are at least half accurate.
6354*
6355 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357 $ mycol )
6358 IF( info.NE.0 )
6359 $ GO TO 150
6360*
6361 140 CONTINUE
6362*
6363 150 CONTINUE
6364*
6365 RETURN
6366*
6367* End of PSMMCH2
6368*
6369 END
6370 SUBROUTINE psmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6371 $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6372*
6373* -- PBLAS test routine (version 2.0) --
6374* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6375* and University of California, Berkeley.
6376* April 1, 1998
6377*
6378* .. Scalar Arguments ..
6379 CHARACTER*1 TRANS, UPLO
6380 INTEGER IA, IC, INFO, JA, JC, M, N
6381 REAL ALPHA, BETA, ERR
6382* ..
6383* .. Array Arguments ..
6384 INTEGER DESCA( * ), DESCC( * )
6385 REAL A( * ), C( * ), PC( * )
6386* ..
6387*
6388* Purpose
6389* =======
6390*
6391* PSMMCH3 checks the results of the computational tests.
6392*
6393* Notes
6394* =====
6395*
6396* A description vector is associated with each 2D block-cyclicly dis-
6397* tributed matrix. This vector stores the information required to
6398* establish the mapping between a matrix entry and its corresponding
6399* process and memory location.
6400*
6401* In the following comments, the character _ should be read as
6402* "of the distributed matrix". Let A be a generic term for any 2D
6403* block cyclicly distributed matrix. Its description vector is DESCA:
6404*
6405* NOTATION STORED IN EXPLANATION
6406* ---------------- --------------- ------------------------------------
6407* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6408* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6409* the NPROW x NPCOL BLACS process grid
6410* A is distributed over. The context
6411* itself is global, but the handle
6412* (the integer value) may vary.
6413* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6414* ted matrix A, M_A >= 0.
6415* N_A (global) DESCA( N_ ) The number of columns in the distri-
6416* buted matrix A, N_A >= 0.
6417* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6418* block of the matrix A, IMB_A > 0.
6419* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6420* left block of the matrix A,
6421* INB_A > 0.
6422* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6423* bute the last M_A-IMB_A rows of A,
6424* MB_A > 0.
6425* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6426* bute the last N_A-INB_A columns of
6427* A, NB_A > 0.
6428* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6429* row of the matrix A is distributed,
6430* NPROW > RSRC_A >= 0.
6431* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6432* first column of A is distributed.
6433* NPCOL > CSRC_A >= 0.
6434* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6435* array storing the local blocks of
6436* the distributed matrix A,
6437* IF( Lc( 1, N_A ) > 0 )
6438* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6439* ELSE
6440* LLD_A >= 1.
6441*
6442* Let K be the number of rows of a matrix A starting at the global in-
6443* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6444* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6445* receive if these K rows were distributed over NPROW processes. If K
6446* is the number of columns of a matrix A starting at the global index
6447* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6448* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6449* these K columns were distributed over NPCOL processes.
6450*
6451* The values of Lr() and Lc() may be determined via a call to the func-
6452* tion PB_NUMROC:
6453* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6454* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6455*
6456* Arguments
6457* =========
6458*
6459* UPLO (global input) CHARACTER*1
6460* On entry, UPLO specifies which part of C should contain the
6461* result.
6462*
6463* TRANS (global input) CHARACTER*1
6464* On entry, TRANS specifies whether the matrix A has to be
6465* transposed or not before computing the matrix-matrix addi-
6466* tion.
6467*
6468* M (global input) INTEGER
6469* On entry, M specifies the number of rows of C.
6470*
6471* N (global input) INTEGER
6472* On entry, N specifies the number of columns of C.
6473*
6474* ALPHA (global input) REAL
6475* On entry, ALPHA specifies the scalar alpha.
6476*
6477* A (local input) REAL array
6478* On entry, A is an array of dimension (DESCA( M_ ),*). This
6479* array contains a local copy of the initial entire matrix PA.
6480*
6481* IA (global input) INTEGER
6482* On entry, IA specifies A's global row index, which points to
6483* the beginning of the submatrix sub( A ).
6484*
6485* JA (global input) INTEGER
6486* On entry, JA specifies A's global column index, which points
6487* to the beginning of the submatrix sub( A ).
6488*
6489* DESCA (global and local input) INTEGER array
6490* On entry, DESCA is an integer array of dimension DLEN_. This
6491* is the array descriptor for the matrix A.
6492*
6493* BETA (global input) REAL
6494* On entry, BETA specifies the scalar beta.
6495*
6496* C (local input/local output) REAL array
6497* On entry, C is an array of dimension (DESCC( M_ ),*). This
6498* array contains a local copy of the initial entire matrix PC.
6499*
6500* PC (local input) REAL array
6501* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6502* array contains the local pieces of the matrix PC.
6503*
6504* IC (global input) INTEGER
6505* On entry, IC specifies C's global row index, which points to
6506* the beginning of the submatrix sub( C ).
6507*
6508* JC (global input) INTEGER
6509* On entry, JC specifies C's global column index, which points
6510* to the beginning of the submatrix sub( C ).
6511*
6512* DESCC (global and local input) INTEGER array
6513* On entry, DESCC is an integer array of dimension DLEN_. This
6514* is the array descriptor for the matrix C.
6515*
6516* ERR (global output) REAL
6517* On exit, ERR specifies the largest error in absolute value.
6518*
6519* INFO (global output) INTEGER
6520* On exit, if INFO <> 0, the result is less than half accurate.
6521*
6522* -- Written on April 1, 1998 by
6523* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6524*
6525* =====================================================================
6526*
6527* .. Parameters ..
6528 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6529 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6530 $ RSRC_
6531 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6532 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6533 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6534 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6535 REAL ZERO
6536 PARAMETER ( ZERO = 0.0e+0 )
6537* ..
6538* .. Local Scalars ..
6539 LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER
6540 INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6541 $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6542 $ NPROW
6543 REAL ERR0, ERRI, PREC
6544* ..
6545* .. External Subroutines ..
6546 EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L,
6547 $ pserraxpby, sgamx2d
6548* ..
6549* .. External Functions ..
6550 LOGICAL LSAME
6551 REAL PSLAMCH
6552 EXTERNAL LSAME, PSLAMCH
6553* ..
6554* .. Intrinsic Functions ..
6555 INTRINSIC abs, max
6556* ..
6557* .. Executable Statements ..
6558*
6559 ictxt = descc( ctxt_ )
6560 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6561*
6562 prec = pslamch( ictxt, 'eps' )
6563*
6564 upper = lsame( uplo, 'U' )
6565 lower = lsame( uplo, 'L' )
6566 notran = lsame( trans, 'N' )
6567*
6568* Compute expected result in C using data in A and C. This part of
6569* the computation is performed by every process in the grid.
6570*
6571 info = 0
6572 err = zero
6573*
6574 lda = max( 1, desca( m_ ) )
6575 ldc = max( 1, descc( m_ ) )
6576 ldpc = max( 1, descc( lld_ ) )
6577 rowrep = ( descc( rsrc_ ).EQ.-1 )
6578 colrep = ( descc( csrc_ ).EQ.-1 )
6579*
6580 IF( notran ) THEN
6581*
6582 DO 20 j = jc, jc + n - 1
6583*
6584 ioffc = ic + ( j - 1 ) * ldc
6585 ioffa = ia + ( ja - 1 + j - jc ) * lda
6586*
6587 DO 10 i = ic, ic + m - 1
6588*
6589 IF( upper ) THEN
6590 IF( ( j - jc ).GE.( i - ic ) ) THEN
6591 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6592 $ c( ioffc ), prec )
6593 ELSE
6594 erri = zero
6595 END IF
6596 ELSE IF( lower ) THEN
6597 IF( ( j - jc ).LE.( i - ic ) ) THEN
6598 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6599 $ c( ioffc ), prec )
6600 ELSE
6601 erri = zero
6602 END IF
6603 ELSE
6604 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6605 $ c( ioffc ), prec )
6606 END IF
6607*
6608 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6609 $ iic, jjc, icrow, iccol )
6610 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6611 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6612 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6613 IF( err0.GT.erri )
6614 $ info = 1
6615 err = max( err, err0 )
6616 END IF
6617*
6618 ioffa = ioffa + 1
6619 ioffc = ioffc + 1
6620*
6621 10 CONTINUE
6622*
6623 20 CONTINUE
6624*
6625 ELSE
6626*
6627 DO 40 j = jc, jc + n - 1
6628*
6629 ioffc = ic + ( j - 1 ) * ldc
6630 ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6631*
6632 DO 30 i = ic, ic + m - 1
6633*
6634 IF( upper ) THEN
6635 IF( ( j - jc ).GE.( i - ic ) ) THEN
6636 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6637 $ c( ioffc ), prec )
6638 ELSE
6639 erri = zero
6640 END IF
6641 ELSE IF( lower ) THEN
6642 IF( ( j - jc ).LE.( i - ic ) ) THEN
6643 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6644 $ c( ioffc ), prec )
6645 ELSE
6646 erri = zero
6647 END IF
6648 ELSE
6649 CALL pserraxpby( erri, alpha, a( ioffa ), beta,
6650 $ c( ioffc ), prec )
6651 END IF
6652*
6653 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6654 $ iic, jjc, icrow, iccol )
6655 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6656 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6657 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6658 IF( err0.GT.erri )
6659 $ info = 1
6660 err = max( err, err0 )
6661 END IF
6662*
6663 ioffc = ioffc + 1
6664 ioffa = ioffa + lda
6665*
6666 30 CONTINUE
6667*
6668 40 CONTINUE
6669*
6670 END IF
6671*
6672* If INFO = 0, all results are at least half accurate.
6673*
6674 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6675 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6676 $ mycol )
6677*
6678 RETURN
6679*
6680* End of PSMMCH3
6681*
6682 END
6683 SUBROUTINE pserraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6684*
6685* -- PBLAS test routine (version 2.0) --
6686* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6687* and University of California, Berkeley.
6688* April 1, 1998
6689*
6690* .. Scalar Arguments ..
6691 REAL ALPHA, BETA, ERRBND, PREC, X, Y
6692* ..
6693*
6694* Purpose
6695* =======
6696*
6697* PSERRAXPBY serially computes y := beta*y + alpha * x and returns a
6698* scaled relative acceptable error bound on the result.
6699*
6700* Arguments
6701* =========
6702*
6703* ERRBND (global output) REAL
6704* On exit, ERRBND specifies the scaled relative acceptable er-
6705* ror bound.
6706*
6707* ALPHA (global input) REAL
6708* On entry, ALPHA specifies the scalar alpha.
6709*
6710* X (global input) REAL
6711* On entry, X specifies the scalar x to be scaled.
6712*
6713* BETA (global input) REAL
6714* On entry, BETA specifies the scalar beta.
6715*
6716* Y (global input/global output) REAL
6717* On entry, Y specifies the scalar y to be added. On exit, Y
6718* contains the resulting scalar y.
6719*
6720* PREC (global input) REAL
6721* On entry, PREC specifies the machine precision.
6722*
6723* -- Written on April 1, 1998 by
6724* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6725*
6726* =====================================================================
6727*
6728* .. Parameters ..
6729 REAL ONE, TWO, ZERO
6730 PARAMETER ( ONE = 1.0e+0, two = 2.0e+0,
6731 $ zero = 0.0e+0 )
6732* ..
6733* .. Local Scalars ..
6734 REAL ADDBND, FACT, SUMPOS, SUMNEG, TMP
6735* ..
6736* .. Intrinsic Functions ..
6737* ..
6738* .. Executable Statements ..
6739*
6740 SUMPOS = zero
6741 sumneg = zero
6742 fact = one + two * prec
6743 addbnd = two * two * two * prec
6744*
6745 tmp = alpha * x
6746 IF( tmp.GE.zero ) THEN
6747 sumpos = sumpos + tmp * fact
6748 ELSE
6749 sumneg = sumneg - tmp * fact
6750 END IF
6751*
6752 tmp = beta * y
6753 IF( tmp.GE.zero ) THEN
6754 sumpos = sumpos + tmp * fact
6755 ELSE
6756 sumneg = sumneg - tmp * fact
6757 END IF
6758*
6759 y = ( beta * y ) + ( alpha * x )
6760*
6761 errbnd = addbnd * max( sumpos, sumneg )
6762*
6763 RETURN
6764*
6765* End of PSERRAXPBY
6766*
6767 END
6768 REAL function pslamch( ictxt, cmach )
6769*
6770* -- PBLAS test routine (version 2.0) --
6771* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6772* and University of California, Berkeley.
6773* April 1, 1998
6774*
6775* .. Scalar Arguments ..
6776 CHARACTER*1 cmach
6777 INTEGER ictxt
6778* ..
6779*
6780* Purpose
6781* =======
6782*
6783* PSLAMCH determines single precision machine parameters.
6784*
6785* Arguments
6786* =========
6787*
6788* ICTXT (local input) INTEGER
6789* On entry, ICTXT specifies the BLACS context handle, indica-
6790* ting the global context of the operation. The context itself
6791* is global, but the value of ICTXT is local.
6792*
6793* CMACH (global input) CHARACTER*1
6794* On entry, CMACH specifies the value to be returned by PSLAMCH
6795* as follows:
6796* = 'E' or 'e', PSLAMCH := eps,
6797* = 'S' or 's , PSLAMCH := sfmin,
6798* = 'B' or 'b', PSLAMCH := base,
6799* = 'P' or 'p', PSLAMCH := eps*base,
6800* = 'N' or 'n', PSLAMCH := t,
6801* = 'R' or 'r', PSLAMCH := rnd,
6802* = 'M' or 'm', PSLAMCH := emin,
6803* = 'U' or 'u', PSLAMCH := rmin,
6804* = 'L' or 'l', PSLAMCH := emax,
6805* = 'O' or 'o', PSLAMCH := rmax,
6806*
6807* where
6808*
6809* eps = relative machine precision,
6810* sfmin = safe minimum, such that 1/sfmin does not overflow,
6811* base = base of the machine,
6812* prec = eps*base,
6813* t = number of (base) digits in the mantissa,
6814* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise,
6815* emin = minimum exponent before (gradual) underflow,
6816* rmin = underflow threshold - base**(emin-1),
6817* emax = largest exponent before overflow,
6818* rmax = overflow threshold - (base**emax)*(1-eps).
6819*
6820* -- Written on April 1, 1998 by
6821* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6822*
6823* =====================================================================
6824*
6825* .. Local Scalars ..
6826 CHARACTER*1 top
6827 INTEGER idumm
6828 REAL temp
6829* ..
6830* .. External Subroutines ..
6831 EXTERNAL pb_topget, sgamn2d, sgamx2d
6832* ..
6833* .. External Functions ..
6834 LOGICAL lsame
6835 REAL slamch
6836 EXTERNAL lsame, slamch
6837* ..
6838* .. Executable Statements ..
6839*
6840 temp = slamch( cmach )
6841*
6842 IF( lsame( cmach, 'E' ).OR.lsame( cmach, 'S' ).OR.
6843 $ lsame( cmach, 'M' ).OR.lsame( cmach, 'U' ) ) THEN
6844 CALL pb_topget( ictxt, 'Combine', 'All', top )
6845 idumm = 0
6846 CALL sgamx2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6847 $ idumm, -1, -1, idumm )
6848 ELSE IF( lsame( cmach, 'L' ).OR.lsame( cmach, 'O' ) ) THEN
6849 CALL pb_topget( ictxt, 'Combine', 'All', top )
6850 idumm = 0
6851 CALL sgamn2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6852 $ idumm, -1, -1, idumm )
6853 END IF
6854*
6855 pslamch = temp
6856*
6857 RETURN
6858*
6859* End of PSLAMCH
6860*
6861 END
6862 SUBROUTINE pslaset( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA )
6863*
6864* -- PBLAS test routine (version 2.0) --
6865* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6866* and University of California, Berkeley.
6867* April 1, 1998
6868*
6869* .. Scalar Arguments ..
6870 CHARACTER*1 UPLO
6871 INTEGER IA, JA, M, N
6872 REAL ALPHA, BETA
6873* ..
6874* .. Array Arguments ..
6875 INTEGER DESCA( * )
6876 REAL A( * )
6877* ..
6878*
6879* Purpose
6880* =======
6881*
6882* PSLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno-
6883* ted by sub( A ) to beta on the diagonal and alpha on the offdiago-
6884* nals.
6885*
6886* Notes
6887* =====
6888*
6889* A description vector is associated with each 2D block-cyclicly dis-
6890* tributed matrix. This vector stores the information required to
6891* establish the mapping between a matrix entry and its corresponding
6892* process and memory location.
6893*
6894* In the following comments, the character _ should be read as
6895* "of the distributed matrix". Let A be a generic term for any 2D
6896* block cyclicly distributed matrix. Its description vector is DESCA:
6897*
6898* NOTATION STORED IN EXPLANATION
6899* ---------------- --------------- ------------------------------------
6900* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6901* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6902* the NPROW x NPCOL BLACS process grid
6903* A is distributed over. The context
6904* itself is global, but the handle
6905* (the integer value) may vary.
6906* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6907* ted matrix A, M_A >= 0.
6908* N_A (global) DESCA( N_ ) The number of columns in the distri-
6909* buted matrix A, N_A >= 0.
6910* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6911* block of the matrix A, IMB_A > 0.
6912* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6913* left block of the matrix A,
6914* INB_A > 0.
6915* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6916* bute the last M_A-IMB_A rows of A,
6917* MB_A > 0.
6918* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6919* bute the last N_A-INB_A columns of
6920* A, NB_A > 0.
6921* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6922* row of the matrix A is distributed,
6923* NPROW > RSRC_A >= 0.
6924* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6925* first column of A is distributed.
6926* NPCOL > CSRC_A >= 0.
6927* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6928* array storing the local blocks of
6929* the distributed matrix A,
6930* IF( Lc( 1, N_A ) > 0 )
6931* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6932* ELSE
6933* LLD_A >= 1.
6934*
6935* Let K be the number of rows of a matrix A starting at the global in-
6936* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6937* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6938* receive if these K rows were distributed over NPROW processes. If K
6939* is the number of columns of a matrix A starting at the global index
6940* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6941* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6942* these K columns were distributed over NPCOL processes.
6943*
6944* The values of Lr() and Lc() may be determined via a call to the func-
6945* tion PB_NUMROC:
6946* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6947* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6948*
6949* Arguments
6950* =========
6951*
6952* UPLO (global input) CHARACTER*1
6953* On entry, UPLO specifies the part of the submatrix sub( A )
6954* to be set:
6955* = 'L' or 'l': Lower triangular part is set; the strictly
6956* upper triangular part of sub( A ) is not changed;
6957* = 'U' or 'u': Upper triangular part is set; the strictly
6958* lower triangular part of sub( A ) is not changed;
6959* Otherwise: All of the matrix sub( A ) is set.
6960*
6961* M (global input) INTEGER
6962* On entry, M specifies the number of rows of the submatrix
6963* sub( A ). M must be at least zero.
6964*
6965* N (global input) INTEGER
6966* On entry, N specifies the number of columns of the submatrix
6967* sub( A ). N must be at least zero.
6968*
6969* ALPHA (global input) REAL
6970* On entry, ALPHA specifies the scalar alpha, i.e., the cons-
6971* tant to which the offdiagonal elements are to be set.
6972*
6973* BETA (global input) REAL
6974* On entry, BETA specifies the scalar beta, i.e., the constant
6975* to which the diagonal elements are to be set.
6976*
6977* A (local input/local output) REAL array
6978* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
6979* at least Lc( 1, JA+N-1 ). Before entry, this array contains
6980* the local entries of the matrix A to be set. On exit, the
6981* leading m by n submatrix sub( A ) is set as follows:
6982*
6983* if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N,
6984* if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N,
6985* otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N,
6986* and IA+i.NE.JA+j,
6987* and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
6988*
6989* IA (global input) INTEGER
6990* On entry, IA specifies A's global row index, which points to
6991* the beginning of the submatrix sub( A ).
6992*
6993* JA (global input) INTEGER
6994* On entry, JA specifies A's global column index, which points
6995* to the beginning of the submatrix sub( A ).
6996*
6997* DESCA (global and local input) INTEGER array
6998* On entry, DESCA is an integer array of dimension DLEN_. This
6999* is the array descriptor for the matrix A.
7000*
7001* -- Written on April 1, 1998 by
7002* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7003*
7004* =====================================================================
7005*
7006* .. Parameters ..
7007 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7008 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7009 $ RSRC_
7010 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7011 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7012 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7013 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7014* ..
7015* .. Local Scalars ..
7016 LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER,
7017 $ UPPER
7018 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7019 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA,
7020 $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC,
7021 $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP,
7022 $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD,
7023 $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1,
7024 $ UPP
7025* ..
7026* .. Local Arrays ..
7027 INTEGER DESCA2( DLEN_ )
7028* ..
7029* .. External Subroutines ..
7030 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7032* ..
7033* .. External Functions ..
7034 LOGICAL LSAME
7035 EXTERNAL lsame
7036* ..
7037* .. Intrinsic Functions ..
7038 INTRINSIC min
7039* ..
7040* .. Executable Statements ..
7041*
7042 IF( m.EQ.0 .OR. n.EQ.0 )
7043 $ RETURN
7044*
7045* Convert descriptor
7046*
7047 CALL pb_desctrans( desca, desca2 )
7048*
7049* Get grid parameters
7050*
7051 ictxt = desca2( ctxt_ )
7052 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7053*
7054 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7055 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7056 $ iacol, mrrow, mrcol )
7057*
7058 IF( mp.LE.0 .OR. nq.LE.0 )
7059 $ RETURN
7060*
7061 isrowrep = ( desca2( rsrc_ ).LT.0 )
7062 iscolrep = ( desca2( csrc_ ).LT.0 )
7063 lda = desca2( lld_ )
7064*
7065 upper = .NOT.( lsame( uplo, 'L' ) )
7066 lower = .NOT.( lsame( uplo, 'U' ) )
7067*
7068 IF( ( ( lower.AND.upper ).AND.( alpha.EQ.beta ) ).OR.
7069 $ ( isrowrep .AND. iscolrep ) ) THEN
7070 IF( ( mp.GT.0 ).AND.( nq.GT.0 ) )
7071 $ CALL pb_slaset( uplo, mp, nq, 0, alpha, beta,
7072 $ a( iia + ( jja - 1 ) * lda ), lda )
7073 RETURN
7074 END IF
7075*
7076* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7077* ILOW, LOW, IUPP, and UPP.
7078*
7079 mb = desca2( mb_ )
7080 nb = desca2( nb_ )
7081 CALL pb_binfo( 0, mp, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7082 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7083 $ lnbloc, ilow, low, iupp, upp )
7084*
7085 ioffa = iia - 1
7086 joffa = jja - 1
7087 iimax = ioffa + mp
7088 jjmax = joffa + nq
7089*
7090 IF( isrowrep ) THEN
7091 pmb = mb
7092 ELSE
7093 pmb = nprow * mb
7094 END IF
7095 IF( iscolrep ) THEN
7096 qnb = nb
7097 ELSE
7098 qnb = npcol * nb
7099 END IF
7100*
7101 m1 = mp
7102 n1 = nq
7103*
7104* Handle the first block of rows or columns separately, and update
7105* LCMT00, MBLKS and NBLKS.
7106*
7107 godown = ( lcmt00.GT.iupp )
7108 goleft = ( lcmt00.LT.ilow )
7109*
7110 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7111*
7112* LCMT00 >= ILOW && LCMT00 <= IUPP
7113*
7114 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7115 godown = .NOT.goleft
7116*
7117 CALL pb_slaset( uplo, imbloc, inbloc, lcmt00, alpha, beta,
7118 $ a( iia+joffa*lda ), lda )
7119 IF( godown ) THEN
7120 IF( upper .AND. nq.GT.inbloc )
7121 $ CALL pb_slaset( 'All', imbloc, nq-inbloc, 0, alpha,
7122 $ alpha, a( iia+(joffa+inbloc)*lda ), lda )
7123 iia = iia + imbloc
7124 m1 = m1 - imbloc
7125 ELSE
7126 IF( lower .AND. mp.GT.imbloc )
7127 $ CALL pb_slaset( 'All', mp-imbloc, inbloc, 0, alpha,
7128 $ alpha, a( iia+imbloc+joffa*lda ), lda )
7129 jja = jja + inbloc
7130 n1 = n1 - inbloc
7131 END IF
7132*
7133 END IF
7134*
7135 IF( godown ) THEN
7136*
7137 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7138 mblks = mblks - 1
7139 ioffa = ioffa + imbloc
7140*
7141 10 CONTINUE
7142 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7143 lcmt00 = lcmt00 - pmb
7144 mblks = mblks - 1
7145 ioffa = ioffa + mb
7146 GO TO 10
7147 END IF
7148*
7149 tmp1 = min( ioffa, iimax ) - iia + 1
7150 IF( upper .AND. tmp1.GT.0 ) THEN
7151 CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7152 $ a( iia+joffa*lda ), lda )
7153 iia = iia + tmp1
7154 m1 = m1 - tmp1
7155 END IF
7156*
7157 IF( mblks.LE.0 )
7158 $ RETURN
7159*
7160 lcmt = lcmt00
7161 mblkd = mblks
7162 ioffd = ioffa
7163*
7164 mbloc = mb
7165 20 CONTINUE
7166 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7167 IF( mblkd.EQ.1 )
7168 $ mbloc = lmbloc
7169 CALL pb_slaset( uplo, mbloc, inbloc, lcmt, alpha, beta,
7170 $ a( ioffd+1+joffa*lda ), lda )
7171 lcmt00 = lcmt
7172 lcmt = lcmt - pmb
7173 mblks = mblkd
7174 mblkd = mblkd - 1
7175 ioffa = ioffd
7176 ioffd = ioffd + mbloc
7177 GO TO 20
7178 END IF
7179*
7180 tmp1 = m1 - ioffd + iia - 1
7181 IF( lower .AND. tmp1.GT.0 )
7182 $ CALL pb_slaset( 'ALL', tmp1, inbloc, 0, alpha, alpha,
7183 $ a( ioffd+1+joffa*lda ), lda )
7184*
7185 tmp1 = ioffa - iia + 1
7186 m1 = m1 - tmp1
7187 n1 = n1 - inbloc
7188 lcmt00 = lcmt00 + low - ilow + qnb
7189 nblks = nblks - 1
7190 joffa = joffa + inbloc
7191*
7192 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7193 $ CALL pb_slaset( 'ALL', tmp1, n1, 0, alpha, alpha,
7194 $ a( iia+joffa*lda ), lda )
7195*
7196 iia = ioffa + 1
7197 jja = joffa + 1
7198*
7199 ELSE IF( goleft ) THEN
7200*
7201 lcmt00 = lcmt00 + low - ilow + qnb
7202 nblks = nblks - 1
7203 joffa = joffa + inbloc
7204*
7205 30 CONTINUE
7206 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7207 lcmt00 = lcmt00 + qnb
7208 nblks = nblks - 1
7209 joffa = joffa + nb
7210 GO TO 30
7211 END IF
7212*
7213 tmp1 = min( joffa, jjmax ) - jja + 1
7214 IF( lower .AND. tmp1.GT.0 ) THEN
7215 CALL pb_slaset( 'All', m1, tmp1, 0, alpha, alpha,
7216 $ a( iia+(jja-1)*lda ), lda )
7217 jja = jja + tmp1
7218 n1 = n1 - tmp1
7219 END IF
7220*
7221 IF( nblks.LE.0 )
7222 $ RETURN
7223*
7224 lcmt = lcmt00
7225 nblkd = nblks
7226 joffd = joffa
7227*
7228 nbloc = nb
7229 40 CONTINUE
7230 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7231 IF( nblkd.EQ.1 )
7232 $ nbloc = lnbloc
7233 CALL pb_slaset( uplo, imbloc, nbloc, lcmt, alpha, beta,
7234 $ a( iia+joffd*lda ), lda )
7235 lcmt00 = lcmt
7236 lcmt = lcmt + qnb
7237 nblks = nblkd
7238 nblkd = nblkd - 1
7239 joffa = joffd
7240 joffd = joffd + nbloc
7241 GO TO 40
7242 END IF
7243*
7244 tmp1 = n1 - joffd + jja - 1
7245 IF( upper .AND. tmp1.GT.0 )
7246 $ CALL pb_slaset( 'All', imbloc, tmp1, 0, alpha, alpha,
7247 $ a( iia+joffd*lda ), lda )
7248*
7249 tmp1 = joffa - jja + 1
7250 m1 = m1 - imbloc
7251 n1 = n1 - tmp1
7252 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7253 mblks = mblks - 1
7254 ioffa = ioffa + imbloc
7255*
7256 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7257 $ CALL pb_slaset( 'All', m1, tmp1, 0, alpha, alpha,
7258 $ a( ioffa+1+(jja-1)*lda ), lda )
7259*
7260 iia = ioffa + 1
7261 jja = joffa + 1
7262*
7263 END IF
7264*
7265 nbloc = nb
7266 50 CONTINUE
7267 IF( nblks.GT.0 ) THEN
7268 IF( nblks.EQ.1 )
7269 $ nbloc = lnbloc
7270 60 CONTINUE
7271 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7272 lcmt00 = lcmt00 - pmb
7273 mblks = mblks - 1
7274 ioffa = ioffa + mb
7275 GO TO 60
7276 END IF
7277*
7278 tmp1 = min( ioffa, iimax ) - iia + 1
7279 IF( upper .AND. tmp1.GT.0 ) THEN
7280 CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7281 $ a( iia+joffa*lda ), lda )
7282 iia = iia + tmp1
7283 m1 = m1 - tmp1
7284 END IF
7285*
7286 IF( mblks.LE.0 )
7287 $ RETURN
7288*
7289 lcmt = lcmt00
7290 mblkd = mblks
7291 ioffd = ioffa
7292*
7293 mbloc = mb
7294 70 CONTINUE
7295 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7296 IF( mblkd.EQ.1 )
7297 $ mbloc = lmbloc
7298 CALL pb_slaset( uplo, mbloc, nbloc, lcmt, alpha, beta,
7299 $ a( ioffd+1+joffa*lda ), lda )
7300 lcmt00 = lcmt
7301 lcmt = lcmt - pmb
7302 mblks = mblkd
7303 mblkd = mblkd - 1
7304 ioffa = ioffd
7305 ioffd = ioffd + mbloc
7306 GO TO 70
7307 END IF
7308*
7309 tmp1 = m1 - ioffd + iia - 1
7310 IF( lower .AND. tmp1.GT.0 )
7311 $ CALL pb_slaset( 'All', tmp1, nbloc, 0, alpha, alpha,
7312 $ a( ioffd+1+joffa*lda ), lda )
7313*
7314 tmp1 = min( ioffa, iimax ) - iia + 1
7315 m1 = m1 - tmp1
7316 n1 = n1 - nbloc
7317 lcmt00 = lcmt00 + qnb
7318 nblks = nblks - 1
7319 joffa = joffa + nbloc
7320*
7321 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7322 $ CALL pb_slaset( 'All', tmp1, n1, 0, alpha, alpha,
7323 $ a( iia+joffa*lda ), lda )
7324*
7325 iia = ioffa + 1
7326 jja = joffa + 1
7327*
7328 GO TO 50
7329*
7330 END IF
7331*
7332 RETURN
7333*
7334* End of PSLASET
7335*
7336 END
7337 SUBROUTINE pslascal( TYPE, M, N, ALPHA, A, IA, JA, DESCA )
7338*
7339* -- PBLAS test routine (version 2.0) --
7340* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7341* and University of California, Berkeley.
7342* April 1, 1998
7343*
7344* .. Scalar Arguments ..
7345 CHARACTER*1 TYPE
7346 INTEGER IA, JA, M, N
7347 REAL ALPHA
7348* ..
7349* .. Array Arguments ..
7350 INTEGER DESCA( * )
7351 REAL A( * )
7352* ..
7353*
7354* Purpose
7355* =======
7356*
7357* PSLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted
7358* by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full,
7359* upper triangular, lower triangular or upper Hessenberg.
7360*
7361* Notes
7362* =====
7363*
7364* A description vector is associated with each 2D block-cyclicly dis-
7365* tributed matrix. This vector stores the information required to
7366* establish the mapping between a matrix entry and its corresponding
7367* process and memory location.
7368*
7369* In the following comments, the character _ should be read as
7370* "of the distributed matrix". Let A be a generic term for any 2D
7371* block cyclicly distributed matrix. Its description vector is DESCA:
7372*
7373* NOTATION STORED IN EXPLANATION
7374* ---------------- --------------- ------------------------------------
7375* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7376* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7377* the NPROW x NPCOL BLACS process grid
7378* A is distributed over. The context
7379* itself is global, but the handle
7380* (the integer value) may vary.
7381* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7382* ted matrix A, M_A >= 0.
7383* N_A (global) DESCA( N_ ) The number of columns in the distri-
7384* buted matrix A, N_A >= 0.
7385* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7386* block of the matrix A, IMB_A > 0.
7387* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7388* left block of the matrix A,
7389* INB_A > 0.
7390* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7391* bute the last M_A-IMB_A rows of A,
7392* MB_A > 0.
7393* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7394* bute the last N_A-INB_A columns of
7395* A, NB_A > 0.
7396* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7397* row of the matrix A is distributed,
7398* NPROW > RSRC_A >= 0.
7399* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7400* first column of A is distributed.
7401* NPCOL > CSRC_A >= 0.
7402* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7403* array storing the local blocks of
7404* the distributed matrix A,
7405* IF( Lc( 1, N_A ) > 0 )
7406* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7407* ELSE
7408* LLD_A >= 1.
7409*
7410* Let K be the number of rows of a matrix A starting at the global in-
7411* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7412* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7413* receive if these K rows were distributed over NPROW processes. If K
7414* is the number of columns of a matrix A starting at the global index
7415* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7416* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7417* these K columns were distributed over NPCOL processes.
7418*
7419* The values of Lr() and Lc() may be determined via a call to the func-
7420* tion PB_NUMROC:
7421* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7422* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7423*
7424* Arguments
7425* =========
7426*
7427* TYPE (global input) CHARACTER*1
7428* On entry, TYPE specifies the type of the input submatrix as
7429* follows:
7430* = 'L' or 'l': sub( A ) is a lower triangular matrix,
7431* = 'U' or 'u': sub( A ) is an upper triangular matrix,
7432* = 'H' or 'h': sub( A ) is an upper Hessenberg matrix,
7433* otherwise sub( A ) is a full matrix.
7434*
7435* M (global input) INTEGER
7436* On entry, M specifies the number of rows of the submatrix
7437* sub( A ). M must be at least zero.
7438*
7439* N (global input) INTEGER
7440* On entry, N specifies the number of columns of the submatrix
7441* sub( A ). N must be at least zero.
7442*
7443* ALPHA (global input) REAL
7444* On entry, ALPHA specifies the scalar alpha.
7445*
7446* A (local input/local output) REAL array
7447* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7448* at least Lc( 1, JA+N-1 ). Before entry, this array contains
7449* the local entries of the matrix A.
7450* On exit, the local entries of this array corresponding to the
7451* to the entries of the submatrix sub( A ) are overwritten by
7452* the local entries of the m by n scaled submatrix.
7453*
7454* IA (global input) INTEGER
7455* On entry, IA specifies A's global row index, which points to
7456* the beginning of the submatrix sub( A ).
7457*
7458* JA (global input) INTEGER
7459* On entry, JA specifies A's global column index, which points
7460* to the beginning of the submatrix sub( A ).
7461*
7462* DESCA (global and local input) INTEGER array
7463* On entry, DESCA is an integer array of dimension DLEN_. This
7464* is the array descriptor for the matrix A.
7465*
7466* -- Written on April 1, 1998 by
7467* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7468*
7469* =====================================================================
7470*
7471* .. Parameters ..
7472 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7473 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7474 $ RSRC_
7475 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7476 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7477 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7478 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7479* ..
7480* .. Local Scalars ..
7481 CHARACTER*1 UPLO
7482 LOGICAL GODOWN, GOLEFT, LOWER, UPPER
7483 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7484 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE,
7485 $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00,
7486 $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS,
7487 $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB,
7488 $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB,
7489 $ QNB, TMP1, UPP
7490* ..
7491* .. Local Arrays ..
7492 INTEGER DESCA2( DLEN_ )
7493* ..
7494* .. External Subroutines ..
7495 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7497* ..
7498* .. External Functions ..
7499 LOGICAL LSAME
7500 INTEGER PB_NUMROC
7501 EXTERNAL lsame, pb_numroc
7502* ..
7503* .. Intrinsic Functions ..
7504 INTRINSIC min
7505* ..
7506* .. Executable Statements ..
7507*
7508* Convert descriptor
7509*
7510 CALL pb_desctrans( desca, desca2 )
7511*
7512* Get grid parameters
7513*
7514 ictxt = desca2( ctxt_ )
7515 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7516*
7517* Quick return if possible
7518*
7519 IF( m.EQ.0 .OR. n.EQ.0 )
7520 $ RETURN
7521*
7522 IF( lsame( TYPE, 'L' ) ) then
7523 itype = 1
7524 uplo = TYPE
7525 upper = .false.
7526 lower = .true.
7527 ioffd = 0
7528 ELSE IF( lsame( TYPE, 'U' ) ) then
7529 itype = 2
7530 uplo = TYPE
7531 upper = .true.
7532 lower = .false.
7533 ioffd = 0
7534 ELSE IF( lsame( TYPE, 'H' ) ) then
7535 itype = 3
7536 uplo = 'U'
7537 upper = .true.
7538 lower = .false.
7539 ioffd = 1
7540 ELSE
7541 itype = 0
7542 uplo = 'A'
7543 upper = .true.
7544 lower = .true.
7545 ioffd = 0
7546 END IF
7547*
7548* Compute local indexes
7549*
7550 IF( itype.EQ.0 ) THEN
7551*
7552* Full matrix
7553*
7554 CALL pb_infog2l( ia, ja, desca2, nprow, npcol, myrow, mycol,
7555 $ iia, jja, iarow, iacol )
7556 mp = pb_numroc( m, ia, desca2( imb_ ), desca2( mb_ ), myrow,
7557 $ desca2( rsrc_ ), nprow )
7558 nq = pb_numroc( n, ja, desca2( inb_ ), desca2( nb_ ), mycol,
7559 $ desca2( csrc_ ), npcol )
7560*
7561 IF( mp.LE.0 .OR. nq.LE.0 )
7562 $ RETURN
7563*
7564 lda = desca2( lld_ )
7565 ioffa = iia + ( jja - 1 ) * lda
7566*
7567 CALL pb_slascal( 'All', mp, nq, 0, alpha, a( ioffa ), lda )
7568*
7569 ELSE
7570*
7571* Trapezoidal matrix
7572*
7573 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7574 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7575 $ iacol, mrrow, mrcol )
7576*
7577 IF( mp.LE.0 .OR. nq.LE.0 )
7578 $ RETURN
7579*
7580* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC,
7581* LNBLOC, ILOW, LOW, IUPP, and UPP.
7582*
7583 mb = desca2( mb_ )
7584 nb = desca2( nb_ )
7585 lda = desca2( lld_ )
7586*
7587 CALL pb_binfo( ioffd, mp, nq, imb1, inb1, mb, nb, mrrow,
7588 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
7589 $ lmbloc, lnbloc, ilow, low, iupp, upp )
7590*
7591 m1 = mp
7592 n1 = nq
7593 ioffa = iia - 1
7594 joffa = jja - 1
7595 iimax = ioffa + mp
7596 jjmax = joffa + nq
7597*
7598 IF( desca2( rsrc_ ).LT.0 ) THEN
7599 pmb = mb
7600 ELSE
7601 pmb = nprow * mb
7602 END IF
7603 IF( desca2( csrc_ ).LT.0 ) THEN
7604 qnb = nb
7605 ELSE
7606 qnb = npcol * nb
7607 END IF
7608*
7609* Handle the first block of rows or columns separately, and
7610* update LCMT00, MBLKS and NBLKS.
7611*
7612 godown = ( lcmt00.GT.iupp )
7613 goleft = ( lcmt00.LT.ilow )
7614*
7615 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7616*
7617* LCMT00 >= ILOW && LCMT00 <= IUPP
7618*
7619 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7620 godown = .NOT.goleft
7621*
7622 CALL pb_slascal( uplo, imbloc, inbloc, lcmt00, alpha,
7623 $ a( iia+joffa*lda ), lda )
7624 IF( godown ) THEN
7625 IF( upper .AND. nq.GT.inbloc )
7626 $ CALL pb_slascal( 'All', imbloc, nq-inbloc, 0, alpha,
7627 $ a( iia+(joffa+inbloc)*lda ), lda )
7628 iia = iia + imbloc
7629 m1 = m1 - imbloc
7630 ELSE
7631 IF( lower .AND. mp.GT.imbloc )
7632 $ CALL pb_slascal( 'All', mp-imbloc, inbloc, 0, alpha,
7633 $ a( iia+imbloc+joffa*lda ), lda )
7634 jja = jja + inbloc
7635 n1 = n1 - inbloc
7636 END IF
7637*
7638 END IF
7639*
7640 IF( godown ) THEN
7641*
7642 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7643 mblks = mblks - 1
7644 ioffa = ioffa + imbloc
7645*
7646 10 CONTINUE
7647 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7648 lcmt00 = lcmt00 - pmb
7649 mblks = mblks - 1
7650 ioffa = ioffa + mb
7651 GO TO 10
7652 END IF
7653*
7654 tmp1 = min( ioffa, iimax ) - iia + 1
7655 IF( upper .AND. tmp1.GT.0 ) THEN
7656 CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7657 $ a( iia+joffa*lda ), lda )
7658 iia = iia + tmp1
7659 m1 = m1 - tmp1
7660 END IF
7661*
7662 IF( mblks.LE.0 )
7663 $ RETURN
7664*
7665 lcmt = lcmt00
7666 mblkd = mblks
7667 ioffd = ioffa
7668*
7669 mbloc = mb
7670 20 CONTINUE
7671 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7672 IF( mblkd.EQ.1 )
7673 $ mbloc = lmbloc
7674 CALL pb_slascal( uplo, mbloc, inbloc, lcmt, alpha,
7675 $ a( ioffd+1+joffa*lda ), lda )
7676 lcmt00 = lcmt
7677 lcmt = lcmt - pmb
7678 mblks = mblkd
7679 mblkd = mblkd - 1
7680 ioffa = ioffd
7681 ioffd = ioffd + mbloc
7682 GO TO 20
7683 END IF
7684*
7685 tmp1 = m1 - ioffd + iia - 1
7686 IF( lower .AND. tmp1.GT.0 )
7687 $ CALL pb_slascal( 'All', tmp1, inbloc, 0, alpha,
7688 $ a( ioffd+1+joffa*lda ), lda )
7689*
7690 tmp1 = ioffa - iia + 1
7691 m1 = m1 - tmp1
7692 n1 = n1 - inbloc
7693 lcmt00 = lcmt00 + low - ilow + qnb
7694 nblks = nblks - 1
7695 joffa = joffa + inbloc
7696*
7697 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7698 $ CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7699 $ a( iia+joffa*lda ), lda )
7700*
7701 iia = ioffa + 1
7702 jja = joffa + 1
7703*
7704 ELSE IF( goleft ) THEN
7705*
7706 lcmt00 = lcmt00 + low - ilow + qnb
7707 nblks = nblks - 1
7708 joffa = joffa + inbloc
7709*
7710 30 CONTINUE
7711 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7712 lcmt00 = lcmt00 + qnb
7713 nblks = nblks - 1
7714 joffa = joffa + nb
7715 GO TO 30
7716 END IF
7717*
7718 tmp1 = min( joffa, jjmax ) - jja + 1
7719 IF( lower .AND. tmp1.GT.0 ) THEN
7720 CALL pb_slascal( 'All', m1, tmp1, 0, alpha,
7721 $ a( iia+(jja-1)*lda ), lda )
7722 jja = jja + tmp1
7723 n1 = n1 - tmp1
7724 END IF
7725*
7726 IF( nblks.LE.0 )
7727 $ RETURN
7728*
7729 lcmt = lcmt00
7730 nblkd = nblks
7731 joffd = joffa
7732*
7733 nbloc = nb
7734 40 CONTINUE
7735 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7736 IF( nblkd.EQ.1 )
7737 $ nbloc = lnbloc
7738 CALL pb_slascal( uplo, imbloc, nbloc, lcmt, alpha,
7739 $ a( iia+joffd*lda ), lda )
7740 lcmt00 = lcmt
7741 lcmt = lcmt + qnb
7742 nblks = nblkd
7743 nblkd = nblkd - 1
7744 joffa = joffd
7745 joffd = joffd + nbloc
7746 GO TO 40
7747 END IF
7748*
7749 tmp1 = n1 - joffd + jja - 1
7750 IF( upper .AND. tmp1.GT.0 )
7751 $ CALL pb_slascal( 'All', imbloc, tmp1, 0, alpha,
7752 $ a( iia+joffd*lda ), lda )
7753*
7754 tmp1 = joffa - jja + 1
7755 m1 = m1 - imbloc
7756 n1 = n1 - tmp1
7757 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7758 mblks = mblks - 1
7759 ioffa = ioffa + imbloc
7760*
7761 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7762 $ CALL pb_slascal( 'All', m1, tmp1, 0, alpha,
7763 $ a( ioffa+1+(jja-1)*lda ), lda )
7764*
7765 iia = ioffa + 1
7766 jja = joffa + 1
7767*
7768 END IF
7769*
7770 nbloc = nb
7771 50 CONTINUE
7772 IF( nblks.GT.0 ) THEN
7773 IF( nblks.EQ.1 )
7774 $ nbloc = lnbloc
7775 60 CONTINUE
7776 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7777 lcmt00 = lcmt00 - pmb
7778 mblks = mblks - 1
7779 ioffa = ioffa + mb
7780 GO TO 60
7781 END IF
7782*
7783 tmp1 = min( ioffa, iimax ) - iia + 1
7784 IF( upper .AND. tmp1.GT.0 ) THEN
7785 CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7786 $ a( iia+joffa*lda ), lda )
7787 iia = iia + tmp1
7788 m1 = m1 - tmp1
7789 END IF
7790*
7791 IF( mblks.LE.0 )
7792 $ RETURN
7793*
7794 lcmt = lcmt00
7795 mblkd = mblks
7796 ioffd = ioffa
7797*
7798 mbloc = mb
7799 70 CONTINUE
7800 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7801 IF( mblkd.EQ.1 )
7802 $ mbloc = lmbloc
7803 CALL pb_slascal( uplo, mbloc, nbloc, lcmt, alpha,
7804 $ a( ioffd+1+joffa*lda ), lda )
7805 lcmt00 = lcmt
7806 lcmt = lcmt - pmb
7807 mblks = mblkd
7808 mblkd = mblkd - 1
7809 ioffa = ioffd
7810 ioffd = ioffd + mbloc
7811 GO TO 70
7812 END IF
7813*
7814 tmp1 = m1 - ioffd + iia - 1
7815 IF( lower .AND. tmp1.GT.0 )
7816 $ CALL pb_slascal( 'All', tmp1, nbloc, 0, alpha,
7817 $ a( ioffd+1+joffa*lda ), lda )
7818*
7819 tmp1 = min( ioffa, iimax ) - iia + 1
7820 m1 = m1 - tmp1
7821 n1 = n1 - nbloc
7822 lcmt00 = lcmt00 + qnb
7823 nblks = nblks - 1
7824 joffa = joffa + nbloc
7825*
7826 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7827 $ CALL pb_slascal( 'All', tmp1, n1, 0, alpha,
7828 $ a( iia+joffa*lda ), lda )
7829*
7830 iia = ioffa + 1
7831 jja = joffa + 1
7832*
7833 GO TO 50
7834*
7835 END IF
7836*
7837 END IF
7838*
7839 RETURN
7840*
7841* End of PSLASCAL
7842*
7843 END
7844 SUBROUTINE pslagen( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA,
7845 $ DESCA, IASEED, A, LDA )
7846*
7847* -- PBLAS test routine (version 2.0) --
7848* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7849* and University of California, Berkeley.
7850* April 1, 1998
7851*
7852* .. Scalar Arguments ..
7853 LOGICAL inplace
7854 CHARACTER*1 aform, diag
7855 INTEGER ia, iaseed, ja, lda, m, n, offa
7856* ..
7857* .. Array Arguments ..
7858 INTEGER desca( * )
7859 REAL A( LDA, * )
7860* ..
7861*
7862* Purpose
7863* =======
7864*
7865* PSLAGEN generates (or regenerates) a submatrix sub( A ) denoting
7866* A(IA:IA+M-1,JA:JA+N-1).
7867*
7868* Notes
7869* =====
7870*
7871* A description vector is associated with each 2D block-cyclicly dis-
7872* tributed matrix. This vector stores the information required to
7873* establish the mapping between a matrix entry and its corresponding
7874* process and memory location.
7875*
7876* In the following comments, the character _ should be read as
7877* "of the distributed matrix". Let A be a generic term for any 2D
7878* block cyclicly distributed matrix. Its description vector is DESCA:
7879*
7880* NOTATION STORED IN EXPLANATION
7881* ---------------- --------------- ------------------------------------
7882* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7883* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7884* the NPROW x NPCOL BLACS process grid
7885* A is distributed over. The context
7886* itself is global, but the handle
7887* (the integer value) may vary.
7888* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7889* ted matrix A, M_A >= 0.
7890* N_A (global) DESCA( N_ ) The number of columns in the distri-
7891* buted matrix A, N_A >= 0.
7892* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7893* block of the matrix A, IMB_A > 0.
7894* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7895* left block of the matrix A,
7896* INB_A > 0.
7897* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7898* bute the last M_A-IMB_A rows of A,
7899* MB_A > 0.
7900* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7901* bute the last N_A-INB_A columns of
7902* A, NB_A > 0.
7903* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7904* row of the matrix A is distributed,
7905* NPROW > RSRC_A >= 0.
7906* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7907* first column of A is distributed.
7908* NPCOL > CSRC_A >= 0.
7909* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7910* array storing the local blocks of
7911* the distributed matrix A,
7912* IF( Lc( 1, N_A ) > 0 )
7913* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7914* ELSE
7915* LLD_A >= 1.
7916*
7917* Let K be the number of rows of a matrix A starting at the global in-
7918* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7919* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7920* receive if these K rows were distributed over NPROW processes. If K
7921* is the number of columns of a matrix A starting at the global index
7922* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7923* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7924* these K columns were distributed over NPCOL processes.
7925*
7926* The values of Lr() and Lc() may be determined via a call to the func-
7927* tion PB_NUMROC:
7928* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7929* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7930*
7931* Arguments
7932* =========
7933*
7934* INPLACE (global input) LOGICAL
7935* On entry, INPLACE specifies if the matrix should be generated
7936* in place or not. If INPLACE is .TRUE., the local random array
7937* to be generated will start in memory at the local memory lo-
7938* cation A( 1, 1 ), otherwise it will start at the local posi-
7939* tion induced by IA and JA.
7940*
7941* AFORM (global input) CHARACTER*1
7942* On entry, AFORM specifies the type of submatrix to be genera-
7943* ted as follows:
7944* AFORM = 'S', sub( A ) is a symmetric matrix,
7945* AFORM = 'H', sub( A ) is a Hermitian matrix,
7946* AFORM = 'T', sub( A ) is overrwritten with the transpose
7947* of what would normally be generated,
7948* AFORM = 'C', sub( A ) is overwritten with the conjugate
7949* transpose of what would normally be genera-
7950* ted.
7951* AFORM = 'N', a random submatrix is generated.
7952*
7953* DIAG (global input) CHARACTER*1
7954* On entry, DIAG specifies if the generated submatrix is diago-
7955* nally dominant or not as follows:
7956* DIAG = 'D' : sub( A ) is diagonally dominant,
7957* DIAG = 'N' : sub( A ) is not diagonally dominant.
7958*
7959* OFFA (global input) INTEGER
7960* On entry, OFFA specifies the offdiagonal of the underlying
7961* matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma-
7962* trix is symmetric, Hermitian or diagonally dominant. OFFA = 0
7963* specifies the main diagonal, OFFA > 0 specifies a subdiago-
7964* nal, and OFFA < 0 specifies a superdiagonal (see further de-
7965* tails).
7966*
7967* M (global input) INTEGER
7968* On entry, M specifies the global number of matrix rows of the
7969* submatrix sub( A ) to be generated. M must be at least zero.
7970*
7971* N (global input) INTEGER
7972* On entry, N specifies the global number of matrix columns of
7973* the submatrix sub( A ) to be generated. N must be at least
7974* zero.
7975*
7976* IA (global input) INTEGER
7977* On entry, IA specifies A's global row index, which points to
7978* the beginning of the submatrix sub( A ).
7979*
7980* JA (global input) INTEGER
7981* On entry, JA specifies A's global column index, which points
7982* to the beginning of the submatrix sub( A ).
7983*
7984* DESCA (global and local input) INTEGER array
7985* On entry, DESCA is an integer array of dimension DLEN_. This
7986* is the array descriptor for the matrix A.
7987*
7988* IASEED (global input) INTEGER
7989* On entry, IASEED specifies the seed number to generate the
7990* matrix A. IASEED must be at least zero.
7991*
7992* A (local output) REAL array
7993* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7994* at least Lc( 1, JA+N-1 ). On exit, this array contains the
7995* local entries of the randomly generated submatrix sub( A ).
7996*
7997* LDA (local input) INTEGER
7998* On entry, LDA specifies the local leading dimension of the
7999* array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_).
8000* This restriction is however not enforced, and this subroutine
8001* requires only that LDA >= MAX( 1, Mp ) where
8002*
8003* Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ).
8004*
8005* PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW
8006* and NPCOL can be determined by calling the BLACS subroutine
8007* BLACS_GRIDINFO.
8008*
8009* Further Details
8010* ===============
8011*
8012* OFFD is tied to the matrix described by DESCA, as opposed to the
8013* piece that is currently (re)generated. This is a global information
8014* independent from the distribution parameters. Below are examples of
8015* the meaning of OFFD for a global 7 by 5 matrix:
8016*
8017* ---------------------------------------------------------------------
8018* OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4
8019* -------|-------------------------------------------------------------
8020* | | OFFD=-1 | OFFD=0 OFFD=2
8021* | V V
8022* 0 | . d . . . -> d . . . . . . . . .
8023* 1 | . . d . . . d . . . . . . . .
8024* 2 | . . . d . . . d . . -> d . . . .
8025* 3 | . . . . d . . . d . . d . . .
8026* 4 | . . . . . . . . . d . . d . .
8027* 5 | . . . . . . . . . . . . . d .
8028* 6 | . . . . . . . . . . . . . . d
8029* ---------------------------------------------------------------------
8030*
8031* -- Written on April 1, 1998 by
8032* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8033*
8034* =====================================================================
8035*
8036* .. Parameters ..
8037 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8038 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8039 $ RSRC_
8040 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8041 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8042 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8043 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8044 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
8045 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
8046 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
8047 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
8048 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
8049 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
8050 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
8051 $ jmp_len = 11 )
8052* ..
8053* .. Local Scalars ..
8054 LOGICAL DIAGDO, SYMM, HERM, NOTRAN
8055 INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK,
8056 $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB,
8057 $ inb1, inbloc, inbvir, info, ioffda, itmp, iupp,
8058 $ ivir, jja, jlocblk, jlocoff, jvir, lcmt00,
8059 $ lmbloc, lnbloc, low, maxmn, mb, mblks, mp,
8060 $ mrcol, mrrow, mycdist, mycol, myrdist, myrow,
8061 $ nb, nblks, npcol, nprow, nq, nvir, rsrc, upp
8062 REAL ALPHA
8063* ..
8064* .. Local Arrays ..
8065 INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ),
8066 $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 )
8067* ..
8068* .. External Subroutines ..
8069 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8073 $ pxerbla
8074* ..
8075* .. External Functions ..
8076 LOGICAL LSAME
8077 EXTERNAL LSAME
8078* ..
8079* .. Intrinsic Functions ..
8080 INTRINSIC MAX, MIN, REAL
8081* ..
8082* .. Data Statements ..
8083 DATA ( muladd0( i ), i = 1, 4 ) / 20077, 16838,
8084 $ 12345, 0 /
8085* ..
8086* .. Executable Statements ..
8087*
8088* Convert descriptor
8089*
8090 CALL pb_desctrans( desca, desca2 )
8091*
8092* Test the input arguments
8093*
8094 ictxt = desca2( ctxt_ )
8095 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8096*
8097* Test the input parameters
8098*
8099 info = 0
8100 IF( nprow.EQ.-1 ) THEN
8101 info = -( 1000 + ctxt_ )
8102 ELSE
8103 symm = lsame( aform, 'S' )
8104 herm = lsame( aform, 'H' )
8105 notran = lsame( aform, 'N' )
8106 diagdo = lsame( diag, 'D' )
8107 IF( .NOT.( symm.OR.herm.OR.notran ) .AND.
8108 $ .NOT.( lsame( aform, 'T' ) ) .AND.
8109 $ .NOT.( lsame( aform, 'C' ) ) ) THEN
8110 info = -2
8111 ELSE IF( ( .NOT.diagdo ) .AND.
8112 $ ( .NOT.lsame( diag, 'N' ) ) ) THEN
8113 info = -3
8114 END IF
8115 CALL pb_chkmat( ictxt, m, 5, n, 6, ia, ja, desca2, 10, info )
8116 END IF
8117*
8118 IF( info.NE.0 ) THEN
8119 CALL pxerbla( ictxt, 'PSLAGEN', -info )
8120 RETURN
8121 END IF
8122*
8123* Quick return if possible
8124*
8125 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8126 $ RETURN
8127*
8128* Start the operations
8129*
8130 mb = desca2( mb_ )
8131 nb = desca2( nb_ )
8132 imb = desca2( imb_ )
8133 inb = desca2( inb_ )
8134 rsrc = desca2( rsrc_ )
8135 csrc = desca2( csrc_ )
8136*
8137* Figure out local information about the distributed matrix operand
8138*
8139 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8140 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8141 $ iacol, mrrow, mrcol )
8142*
8143* Decide where the entries shall be stored in memory
8144*
8145 IF( inplace ) THEN
8146 iia = 1
8147 jja = 1
8148 END IF
8149*
8150* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8151* ILOW, LOW, IUPP, and UPP.
8152*
8153 ioffda = ja + offa - ia
8154 CALL pb_binfo( ioffda, mp, nq, imb1, inb1, mb, nb, mrrow,
8155 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8156 $ lmbloc, lnbloc, ilow, low, iupp, upp )
8157*
8158* Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST
8159* This values correspond to the square virtual underlying matrix
8160* of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used
8161* to set up the random sequence. For practical purposes, the size
8162* of this virtual matrix is upper bounded by M_ + N_ - 1.
8163*
8164 itmp = max( 0, -offa )
8165 ivir = ia + itmp
8166 imbvir = imb + itmp
8167 nvir = desca2( m_ ) + itmp
8168*
8169 CALL pb_locinfo( ivir, imbvir, mb, myrow, rsrc, nprow, ilocblk,
8170 $ ilocoff, myrdist )
8171*
8172 itmp = max( 0, offa )
8173 jvir = ja + itmp
8174 inbvir = inb + itmp
8175 nvir = max( max( nvir, desca2( n_ ) + itmp ),
8176 $ desca2( m_ ) + desca2( n_ ) - 1 )
8177*
8178 CALL pb_locinfo( jvir, inbvir, nb, mycol, csrc, npcol, jlocblk,
8179 $ jlocoff, mycdist )
8180*
8181 IF( symm .OR. herm .OR. notran ) THEN
8182*
8183 CALL pb_initjmp( .true., nvir, imbvir, inbvir, imbloc, inbloc,
8184 $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8185*
8186* Compute constants to jump JMP( * ) numbers in the sequence
8187*
8188 CALL pb_initmuladd( muladd0, jmp, imuladd )
8189*
8190* Compute and set the random value corresponding to A( IA, JA )
8191*
8192 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8193 $ myrdist, mycdist, nprow, npcol, jmp,
8194 $ imuladd, iran )
8195*
8196 CALL pb_slagen( 'Lower', aform, a( iia, jja ), lda, lcmt00,
8197 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8198 $ nb, lnbloc, jmp, imuladd )
8199*
8200 END IF
8201*
8202 IF( symm .OR. herm .OR. ( .NOT. notran ) ) THEN
8203*
8204 CALL pb_initjmp( .false., nvir, imbvir, inbvir, imbloc, inbloc,
8205 $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8206*
8207* Compute constants to jump JMP( * ) numbers in the sequence
8208*
8209 CALL pb_initmuladd( muladd0, jmp, imuladd )
8210*
8211* Compute and set the random value corresponding to A( IA, JA )
8212*
8213 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8214 $ myrdist, mycdist, nprow, npcol, jmp,
8215 $ imuladd, iran )
8216*
8217 CALL pb_slagen( 'Upper', aform, a( iia, jja ), lda, lcmt00,
8218 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8219 $ nb, lnbloc, jmp, imuladd )
8220*
8221 END IF
8222*
8223 IF( diagdo ) THEN
8224*
8225 maxmn = max( desca2( m_ ), desca2( n_ ) )
8226 alpha = real( maxmn )
8227*
8228 IF( ioffda.GE.0 ) THEN
8229 CALL psladom( inplace, min( max( 0, m-ioffda ), n ), alpha,
8230 $ a, min( ia+ioffda, ia+m-1 ), ja, desca )
8231 ELSE
8232 CALL psladom( inplace, min( m, max( 0, n+ioffda ) ), alpha,
8233 $ a, ia, min( ja-ioffda, ja+n-1 ), desca )
8234 END IF
8235*
8236 END IF
8237*
8238 RETURN
8239*
8240* End of PSLAGEN
8241*
8242 END
8243 SUBROUTINE psladom( INPLACE, N, ALPHA, A, IA, JA, DESCA )
8244*
8245* -- PBLAS test routine (version 2.0) --
8246* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8247* and University of California, Berkeley.
8248* April 1, 1998
8249*
8250* .. Scalar Arguments ..
8251 LOGICAL INPLACE
8252 INTEGER IA, JA, N
8253 REAL ALPHA
8254* ..
8255* .. Array Arguments ..
8256 INTEGER DESCA( * )
8257 REAL A( * )
8258* ..
8259*
8260* Purpose
8261* =======
8262*
8263* PSLADOM adds alpha to the diagonal entries of an n by n submatrix
8264* sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
8265*
8266* Notes
8267* =====
8268*
8269* A description vector is associated with each 2D block-cyclicly dis-
8270* tributed matrix. This vector stores the information required to
8271* establish the mapping between a matrix entry and its corresponding
8272* process and memory location.
8273*
8274* In the following comments, the character _ should be read as
8275* "of the distributed matrix". Let A be a generic term for any 2D
8276* block cyclicly distributed matrix. Its description vector is DESCA:
8277*
8278* NOTATION STORED IN EXPLANATION
8279* ---------------- --------------- ------------------------------------
8280* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8281* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8282* the NPROW x NPCOL BLACS process grid
8283* A is distributed over. The context
8284* itself is global, but the handle
8285* (the integer value) may vary.
8286* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8287* ted matrix A, M_A >= 0.
8288* N_A (global) DESCA( N_ ) The number of columns in the distri-
8289* buted matrix A, N_A >= 0.
8290* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8291* block of the matrix A, IMB_A > 0.
8292* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8293* left block of the matrix A,
8294* INB_A > 0.
8295* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8296* bute the last M_A-IMB_A rows of A,
8297* MB_A > 0.
8298* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8299* bute the last N_A-INB_A columns of
8300* A, NB_A > 0.
8301* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8302* row of the matrix A is distributed,
8303* NPROW > RSRC_A >= 0.
8304* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8305* first column of A is distributed.
8306* NPCOL > CSRC_A >= 0.
8307* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8308* array storing the local blocks of
8309* the distributed matrix A,
8310* IF( Lc( 1, N_A ) > 0 )
8311* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8312* ELSE
8313* LLD_A >= 1.
8314*
8315* Let K be the number of rows of a matrix A starting at the global in-
8316* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8317* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8318* receive if these K rows were distributed over NPROW processes. If K
8319* is the number of columns of a matrix A starting at the global index
8320* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8321* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8322* these K columns were distributed over NPCOL processes.
8323*
8324* The values of Lr() and Lc() may be determined via a call to the func-
8325* tion PB_NUMROC:
8326* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8327* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8328*
8329* Arguments
8330* =========
8331*
8332* INPLACE (global input) LOGICAL
8333* On entry, INPLACE specifies if the matrix should be generated
8334* in place or not. If INPLACE is .TRUE., the local random array
8335* to be generated will start in memory at the local memory lo-
8336* cation A( 1, 1 ), otherwise it will start at the local posi-
8337* tion induced by IA and JA.
8338*
8339* N (global input) INTEGER
8340* On entry, N specifies the global order of the submatrix
8341* sub( A ) to be modified. N must be at least zero.
8342*
8343* ALPHA (global input) REAL
8344* On entry, ALPHA specifies the scalar alpha.
8345*
8346* A (local input/local output) REAL array
8347* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8348* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8349* the local entries of the matrix A. On exit, the local entries
8350* of this array corresponding to the main diagonal of sub( A )
8351* have been updated.
8352*
8353* IA (global input) INTEGER
8354* On entry, IA specifies A's global row index, which points to
8355* the beginning of the submatrix sub( A ).
8356*
8357* JA (global input) INTEGER
8358* On entry, JA specifies A's global column index, which points
8359* to the beginning of the submatrix sub( A ).
8360*
8361* DESCA (global and local input) INTEGER array
8362* On entry, DESCA is an integer array of dimension DLEN_. This
8363* is the array descriptor for the matrix A.
8364*
8365* -- Written on April 1, 1998 by
8366* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8367*
8368* =====================================================================
8369*
8370* .. Parameters ..
8371 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8372 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8373 $ RSRC_
8374 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8375 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8376 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8377 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8378* ..
8379* .. Local Scalars ..
8380 LOGICAL GODOWN, GOLEFT
8381 INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
8382 $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
8383 $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
8384 $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
8385 $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
8386 $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
8387 REAL ATMP
8388* ..
8389* .. Local Scalars ..
8390 INTEGER DESCA2( DLEN_ )
8391* ..
8392* .. External Subroutines ..
8393 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8394 $ pb_desctrans
8395* ..
8396* .. Intrinsic Functions ..
8397 INTRINSIC abs, max, min
8398* ..
8399* .. Executable Statements ..
8400*
8401* Convert descriptor
8402*
8403 CALL pb_desctrans( desca, desca2 )
8404*
8405* Get grid parameters
8406*
8407 ictxt = desca2( ctxt_ )
8408 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8409*
8410 IF( n.EQ.0 )
8411 $ RETURN
8412*
8413 CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
8414 $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
8415 $ iacol, mrrow, mrcol )
8416*
8417* Decide where the entries shall be stored in memory
8418*
8419 IF( inplace ) THEN
8420 iia = 1
8421 jja = 1
8422 END IF
8423*
8424* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8425* ILOW, LOW, IUPP, and UPP.
8426*
8427 mb = desca2( mb_ )
8428 nb = desca2( nb_ )
8429*
8430 CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
8431 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
8432 $ lnbloc, ilow, low, iupp, upp )
8433*
8434 ioffa = iia - 1
8435 joffa = jja - 1
8436 lda = desca2( lld_ )
8437 ldap1 = lda + 1
8438*
8439 IF( desca2( rsrc_ ).LT.0 ) THEN
8440 pmb = mb
8441 ELSE
8442 pmb = nprow * mb
8443 END IF
8444 IF( desca2( csrc_ ).LT.0 ) THEN
8445 qnb = nb
8446 ELSE
8447 qnb = npcol * nb
8448 END IF
8449*
8450* Handle the first block of rows or columns separately, and update
8451* LCMT00, MBLKS and NBLKS.
8452*
8453 godown = ( lcmt00.GT.iupp )
8454 goleft = ( lcmt00.LT.ilow )
8455*
8456 IF( .NOT.godown .AND. .NOT.goleft ) THEN
8457*
8458* LCMT00 >= ILOW && LCMT00 <= IUPP
8459*
8460 IF( lcmt00.GE.0 ) THEN
8461 ijoffa = ioffa+lcmt00 + ( joffa - 1 ) * lda
8462 DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
8463 atmp = a( ijoffa + i*ldap1 )
8464 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8465 10 CONTINUE
8466 ELSE
8467 ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
8468 DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
8469 atmp = a( ijoffa + i*ldap1 )
8470 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8471 20 CONTINUE
8472 END IF
8473 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
8474 godown = .NOT.goleft
8475*
8476 END IF
8477*
8478 IF( godown ) THEN
8479*
8480 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8481 mblks = mblks - 1
8482 ioffa = ioffa + imbloc
8483*
8484 30 CONTINUE
8485 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8486 lcmt00 = lcmt00 - pmb
8487 mblks = mblks - 1
8488 ioffa = ioffa + mb
8489 GO TO 30
8490 END IF
8491*
8492 lcmt = lcmt00
8493 mblkd = mblks
8494 ioffd = ioffa
8495*
8496 mbloc = mb
8497 40 CONTINUE
8498 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
8499 IF( mblkd.EQ.1 )
8500 $ mbloc = lmbloc
8501 IF( lcmt.GE.0 ) THEN
8502 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8503 DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
8504 atmp = a( ijoffa + i*ldap1 )
8505 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8506 50 CONTINUE
8507 ELSE
8508 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8509 DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
8510 atmp = a( ijoffa + i*ldap1 )
8511 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8512 60 CONTINUE
8513 END IF
8514 lcmt00 = lcmt
8515 lcmt = lcmt - pmb
8516 mblks = mblkd
8517 mblkd = mblkd - 1
8518 ioffa = ioffd
8519 ioffd = ioffd + mbloc
8520 GO TO 40
8521 END IF
8522*
8523 lcmt00 = lcmt00 + low - ilow + qnb
8524 nblks = nblks - 1
8525 joffa = joffa + inbloc
8526*
8527 ELSE IF( goleft ) THEN
8528*
8529 lcmt00 = lcmt00 + low - ilow + qnb
8530 nblks = nblks - 1
8531 joffa = joffa + inbloc
8532*
8533 70 CONTINUE
8534 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
8535 lcmt00 = lcmt00 + qnb
8536 nblks = nblks - 1
8537 joffa = joffa + nb
8538 GO TO 70
8539 END IF
8540*
8541 lcmt = lcmt00
8542 nblkd = nblks
8543 joffd = joffa
8544*
8545 nbloc = nb
8546 80 CONTINUE
8547 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
8548 IF( nblkd.EQ.1 )
8549 $ nbloc = lnbloc
8550 IF( lcmt.GE.0 ) THEN
8551 ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
8552 DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
8553 atmp = a( ijoffa + i*ldap1 )
8554 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8555 90 CONTINUE
8556 ELSE
8557 ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
8558 DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
8559 atmp = a( ijoffa + i*ldap1 )
8560 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8561 100 CONTINUE
8562 END IF
8563 lcmt00 = lcmt
8564 lcmt = lcmt + qnb
8565 nblks = nblkd
8566 nblkd = nblkd - 1
8567 joffa = joffd
8568 joffd = joffd + nbloc
8569 GO TO 80
8570 END IF
8571*
8572 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8573 mblks = mblks - 1
8574 ioffa = ioffa + imbloc
8575*
8576 END IF
8577*
8578 nbloc = nb
8579 110 CONTINUE
8580 IF( nblks.GT.0 ) THEN
8581 IF( nblks.EQ.1 )
8582 $ nbloc = lnbloc
8583 120 CONTINUE
8584 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8585 lcmt00 = lcmt00 - pmb
8586 mblks = mblks - 1
8587 ioffa = ioffa + mb
8588 GO TO 120
8589 END IF
8590*
8591 lcmt = lcmt00
8592 mblkd = mblks
8593 ioffd = ioffa
8594*
8595 mbloc = mb
8596 130 CONTINUE
8597 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
8598 IF( mblkd.EQ.1 )
8599 $ mbloc = lmbloc
8600 IF( lcmt.GE.0 ) THEN
8601 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8602 DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
8603 atmp = a( ijoffa + i*ldap1 )
8604 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8605 140 CONTINUE
8606 ELSE
8607 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8608 DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
8609 atmp = a( ijoffa + i*ldap1 )
8610 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8611 150 CONTINUE
8612 END IF
8613 lcmt00 = lcmt
8614 lcmt = lcmt - pmb
8615 mblks = mblkd
8616 mblkd = mblkd - 1
8617 ioffa = ioffd
8618 ioffd = ioffd + mbloc
8619 GO TO 130
8620 END IF
8621*
8622 lcmt00 = lcmt00 + qnb
8623 nblks = nblks - 1
8624 joffa = joffa + nbloc
8625 GO TO 110
8626*
8627 END IF
8628*
8629 RETURN
8630*
8631* End of PSLADOM
8632*
8633 END
8634 SUBROUTINE pb_pslaprnt( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8635 $ CMATNM, NOUT, WORK )
8636*
8637* -- PBLAS test routine (version 2.0) --
8638* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8639* and University of California, Berkeley.
8640* April 1, 1998
8641*
8642* .. Scalar Arguments ..
8643 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT
8644* ..
8645* .. Array Arguments ..
8646 CHARACTER*(*) CMATNM
8647 INTEGER DESCA( * )
8648 REAL A( * ), WORK( * )
8649* ..
8650*
8651* Purpose
8652* =======
8653*
8654* PB_PSLAPRNT prints to the standard output a submatrix sub( A ) deno-
8655* ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by
8656* the process of coordinates (IRPRNT, ICPRNT).
8657*
8658* Notes
8659* =====
8660*
8661* A description vector is associated with each 2D block-cyclicly dis-
8662* tributed matrix. This vector stores the information required to
8663* establish the mapping between a matrix entry and its corresponding
8664* process and memory location.
8665*
8666* In the following comments, the character _ should be read as
8667* "of the distributed matrix". Let A be a generic term for any 2D
8668* block cyclicly distributed matrix. Its description vector is DESCA:
8669*
8670* NOTATION STORED IN EXPLANATION
8671* ---------------- --------------- ------------------------------------
8672* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8673* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8674* the NPROW x NPCOL BLACS process grid
8675* A is distributed over. The context
8676* itself is global, but the handle
8677* (the integer value) may vary.
8678* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8679* ted matrix A, M_A >= 0.
8680* N_A (global) DESCA( N_ ) The number of columns in the distri-
8681* buted matrix A, N_A >= 0.
8682* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8683* block of the matrix A, IMB_A > 0.
8684* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8685* left block of the matrix A,
8686* INB_A > 0.
8687* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8688* bute the last M_A-IMB_A rows of A,
8689* MB_A > 0.
8690* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8691* bute the last N_A-INB_A columns of
8692* A, NB_A > 0.
8693* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8694* row of the matrix A is distributed,
8695* NPROW > RSRC_A >= 0.
8696* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8697* first column of A is distributed.
8698* NPCOL > CSRC_A >= 0.
8699* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8700* array storing the local blocks of
8701* the distributed matrix A,
8702* IF( Lc( 1, N_A ) > 0 )
8703* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8704* ELSE
8705* LLD_A >= 1.
8706*
8707* Let K be the number of rows of a matrix A starting at the global in-
8708* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8709* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8710* receive if these K rows were distributed over NPROW processes. If K
8711* is the number of columns of a matrix A starting at the global index
8712* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8713* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8714* these K columns were distributed over NPCOL processes.
8715*
8716* The values of Lr() and Lc() may be determined via a call to the func-
8717* tion PB_NUMROC:
8718* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8719* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8720*
8721* Arguments
8722* =========
8723*
8724* M (global input) INTEGER
8725* On entry, M specifies the number of rows of the submatrix
8726* sub( A ). M must be at least zero.
8727*
8728* N (global input) INTEGER
8729* On entry, N specifies the number of columns of the submatrix
8730* sub( A ). N must be at least zero.
8731*
8732* A (local input) REAL array
8733* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8734* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8735* the local entries of the matrix A.
8736*
8737* IA (global input) INTEGER
8738* On entry, IA specifies A's global row index, which points to
8739* the beginning of the submatrix sub( A ).
8740*
8741* JA (global input) INTEGER
8742* On entry, JA specifies A's global column index, which points
8743* to the beginning of the submatrix sub( A ).
8744*
8745* DESCA (global and local input) INTEGER array
8746* On entry, DESCA is an integer array of dimension DLEN_. This
8747* is the array descriptor for the matrix A.
8748*
8749* IRPRNT (global input) INTEGER
8750* On entry, IRPRNT specifies the row index of the printing pro-
8751* cess.
8752*
8753* ICPRNT (global input) INTEGER
8754* On entry, ICPRNT specifies the column index of the printing
8755* process.
8756*
8757* CMATNM (global input) CHARACTER*(*)
8758* On entry, CMATNM is the name of the matrix to be printed.
8759*
8760* NOUT (global input) INTEGER
8761* On entry, NOUT specifies the output unit number. When NOUT is
8762* equal to 6, the submatrix is printed on the screen.
8763*
8764* WORK (local workspace) REAL array
8765* On entry, WORK is a work array of dimension at least equal to
8766* MAX( IMB_A, MB_A ).
8767*
8768* -- Written on April 1, 1998 by
8769* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8770*
8771* =====================================================================
8772*
8773* .. Parameters ..
8774 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8775 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8776 $ RSRC_
8777 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8778 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8779 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8780 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8781* ..
8782* .. Local Scalars ..
8783 INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW
8784* ..
8785* .. Local Arrays ..
8786 INTEGER DESCA2( DLEN_ )
8787* ..
8788* .. External Subroutines ..
8789 EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PSLAPRN2
8790* ..
8791* .. Executable Statements ..
8792*
8793* Quick return if possible
8794*
8795 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8796 $ RETURN
8797*
8798* Convert descriptor
8799*
8800 CALL pb_desctrans( desca, desca2 )
8801*
8802 CALL blacs_gridinfo( desca2( ctxt_ ), nprow, npcol, myrow, mycol )
8803*
8804 IF( desca2( rsrc_ ).GE.0 ) THEN
8805 IF( desca2( csrc_ ).GE.0 ) THEN
8806 CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt, icprnt,
8807 $ cmatnm, nout, desca2( rsrc_ ),
8808 $ desca2( csrc_ ), work )
8809 ELSE
8810 DO 10 pcol = 0, npcol - 1
8811 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8812 $ WRITE( nout, * ) 'Colum-replicated array -- ' ,
8813 $ 'copy in process column: ', pcol
8814 CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8815 $ icprnt, cmatnm, nout, desca2( rsrc_ ),
8816 $ pcol, work )
8817 10 CONTINUE
8818 END IF
8819 ELSE
8820 IF( desca2( csrc_ ).GE.0 ) THEN
8821 DO 20 prow = 0, nprow - 1
8822 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8823 $ WRITE( nout, * ) 'Row-replicated array -- ' ,
8824 $ 'copy in process row: ', prow
8825 CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8826 $ icprnt, cmatnm, nout, prow,
8827 $ desca2( csrc_ ), work )
8828 20 CONTINUE
8829 ELSE
8830 DO 40 prow = 0, nprow - 1
8831 DO 30 pcol = 0, npcol - 1
8832 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8833 $ WRITE( nout, * ) 'Replicated array -- ' ,
8834 $ 'copy in process (', prow, ',', pcol, ')'
8835 CALL pb_pslaprn2( m, n, a, ia, ja, desca2, irprnt,
8836 $ icprnt, cmatnm, nout, prow, pcol,
8837 $ work )
8838 30 CONTINUE
8839 40 CONTINUE
8840 END IF
8841 END IF
8842*
8843 RETURN
8844*
8845* End of PB_PSLAPRNT
8846*
8847 END
8848 SUBROUTINE pb_pslaprn2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8849 $ CMATNM, NOUT, PROW, PCOL, WORK )
8850*
8851* -- PBLAS test routine (version 2.0) --
8852* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8853* and University of California, Berkeley.
8854* April 1, 1998
8855*
8856* .. Scalar Arguments ..
8857 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW
8858* ..
8859* .. Array Arguments ..
8860 CHARACTER*(*) CMATNM
8861 INTEGER DESCA( * )
8862 REAL A( * ), WORK( * )
8863* ..
8864*
8865* .. Parameters ..
8866 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8867 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8868 $ RSRC_
8869 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
8870 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8871 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8872 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8873* ..
8874* .. Local Scalars ..
8875 LOGICAL AISCOLREP, AISROWREP
8876 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
8877 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
8878 $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW
8879* ..
8880* .. External Subroutines ..
8881 EXTERNAL blacs_barrier, blacs_gridinfo, pb_infog2l,
8882 $ sgerv2d, sgesd2d
8883* ..
8884* .. Intrinsic Functions ..
8885 INTRINSIC min
8886* ..
8887* .. Executable Statements ..
8888*
8889* Get grid parameters
8890*
8891 ictxt = desca( ctxt_ )
8892 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8893 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
8894 $ iia, jja, iarow, iacol )
8895 ii = iia
8896 jj = jja
8897 IF( desca( rsrc_ ).LT.0 ) THEN
8898 aisrowrep = .true.
8899 iarow = prow
8900 icurrow = prow
8901 ELSE
8902 aisrowrep = .false.
8903 icurrow = iarow
8904 END IF
8905 IF( desca( csrc_ ).LT.0 ) THEN
8906 aiscolrep = .true.
8907 iacol = pcol
8908 icurcol = pcol
8909 ELSE
8910 aiscolrep = .false.
8911 icurcol = iacol
8912 END IF
8913 lda = desca( lld_ )
8914 ldw = max( desca( imb_ ), desca( mb_ ) )
8915*
8916* Handle the first block of column separately
8917*
8918 jb = desca( inb_ ) - ja + 1
8919 IF( jb.LE.0 )
8920 $ jb = ( (-jb) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
8921 jb = min( jb, n )
8922 jn = ja+jb-1
8923 DO 60 h = 0, jb-1
8924 ib = desca( imb_ ) - ia + 1
8925 IF( ib.LE.0 )
8926 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
8927 ib = min( ib, m )
8928 in = ia+ib-1
8929 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8930 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8931 DO 10 k = 0, ib-1
8932 WRITE( nout, fmt = 9999 )
8933 $ cmatnm, ia+k, ja+h, a( ii+k+(jj+h-1)*lda )
8934 10 CONTINUE
8935 END IF
8936 ELSE
8937 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8938 CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ), lda,
8939 $ irprnt, icprnt )
8940 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8941 CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow, icurcol )
8942 DO 20 k = 1, ib
8943 WRITE( nout, fmt = 9999 )
8944 $ cmatnm, ia+k-1, ja+h, work( k )
8945 20 CONTINUE
8946 END IF
8947 END IF
8948 IF( myrow.EQ.icurrow )
8949 $ ii = ii + ib
8950 IF( .NOT.aisrowrep )
8951 $ icurrow = mod( icurrow+1, nprow )
8952 CALL blacs_barrier( ictxt, 'All' )
8953*
8954* Loop over remaining block of rows
8955*
8956 DO 50 i = in+1, ia+m-1, desca( mb_ )
8957 ib = min( desca( mb_ ), ia+m-i )
8958 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8959 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8960 DO 30 k = 0, ib-1
8961 WRITE( nout, fmt = 9999 )
8962 $ cmatnm, i+k, ja+h, a( ii+k+(jj+h-1)*lda )
8963 30 CONTINUE
8964 END IF
8965 ELSE
8966 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8967 CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
8968 $ lda, irprnt, icprnt )
8969 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8970 CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
8971 $ icurcol )
8972 DO 40 k = 1, ib
8973 WRITE( nout, fmt = 9999 )
8974 $ cmatnm, i+k-1, ja+h, work( k )
8975 40 CONTINUE
8976 END IF
8977 END IF
8978 IF( myrow.EQ.icurrow )
8979 $ ii = ii + ib
8980 IF( .NOT.aisrowrep )
8981 $ icurrow = mod( icurrow+1, nprow )
8982 CALL blacs_barrier( ictxt, 'All' )
8983 50 CONTINUE
8984*
8985 ii = iia
8986 icurrow = iarow
8987 60 CONTINUE
8988*
8989 IF( mycol.EQ.icurcol )
8990 $ jj = jj + jb
8991 IF( .NOT.aiscolrep )
8992 $ icurcol = mod( icurcol+1, npcol )
8993 CALL blacs_barrier( ictxt, 'All' )
8994*
8995* Loop over remaining column blocks
8996*
8997 DO 130 j = jn+1, ja+n-1, desca( nb_ )
8998 jb = min( desca( nb_ ), ja+n-j )
8999 DO 120 h = 0, jb-1
9000 ib = desca( imb_ )-ia+1
9001 IF( ib.LE.0 )
9002 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9003 ib = min( ib, m )
9004 in = ia+ib-1
9005 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9006 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9007 DO 70 k = 0, ib-1
9008 WRITE( nout, fmt = 9999 )
9009 $ cmatnm, ia+k, j+h, a( ii+k+(jj+h-1)*lda )
9010 70 CONTINUE
9011 END IF
9012 ELSE
9013 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9014 CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9015 $ lda, irprnt, icprnt )
9016 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9017 CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9018 $ icurcol )
9019 DO 80 k = 1, ib
9020 WRITE( nout, fmt = 9999 )
9021 $ cmatnm, ia+k-1, j+h, work( k )
9022 80 CONTINUE
9023 END IF
9024 END IF
9025 IF( myrow.EQ.icurrow )
9026 $ ii = ii + ib
9027 icurrow = mod( icurrow+1, nprow )
9028 CALL blacs_barrier( ictxt, 'All' )
9029*
9030* Loop over remaining block of rows
9031*
9032 DO 110 i = in+1, ia+m-1, desca( mb_ )
9033 ib = min( desca( mb_ ), ia+m-i )
9034 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9035 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9036 DO 90 k = 0, ib-1
9037 WRITE( nout, fmt = 9999 )
9038 $ cmatnm, i+k, j+h, a( ii+k+(jj+h-1)*lda )
9039 90 CONTINUE
9040 END IF
9041 ELSE
9042 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9043 CALL sgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9044 $ lda, irprnt, icprnt )
9045 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9046 CALL sgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9047 $ icurcol )
9048 DO 100 k = 1, ib
9049 WRITE( nout, fmt = 9999 )
9050 $ cmatnm, i+k-1, j+h, work( k )
9051 100 CONTINUE
9052 END IF
9053 END IF
9054 IF( myrow.EQ.icurrow )
9055 $ ii = ii + ib
9056 IF( .NOT.aisrowrep )
9057 $ icurrow = mod( icurrow+1, nprow )
9058 CALL blacs_barrier( ictxt, 'All' )
9059 110 CONTINUE
9060*
9061 ii = iia
9062 icurrow = iarow
9063 120 CONTINUE
9064*
9065 IF( mycol.EQ.icurcol )
9066 $ jj = jj + jb
9067 IF( .NOT.aiscolrep )
9068 $ icurcol = mod( icurcol+1, npcol )
9069 CALL blacs_barrier( ictxt, 'All' )
9070*
9071 130 CONTINUE
9072*
9073 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8 )
9074*
9075 RETURN
9076*
9077* End of PB_PSLAPRN2
9078*
9079 END
9080 SUBROUTINE pb_sfillpad( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL )
9081*
9082* -- PBLAS test routine (version 2.0) --
9083* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9084* and University of California, Berkeley.
9085* April 1, 1998
9086*
9087* .. Scalar Arguments ..
9088 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9089 REAL CHKVAL
9090* ..
9091* .. Array Arguments ..
9092 REAL A( * )
9093* ..
9094*
9095* Purpose
9096* =======
9097*
9098* PB_SFILLPAD surrounds a two dimensional local array with a guard-zone
9099* initialized to the value CHKVAL. The user may later call the routine
9100* PB_SCHEKPAD to discover if the guardzone has been violated. There are
9101* three guardzones. The first is a buffer of size IPRE that is before
9102* the start of the array. The second is the buffer of size IPOST which
9103* is after the end of the array to be padded. Finally, there is a guard
9104* zone inside every column of the array to be padded, in the elements
9105* of A(M+1:LDA, J).
9106*
9107* Arguments
9108* =========
9109*
9110* ICTXT (local input) INTEGER
9111* On entry, ICTXT specifies the BLACS context handle, indica-
9112* ting the global context of the operation. The context itself
9113* is global, but the value of ICTXT is local.
9114*
9115* M (local input) INTEGER
9116* On entry, M specifies the number of rows in the local array
9117* A. M must be at least zero.
9118*
9119* N (local input) INTEGER
9120* On entry, N specifies the number of columns in the local ar-
9121* ray A. N must be at least zero.
9122*
9123* A (local input/local output) REAL array
9124* On entry, A is an array of dimension (LDA,N). On exit, this
9125* array is the padded array.
9126*
9127* LDA (local input) INTEGER
9128* On entry, LDA specifies the leading dimension of the local
9129* array to be padded. LDA must be at least MAX( 1, M ).
9130*
9131* IPRE (local input) INTEGER
9132* On entry, IPRE specifies the size of the guard zone to put
9133* before the start of the padded array.
9134*
9135* IPOST (local input) INTEGER
9136* On entry, IPOST specifies the size of the guard zone to put
9137* after the end of the padded array.
9138*
9139* CHKVAL (local input) REAL
9140* On entry, CHKVAL specifies the value to pad the array with.
9141*
9142* -- Written on April 1, 1998 by
9143* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9144*
9145* =====================================================================
9146*
9147* .. Local Scalars ..
9148 INTEGER I, J, K
9149* ..
9150* .. Executable Statements ..
9151*
9152* Put check buffer in front of A
9153*
9154 IF( IPRE.GT.0 ) THEN
9155 DO 10 I = 1, ipre
9156 a( i ) = chkval
9157 10 CONTINUE
9158 ELSE
9159 WRITE( *, fmt = '(A)' )
9160 $ 'WARNING no pre-guardzone in PB_SFILLPAD'
9161 END IF
9162*
9163* Put check buffer in back of A
9164*
9165 IF( ipost.GT.0 ) THEN
9166 j = ipre+lda*n+1
9167 DO 20 i = j, j+ipost-1
9168 a( i ) = chkval
9169 20 CONTINUE
9170 ELSE
9171 WRITE( *, fmt = '(A)' )
9172 $ 'WARNING no post-guardzone in PB_SFILLPAD'
9173 END IF
9174*
9175* Put check buffer in all (LDA-M) gaps
9176*
9177 IF( lda.GT.m ) THEN
9178 k = ipre + m + 1
9179 DO 40 j = 1, n
9180 DO 30 i = k, k + ( lda - m ) - 1
9181 a( i ) = chkval
9182 30 CONTINUE
9183 k = k + lda
9184 40 CONTINUE
9185 END IF
9186*
9187 RETURN
9188*
9189* End of PB_SFILLPAD
9190*
9191 END
9192 SUBROUTINE pb_schekpad( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST,
9193 $ CHKVAL )
9194*
9195* -- PBLAS test routine (version 2.0) --
9196* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9197* and University of California, Berkeley.
9198* April 1, 1998
9199*
9200* .. Scalar Arguments ..
9201 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9202 REAL CHKVAL
9203* ..
9204* .. Array Arguments ..
9205 CHARACTER*(*) MESS
9206 REAL A( * )
9207* ..
9208*
9209* Purpose
9210* =======
9211*
9212* PB_SCHEKPAD checks that the padding around a local array has not been
9213* overwritten since the call to PB_SFILLPAD. Three types of errors are
9214* reported:
9215*
9216* 1) Overwrite in pre-guardzone. This indicates a memory overwrite has
9217* occurred in the first IPRE elements which form a buffer before the
9218* beginning of A. Therefore, the error message:
9219* 'Overwrite in pre-guardzone: loc( 5) = 18.00000'
9220* tells that the 5th element of the IPRE long buffer has been overwrit-
9221* ten with the value 18, where it should still have the value CHKVAL.
9222*
9223* 2) Overwrite in post-guardzone. This indicates a memory overwrite has
9224* occurred in the last IPOST elements which form a buffer after the end
9225* of A. Error reports are refered from the end of A. Therefore,
9226* 'Overwrite in post-guardzone: loc( 19) = 24.00000'
9227* tells that the 19th element after the end of A was overwritten with
9228* the value 24, where it should still have the value of CHKVAL.
9229*
9230* 3) Overwrite in lda-m gap. Tells you elements between M and LDA were
9231* overwritten. So,
9232* 'Overwrite in lda-m gap: A( 12, 3) = 22.00000'
9233* tells that the element at the 12th row and 3rd column of A was over-
9234* written with the value of 22, where it should still have the value of
9235* CHKVAL.
9236*
9237* Arguments
9238* =========
9239*
9240* ICTXT (local input) INTEGER
9241* On entry, ICTXT specifies the BLACS context handle, indica-
9242* ting the global context of the operation. The context itself
9243* is global, but the value of ICTXT is local.
9244*
9245* MESS (local input) CHARACTER*(*)
9246* On entry, MESS is a ttring containing a user-defined message.
9247*
9248* M (local input) INTEGER
9249* On entry, M specifies the number of rows in the local array
9250* A. M must be at least zero.
9251*
9252* N (local input) INTEGER
9253* On entry, N specifies the number of columns in the local ar-
9254* ray A. N must be at least zero.
9255*
9256* A (local input) REAL array
9257* On entry, A is an array of dimension (LDA,N).
9258*
9259* LDA (local input) INTEGER
9260* On entry, LDA specifies the leading dimension of the local
9261* array to be padded. LDA must be at least MAX( 1, M ).
9262*
9263* IPRE (local input) INTEGER
9264* On entry, IPRE specifies the size of the guard zone to put
9265* before the start of the padded array.
9266*
9267* IPOST (local input) INTEGER
9268* On entry, IPOST specifies the size of the guard zone to put
9269* after the end of the padded array.
9270*
9271* CHKVAL (local input) REAL
9272* On entry, CHKVAL specifies the value to pad the array with.
9273*
9274*
9275* -- Written on April 1, 1998 by
9276* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9277*
9278* =====================================================================
9279*
9280* .. Local Scalars ..
9281 CHARACTER*1 TOP
9282 INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL,
9283 $ NPROW
9284* ..
9285* .. External Subroutines ..
9286 EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET
9287* ..
9288* .. Executable Statements ..
9289*
9290* Get grid parameters
9291*
9292 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
9293 IAM = myrow*npcol + mycol
9294 info = -1
9295*
9296* Check buffer in front of A
9297*
9298 IF( ipre.GT.0 ) THEN
9299 DO 10 i = 1, ipre
9300 IF( a( i ).NE.chkval ) THEN
9301 WRITE( *, fmt = 9998 ) myrow, mycol, mess, ' pre', i,
9302 $ a( i )
9303 info = iam
9304 END IF
9305 10 CONTINUE
9306 ELSE
9307 WRITE( *, fmt = * ) 'WARNING no pre-guardzone in PB_SCHEKPAD'
9308 END IF
9309*
9310* Check buffer after A
9311*
9312 IF( ipost.GT.0 ) THEN
9313 j = ipre+lda*n+1
9314 DO 20 i = j, j+ipost-1
9315 IF( a( i ).NE.chkval ) THEN
9316 WRITE( *, fmt = 9998 ) myrow, mycol, mess, 'post',
9317 $ i-j+1, a( i )
9318 info = iam
9319 END IF
9320 20 CONTINUE
9321 ELSE
9322 WRITE( *, fmt = * )
9323 $ 'WARNING no post-guardzone buffer in PB_SCHEKPAD'
9324 END IF
9325*
9326* Check all (LDA-M) gaps
9327*
9328 IF( lda.GT.m ) THEN
9329 k = ipre + m + 1
9330 DO 40 j = 1, n
9331 DO 30 i = k, k + (lda-m) - 1
9332 IF( a( i ).NE.chkval ) THEN
9333 WRITE( *, fmt = 9997 ) myrow, mycol, mess,
9334 $ i-ipre-lda*(j-1), j, a( i )
9335 info = iam
9336 END IF
9337 30 CONTINUE
9338 k = k + lda
9339 40 CONTINUE
9340 END IF
9341*
9342 CALL pb_topget( ictxt, 'Combine', 'All', top )
9343 CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, idumm, idumm, -1,
9344 $ 0, 0 )
9345 IF( iam.EQ.0 .AND. info.GE.0 ) THEN
9346 WRITE( *, fmt = 9999 ) info / npcol, mod( info, npcol ), mess
9347 END IF
9348*
9349 9999 FORMAT( '{', i5, ',', i5, '}: Memory overwrite in ', a )
9350 9998 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9351 $ a4, '-guardzone: loc(', i3, ') = ', g11.4 )
9352 9997 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9353 $ 'lda-m gap: loc(', i3, ',', i3, ') = ', g11.4 )
9354*
9355 RETURN
9356*
9357* End of PB_SCHEKPAD
9358*
9359 END
9360 SUBROUTINE pb_slaset( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA )
9361*
9362* -- PBLAS test routine (version 2.0) --
9363* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9364* and University of California, Berkeley.
9365* April 1, 1998
9366*
9367* .. Scalar Arguments ..
9368 CHARACTER*1 UPLO
9369 INTEGER IOFFD, LDA, M, N
9370 REAL ALPHA, BETA
9371* ..
9372* .. Array Arguments ..
9373 REAL A( LDA, * )
9374* ..
9375*
9376* Purpose
9377* =======
9378*
9379* PB_SLASET initializes a two-dimensional array A to beta on the diago-
9380* nal specified by IOFFD and alpha on the offdiagonals.
9381*
9382* Arguments
9383* =========
9384*
9385* UPLO (global input) CHARACTER*1
9386* On entry, UPLO specifies which trapezoidal part of the ar-
9387* ray A is to be set as follows:
9388* = 'L' or 'l': Lower triangular part is set; the strictly
9389* upper triangular part of A is not changed,
9390* = 'U' or 'u': Upper triangular part is set; the strictly
9391* lower triangular part of A is not changed,
9392* = 'D' or 'd' Only the diagonal of A is set,
9393* Otherwise: All of the array A is set.
9394*
9395* M (input) INTEGER
9396* On entry, M specifies the number of rows of the array A. M
9397* must be at least zero.
9398*
9399* N (input) INTEGER
9400* On entry, N specifies the number of columns of the array A.
9401* N must be at least zero.
9402*
9403* IOFFD (input) INTEGER
9404* On entry, IOFFD specifies the position of the offdiagonal de-
9405* limiting the upper and lower trapezoidal part of A as follows
9406* (see the notes below):
9407*
9408* IOFFD = 0 specifies the main diagonal A( i, i ),
9409* with i = 1 ... MIN( M, N ),
9410* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9411* with i = 1 ... MIN( M-IOFFD, N ),
9412* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9413* with i = 1 ... MIN( M, N+IOFFD ).
9414*
9415* ALPHA (input) REAL
9416* On entry, ALPHA specifies the value to which the offdiagonal
9417* array elements are set to.
9418*
9419* BETA (input) REAL
9420* On entry, BETA specifies the value to which the diagonal ar-
9421* ray elements are set to.
9422*
9423* A (input/output) REAL array
9424* On entry, A is an array of dimension (LDA,N). Before entry
9425* with UPLO = 'U' or 'u', the leading m by n part of the array
9426* A must contain the upper trapezoidal part of the matrix as
9427* specified by IOFFD to be set, and the strictly lower trape-
9428* zoidal part of A is not referenced; When IUPLO = 'L' or 'l',
9429* the leading m by n part of the array A must contain the
9430* lower trapezoidal part of the matrix as specified by IOFFD to
9431* be set, and the strictly upper trapezoidal part of A is
9432* not referenced.
9433*
9434* LDA (input) INTEGER
9435* On entry, LDA specifies the leading dimension of the array A.
9436* LDA must be at least max( 1, M ).
9437*
9438* Notes
9439* =====
9440* N N
9441* ---------------------------- -----------
9442* | d | | |
9443* M | d 'U' | | 'U' |
9444* | 'L' 'D' | |d |
9445* | d | M | d |
9446* ---------------------------- | 'D' |
9447* | d |
9448* IOFFD < 0 | 'L' d |
9449* | d|
9450* N | |
9451* ----------- -----------
9452* | d 'U'|
9453* | d | IOFFD > 0
9454* M | 'D' |
9455* | d| N
9456* | 'L' | ----------------------------
9457* | | | 'U' |
9458* | | |d |
9459* | | | 'D' |
9460* | | | d |
9461* | | |'L' d |
9462* ----------- ----------------------------
9463*
9464* -- Written on April 1, 1998 by
9465* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9466*
9467* =====================================================================
9468*
9469* .. Local Scalars ..
9470 INTEGER I, J, JTMP, MN
9471* ..
9472* .. External Functions ..
9473 LOGICAL LSAME
9474 EXTERNAL LSAME
9475* ..
9476* .. Intrinsic Functions ..
9477 INTRINSIC MAX, MIN
9478* ..
9479* .. Executable Statements ..
9480*
9481* Quick return if possible
9482*
9483 IF( M.LE.0 .OR. N.LE.0 )
9484 $ RETURN
9485*
9486* Start the operations
9487*
9488 IF( LSAME( UPLO, 'L' ) ) THEN
9489*
9490* Set the diagonal to BETA and the strictly lower triangular
9491* part of the array to ALPHA.
9492*
9493 mn = max( 0, -ioffd )
9494 DO 20 j = 1, min( mn, n )
9495 DO 10 i = 1, m
9496 a( i, j ) = alpha
9497 10 CONTINUE
9498 20 CONTINUE
9499 DO 40 j = mn + 1, min( m - ioffd, n )
9500 jtmp = j + ioffd
9501 a( jtmp, j ) = beta
9502 DO 30 i = jtmp + 1, m
9503 a( i, j ) = alpha
9504 30 CONTINUE
9505 40 CONTINUE
9506*
9507 ELSE IF( lsame( uplo, 'U' ) ) THEN
9508*
9509* Set the diagonal to BETA and the strictly upper triangular
9510* part of the array to ALPHA.
9511*
9512 mn = min( m - ioffd, n )
9513 DO 60 j = max( 0, -ioffd ) + 1, mn
9514 jtmp = j + ioffd
9515 DO 50 i = 1, jtmp - 1
9516 a( i, j ) = alpha
9517 50 CONTINUE
9518 a( jtmp, j ) = beta
9519 60 CONTINUE
9520 DO 80 j = max( 0, mn ) + 1, n
9521 DO 70 i = 1, m
9522 a( i, j ) = alpha
9523 70 CONTINUE
9524 80 CONTINUE
9525*
9526 ELSE IF( lsame( uplo, 'D' ) ) THEN
9527*
9528* Set the array to BETA on the diagonal.
9529*
9530 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9531 a( j + ioffd, j ) = beta
9532 90 CONTINUE
9533*
9534 ELSE
9535*
9536* Set the array to BETA on the diagonal and ALPHA on the
9537* offdiagonal.
9538*
9539 DO 110 j = 1, n
9540 DO 100 i = 1, m
9541 a( i, j ) = alpha
9542 100 CONTINUE
9543 110 CONTINUE
9544 IF( alpha.NE.beta .AND. ioffd.LT.m .AND. ioffd.GT.-n ) THEN
9545 DO 120 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9546 a( j + ioffd, j ) = beta
9547 120 CONTINUE
9548 END IF
9549*
9550 END IF
9551*
9552 RETURN
9553*
9554* End of PB_SLASET
9555*
9556 END
9557 SUBROUTINE pb_slascal( UPLO, M, N, IOFFD, ALPHA, A, LDA )
9558*
9559* -- PBLAS test routine (version 2.0) --
9560* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9561* and University of California, Berkeley.
9562* April 1, 1998
9563*
9564* .. Scalar Arguments ..
9565 CHARACTER*1 UPLO
9566 INTEGER IOFFD, LDA, M, N
9567 REAL ALPHA
9568* ..
9569* .. Array Arguments ..
9570 REAL A( LDA, * )
9571* ..
9572*
9573* Purpose
9574* =======
9575*
9576* PB_SLASCAL scales a two-dimensional array A by the scalar alpha.
9577*
9578* Arguments
9579* =========
9580*
9581* UPLO (input) CHARACTER*1
9582* On entry, UPLO specifies which trapezoidal part of the ar-
9583* ray A is to be scaled as follows:
9584* = 'L' or 'l': the lower trapezoid of A is scaled,
9585* = 'U' or 'u': the upper trapezoid of A is scaled,
9586* = 'D' or 'd': diagonal specified by IOFFD is scaled,
9587* Otherwise: all of the array A is scaled.
9588*
9589* M (input) INTEGER
9590* On entry, M specifies the number of rows of the array A. M
9591* must be at least zero.
9592*
9593* N (input) INTEGER
9594* On entry, N specifies the number of columns of the array A.
9595* N must be at least zero.
9596*
9597* IOFFD (input) INTEGER
9598* On entry, IOFFD specifies the position of the offdiagonal de-
9599* limiting the upper and lower trapezoidal part of A as follows
9600* (see the notes below):
9601*
9602* IOFFD = 0 specifies the main diagonal A( i, i ),
9603* with i = 1 ... MIN( M, N ),
9604* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9605* with i = 1 ... MIN( M-IOFFD, N ),
9606* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9607* with i = 1 ... MIN( M, N+IOFFD ).
9608*
9609* ALPHA (input) REAL
9610* On entry, ALPHA specifies the scalar alpha.
9611*
9612* A (input/output) REAL array
9613* On entry, A is an array of dimension (LDA,N). Before entry
9614* with UPLO = 'U' or 'u', the leading m by n part of the array
9615* A must contain the upper trapezoidal part of the matrix as
9616* specified by IOFFD to be scaled, and the strictly lower tra-
9617* pezoidal part of A is not referenced; When UPLO = 'L' or 'l',
9618* the leading m by n part of the array A must contain the lower
9619* trapezoidal part of the matrix as specified by IOFFD to be
9620* scaled, and the strictly upper trapezoidal part of A is not
9621* referenced. On exit, the entries of the trapezoid part of A
9622* determined by UPLO and IOFFD are scaled.
9623*
9624* LDA (input) INTEGER
9625* On entry, LDA specifies the leading dimension of the array A.
9626* LDA must be at least max( 1, M ).
9627*
9628* Notes
9629* =====
9630* N N
9631* ---------------------------- -----------
9632* | d | | |
9633* M | d 'U' | | 'U' |
9634* | 'L' 'D' | |d |
9635* | d | M | d |
9636* ---------------------------- | 'D' |
9637* | d |
9638* IOFFD < 0 | 'L' d |
9639* | d|
9640* N | |
9641* ----------- -----------
9642* | d 'U'|
9643* | d | IOFFD > 0
9644* M | 'D' |
9645* | d| N
9646* | 'L' | ----------------------------
9647* | | | 'U' |
9648* | | |d |
9649* | | | 'D' |
9650* | | | d |
9651* | | |'L' d |
9652* ----------- ----------------------------
9653*
9654* -- Written on April 1, 1998 by
9655* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9656*
9657* =====================================================================
9658*
9659* .. Local Scalars ..
9660 INTEGER I, J, JTMP, MN
9661* ..
9662* .. External Functions ..
9663 LOGICAL LSAME
9664 EXTERNAL LSAME
9665* ..
9666* .. Intrinsic Functions ..
9667 INTRINSIC MAX, MIN
9668* ..
9669* .. Executable Statements ..
9670*
9671* Quick return if possible
9672*
9673 IF( M.LE.0 .OR. N.LE.0 )
9674 $ RETURN
9675*
9676* Start the operations
9677*
9678 IF( LSAME( UPLO, 'L' ) ) THEN
9679*
9680* Scales the lower triangular part of the array by ALPHA.
9681*
9682 MN = max( 0, -ioffd )
9683 DO 20 j = 1, min( mn, n )
9684 DO 10 i = 1, m
9685 a( i, j ) = alpha * a( i, j )
9686 10 CONTINUE
9687 20 CONTINUE
9688 DO 40 j = mn + 1, min( m - ioffd, n )
9689 DO 30 i = j + ioffd, m
9690 a( i, j ) = alpha * a( i, j )
9691 30 CONTINUE
9692 40 CONTINUE
9693*
9694 ELSE IF( lsame( uplo, 'U' ) ) THEN
9695*
9696* Scales the upper triangular part of the array by ALPHA.
9697*
9698 mn = min( m - ioffd, n )
9699 DO 60 j = max( 0, -ioffd ) + 1, mn
9700 DO 50 i = 1, j + ioffd
9701 a( i, j ) = alpha * a( i, j )
9702 50 CONTINUE
9703 60 CONTINUE
9704 DO 80 j = max( 0, mn ) + 1, n
9705 DO 70 i = 1, m
9706 a( i, j ) = alpha * a( i, j )
9707 70 CONTINUE
9708 80 CONTINUE
9709*
9710 ELSE IF( lsame( uplo, 'D' ) ) THEN
9711*
9712* Scales the diagonal entries by ALPHA.
9713*
9714 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9715 jtmp = j + ioffd
9716 a( jtmp, j ) = alpha * a( jtmp, j )
9717 90 CONTINUE
9718*
9719 ELSE
9720*
9721* Scales the entire array by ALPHA.
9722*
9723 DO 110 j = 1, n
9724 DO 100 i = 1, m
9725 a( i, j ) = alpha * a( i, j )
9726 100 CONTINUE
9727 110 CONTINUE
9728*
9729 END IF
9730*
9731 RETURN
9732*
9733* End of PB_SLASCAL
9734*
9735 END
9736 SUBROUTINE pb_slagen( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS,
9737 $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB,
9738 $ LNBLOC, JMP, IMULADD )
9739*
9740* -- PBLAS test routine (version 2.0) --
9741* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9742* and University of California, Berkeley.
9743* April 1, 1998
9744*
9745* .. Scalar Arguments ..
9746 CHARACTER*1 UPLO, AFORM
9747 INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC,
9748 $ mb, mblks, nb, nblks
9749* ..
9750* .. Array Arguments ..
9751 INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * )
9752 REAL A( LDA, * )
9753* ..
9754*
9755* Purpose
9756* =======
9757*
9758* PB_SLAGEN locally initializes an array A.
9759*
9760* Arguments
9761* =========
9762*
9763* UPLO (global input) CHARACTER*1
9764* On entry, UPLO specifies whether the lower (UPLO='L') trape-
9765* zoidal part or the upper (UPLO='U') trapezoidal part is to be
9766* generated when the matrix to be generated is symmetric or
9767* Hermitian. For all the other values of AFORM, the value of
9768* this input argument is ignored.
9769*
9770* AFORM (global input) CHARACTER*1
9771* On entry, AFORM specifies the type of submatrix to be genera-
9772* ted as follows:
9773* AFORM = 'S', sub( A ) is a symmetric matrix,
9774* AFORM = 'H', sub( A ) is a Hermitian matrix,
9775* AFORM = 'T', sub( A ) is overrwritten with the transpose
9776* of what would normally be generated,
9777* AFORM = 'C', sub( A ) is overwritten with the conjugate
9778* transpose of what would normally be genera-
9779* ted.
9780* AFORM = 'N', a random submatrix is generated.
9781*
9782* A (local output) REAL array
9783* On entry, A is an array of dimension (LLD_A, *). On exit,
9784* this array contains the local entries of the randomly genera-
9785* ted submatrix sub( A ).
9786*
9787* LDA (local input) INTEGER
9788* On entry, LDA specifies the local leading dimension of the
9789* array A. LDA must be at least one.
9790*
9791* LCMT00 (global input) INTEGER
9792* On entry, LCMT00 is the LCM value specifying the off-diagonal
9793* of the underlying matrix of interest. LCMT00=0 specifies the
9794* main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0
9795* specifies superdiagonals.
9796*
9797* IRAN (local input) INTEGER array
9798* On entry, IRAN is an array of dimension 2 containing respec-
9799* tively the 16-lower and 16-higher bits of the encoding of the
9800* entry of the random sequence corresponding locally to the
9801* first local array entry to generate. Usually, this array is
9802* computed by PB_SETLOCRAN.
9803*
9804* MBLKS (local input) INTEGER
9805* On entry, MBLKS specifies the local number of blocks of rows.
9806* MBLKS is at least zero.
9807*
9808* IMBLOC (local input) INTEGER
9809* On entry, IMBLOC specifies the number of rows (size) of the
9810* local uppest blocks. IMBLOC is at least zero.
9811*
9812* MB (global input) INTEGER
9813* On entry, MB specifies the blocking factor used to partition
9814* the rows of the matrix. MB must be at least one.
9815*
9816* LMBLOC (local input) INTEGER
9817* On entry, LMBLOC specifies the number of rows (size) of the
9818* local lowest blocks. LMBLOC is at least zero.
9819*
9820* NBLKS (local input) INTEGER
9821* On entry, NBLKS specifies the local number of blocks of co-
9822* lumns. NBLKS is at least zero.
9823*
9824* INBLOC (local input) INTEGER
9825* On entry, INBLOC specifies the number of columns (size) of
9826* the local leftmost blocks. INBLOC is at least zero.
9827*
9828* NB (global input) INTEGER
9829* On entry, NB specifies the blocking factor used to partition
9830* the the columns of the matrix. NB must be at least one.
9831*
9832* LNBLOC (local input) INTEGER
9833* On entry, LNBLOC specifies the number of columns (size) of
9834* the local rightmost blocks. LNBLOC is at least zero.
9835*
9836* JMP (local input) INTEGER array
9837* On entry, JMP is an array of dimension JMP_LEN containing the
9838* different jump values used by the random matrix generator.
9839*
9840* IMULADD (local input) INTEGER array
9841* On entry, IMULADD is an array of dimension (4, JMP_LEN). The
9842* jth column of this array contains the encoded initial cons-
9843* tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) )
9844* (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j)
9845* contains respectively the 16-lower and 16-higher bits of the
9846* constant a_j, and IMULADD(3:4,j) contains the 16-lower and
9847* 16-higher bits of the constant c_j.
9848*
9849* -- Written on April 1, 1998 by
9850* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9851*
9852* =====================================================================
9853*
9854* .. Parameters ..
9855 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
9856 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
9857 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
9858 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
9859 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
9860 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
9861 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
9862 $ jmp_len = 11 )
9863* ..
9864* .. Local Scalars ..
9865 INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK,
9866 $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP
9867 REAL DUMMY
9868* ..
9869* .. Local Arrays ..
9870 INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 )
9871* ..
9872* .. External Subroutines ..
9873 EXTERNAL PB_JUMPIT
9874* ..
9875* .. External Functions ..
9876 LOGICAL LSAME
9877 REAL PB_SRAND
9878 EXTERNAL LSAME, PB_SRAND
9879* ..
9880* .. Intrinsic Functions ..
9881 INTRINSIC max, min
9882* ..
9883* .. Executable Statements ..
9884*
9885 DO 10 i = 1, 2
9886 ib1( i ) = iran( i )
9887 ib2( i ) = iran( i )
9888 ib3( i ) = iran( i )
9889 10 CONTINUE
9890*
9891 IF( lsame( aform, 'N' ) ) THEN
9892*
9893* Generate random matrix
9894*
9895 jj = 1
9896*
9897 DO 50 jblk = 1, nblks
9898*
9899 IF( jblk.EQ.1 ) THEN
9900 jb = inbloc
9901 ELSE IF( jblk.EQ.nblks ) THEN
9902 jb = lnbloc
9903 ELSE
9904 jb = nb
9905 END IF
9906*
9907 DO 40 jk = jj, jj + jb - 1
9908*
9909 ii = 1
9910*
9911 DO 30 iblk = 1, mblks
9912*
9913 IF( iblk.EQ.1 ) THEN
9914 ib = imbloc
9915 ELSE IF( iblk.EQ.mblks ) THEN
9916 ib = lmbloc
9917 ELSE
9918 ib = mb
9919 END IF
9920*
9921* Blocks are IB by JB
9922*
9923 DO 20 ik = ii, ii + ib - 1
9924 a( ik, jk ) = pb_srand( 0 )
9925 20 CONTINUE
9926*
9927 ii = ii + ib
9928*
9929 IF( iblk.EQ.1 ) THEN
9930*
9931* Jump IMBLOC + ( NPROW - 1 ) * MB rows
9932*
9933 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
9934 $ ib0 )
9935*
9936 ELSE
9937*
9938* Jump NPROW * MB rows
9939*
9940 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1, ib0 )
9941*
9942 END IF
9943*
9944 ib1( 1 ) = ib0( 1 )
9945 ib1( 2 ) = ib0( 2 )
9946*
9947 30 CONTINUE
9948*
9949* Jump one column
9950*
9951 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
9952*
9953 ib1( 1 ) = ib0( 1 )
9954 ib1( 2 ) = ib0( 2 )
9955 ib2( 1 ) = ib0( 1 )
9956 ib2( 2 ) = ib0( 2 )
9957*
9958 40 CONTINUE
9959*
9960 jj = jj + jb
9961*
9962 IF( jblk.EQ.1 ) THEN
9963*
9964* Jump INBLOC + ( NPCOL - 1 ) * NB columns
9965*
9966 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
9967*
9968 ELSE
9969*
9970* Jump NPCOL * NB columns
9971*
9972 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
9973*
9974 END IF
9975*
9976 ib1( 1 ) = ib0( 1 )
9977 ib1( 2 ) = ib0( 2 )
9978 ib2( 1 ) = ib0( 1 )
9979 ib2( 2 ) = ib0( 2 )
9980 ib3( 1 ) = ib0( 1 )
9981 ib3( 2 ) = ib0( 2 )
9982*
9983 50 CONTINUE
9984*
9985 ELSE IF( lsame( aform, 'T' ) .OR. lsame( aform, 'C' ) ) THEN
9986*
9987* Generate the transpose of the matrix that would be normally
9988* generated.
9989*
9990 ii = 1
9991*
9992 DO 90 iblk = 1, mblks
9993*
9994 IF( iblk.EQ.1 ) THEN
9995 ib = imbloc
9996 ELSE IF( iblk.EQ.mblks ) THEN
9997 ib = lmbloc
9998 ELSE
9999 ib = mb
10000 END IF
10001*
10002 DO 80 ik = ii, ii + ib - 1
10003*
10004 jj = 1
10005*
10006 DO 70 jblk = 1, nblks
10007*
10008 IF( jblk.EQ.1 ) THEN
10009 jb = inbloc
10010 ELSE IF( jblk.EQ.nblks ) THEN
10011 jb = lnbloc
10012 ELSE
10013 jb = nb
10014 END IF
10015*
10016* Blocks are IB by JB
10017*
10018 DO 60 jk = jj, jj + jb - 1
10019 a( ik, jk ) = pb_srand( 0 )
10020 60 CONTINUE
10021*
10022 jj = jj + jb
10023*
10024 IF( jblk.EQ.1 ) THEN
10025*
10026* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10027*
10028 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10029 $ ib0 )
10030*
10031 ELSE
10032*
10033* Jump NPCOL * NB columns
10034*
10035 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1, ib0 )
10036*
10037 END IF
10038*
10039 ib1( 1 ) = ib0( 1 )
10040 ib1( 2 ) = ib0( 2 )
10041*
10042 70 CONTINUE
10043*
10044* Jump one row
10045*
10046 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10047*
10048 ib1( 1 ) = ib0( 1 )
10049 ib1( 2 ) = ib0( 2 )
10050 ib2( 1 ) = ib0( 1 )
10051 ib2( 2 ) = ib0( 2 )
10052*
10053 80 CONTINUE
10054*
10055 ii = ii + ib
10056*
10057 IF( iblk.EQ.1 ) THEN
10058*
10059* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10060*
10061 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10062*
10063 ELSE
10064*
10065* Jump NPROW * MB rows
10066*
10067 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10068*
10069 END IF
10070*
10071 ib1( 1 ) = ib0( 1 )
10072 ib1( 2 ) = ib0( 2 )
10073 ib2( 1 ) = ib0( 1 )
10074 ib2( 2 ) = ib0( 2 )
10075 ib3( 1 ) = ib0( 1 )
10076 ib3( 2 ) = ib0( 2 )
10077*
10078 90 CONTINUE
10079*
10080 ELSE IF( ( lsame( aform, 'S' ) ).OR.( lsame( aform, 'H' ) ) ) THEN
10081*
10082* Generate a symmetric matrix
10083*
10084 IF( lsame( uplo, 'L' ) ) THEN
10085*
10086* generate lower trapezoidal part
10087*
10088 jj = 1
10089 lcmtc = lcmt00
10090*
10091 DO 170 jblk = 1, nblks
10092*
10093 IF( jblk.EQ.1 ) THEN
10094 jb = inbloc
10095 low = 1 - inbloc
10096 ELSE IF( jblk.EQ.nblks ) THEN
10097 jb = lnbloc
10098 low = 1 - nb
10099 ELSE
10100 jb = nb
10101 low = 1 - nb
10102 END IF
10103*
10104 DO 160 jk = jj, jj + jb - 1
10105*
10106 ii = 1
10107 lcmtr = lcmtc
10108*
10109 DO 150 iblk = 1, mblks
10110*
10111 IF( iblk.EQ.1 ) THEN
10112 ib = imbloc
10113 upp = imbloc - 1
10114 ELSE IF( iblk.EQ.mblks ) THEN
10115 ib = lmbloc
10116 upp = mb - 1
10117 ELSE
10118 ib = mb
10119 upp = mb - 1
10120 END IF
10121*
10122* Blocks are IB by JB
10123*
10124 IF( lcmtr.GT.upp ) THEN
10125*
10126 DO 100 ik = ii, ii + ib - 1
10127 dummy = pb_srand( 0 )
10128 100 CONTINUE
10129*
10130 ELSE IF( lcmtr.GE.low ) THEN
10131*
10132 jtmp = jk - jj + 1
10133 mnb = max( 0, -lcmtr )
10134*
10135 IF( jtmp.LE.min( mnb, jb ) ) THEN
10136*
10137 DO 110 ik = ii, ii + ib - 1
10138 a( ik, jk ) = pb_srand( 0 )
10139 110 CONTINUE
10140*
10141 ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
10142 $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
10143*
10144 itmp = ii + jtmp + lcmtr - 1
10145*
10146 DO 120 ik = ii, itmp - 1
10147 dummy = pb_srand( 0 )
10148 120 CONTINUE
10149*
10150 DO 130 ik = itmp, ii + ib - 1
10151 a( ik, jk ) = pb_srand( 0 )
10152 130 CONTINUE
10153*
10154 END IF
10155*
10156 ELSE
10157*
10158 DO 140 ik = ii, ii + ib - 1
10159 a( ik, jk ) = pb_srand( 0 )
10160 140 CONTINUE
10161*
10162 END IF
10163*
10164 ii = ii + ib
10165*
10166 IF( iblk.EQ.1 ) THEN
10167*
10168* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10169*
10170 lcmtr = lcmtr - jmp( jmp_npimbloc )
10171 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10172 $ ib0 )
10173*
10174 ELSE
10175*
10176* Jump NPROW * MB rows
10177*
10178 lcmtr = lcmtr - jmp( jmp_npmb )
10179 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
10180 $ ib0 )
10181*
10182 END IF
10183*
10184 ib1( 1 ) = ib0( 1 )
10185 ib1( 2 ) = ib0( 2 )
10186*
10187 150 CONTINUE
10188*
10189* Jump one column
10190*
10191 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10192*
10193 ib1( 1 ) = ib0( 1 )
10194 ib1( 2 ) = ib0( 2 )
10195 ib2( 1 ) = ib0( 1 )
10196 ib2( 2 ) = ib0( 2 )
10197*
10198 160 CONTINUE
10199*
10200 jj = jj + jb
10201*
10202 IF( jblk.EQ.1 ) THEN
10203*
10204* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10205*
10206 lcmtc = lcmtc + jmp( jmp_nqinbloc )
10207 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10208*
10209 ELSE
10210*
10211* Jump NPCOL * NB columns
10212*
10213 lcmtc = lcmtc + jmp( jmp_nqnb )
10214 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10215*
10216 END IF
10217*
10218 ib1( 1 ) = ib0( 1 )
10219 ib1( 2 ) = ib0( 2 )
10220 ib2( 1 ) = ib0( 1 )
10221 ib2( 2 ) = ib0( 2 )
10222 ib3( 1 ) = ib0( 1 )
10223 ib3( 2 ) = ib0( 2 )
10224*
10225 170 CONTINUE
10226*
10227 ELSE
10228*
10229* generate upper trapezoidal part
10230*
10231 ii = 1
10232 lcmtr = lcmt00
10233*
10234 DO 250 iblk = 1, mblks
10235*
10236 IF( iblk.EQ.1 ) THEN
10237 ib = imbloc
10238 upp = imbloc - 1
10239 ELSE IF( iblk.EQ.mblks ) THEN
10240 ib = lmbloc
10241 upp = mb - 1
10242 ELSE
10243 ib = mb
10244 upp = mb - 1
10245 END IF
10246*
10247 DO 240 ik = ii, ii + ib - 1
10248*
10249 jj = 1
10250 lcmtc = lcmtr
10251*
10252 DO 230 jblk = 1, nblks
10253*
10254 IF( jblk.EQ.1 ) THEN
10255 jb = inbloc
10256 low = 1 - inbloc
10257 ELSE IF( jblk.EQ.nblks ) THEN
10258 jb = lnbloc
10259 low = 1 - nb
10260 ELSE
10261 jb = nb
10262 low = 1 - nb
10263 END IF
10264*
10265* Blocks are IB by JB
10266*
10267 IF( lcmtc.LT.low ) THEN
10268*
10269 DO 180 jk = jj, jj + jb - 1
10270 dummy = pb_srand( 0 )
10271 180 CONTINUE
10272*
10273 ELSE IF( lcmtc.LE.upp ) THEN
10274*
10275 itmp = ik - ii + 1
10276 mnb = max( 0, lcmtc )
10277*
10278 IF( itmp.LE.min( mnb, ib ) ) THEN
10279*
10280 DO 190 jk = jj, jj + jb - 1
10281 a( ik, jk ) = pb_srand( 0 )
10282 190 CONTINUE
10283*
10284 ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
10285 $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
10286*
10287 jtmp = jj + itmp - lcmtc - 1
10288*
10289 DO 200 jk = jj, jtmp - 1
10290 dummy = pb_srand( 0 )
10291 200 CONTINUE
10292*
10293 DO 210 jk = jtmp, jj + jb - 1
10294 a( ik, jk ) = pb_srand( 0 )
10295 210 CONTINUE
10296*
10297 END IF
10298*
10299 ELSE
10300*
10301 DO 220 jk = jj, jj + jb - 1
10302 a( ik, jk ) = pb_srand( 0 )
10303 220 CONTINUE
10304*
10305 END IF
10306*
10307 jj = jj + jb
10308*
10309 IF( jblk.EQ.1 ) THEN
10310*
10311* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10312*
10313 lcmtc = lcmtc + jmp( jmp_nqinbloc )
10314 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10315 $ ib0 )
10316*
10317 ELSE
10318*
10319* Jump NPCOL * NB columns
10320*
10321 lcmtc = lcmtc + jmp( jmp_nqnb )
10322 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
10323 $ ib0 )
10324*
10325 END IF
10326*
10327 ib1( 1 ) = ib0( 1 )
10328 ib1( 2 ) = ib0( 2 )
10329*
10330 230 CONTINUE
10331*
10332* Jump one row
10333*
10334 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10335*
10336 ib1( 1 ) = ib0( 1 )
10337 ib1( 2 ) = ib0( 2 )
10338 ib2( 1 ) = ib0( 1 )
10339 ib2( 2 ) = ib0( 2 )
10340*
10341 240 CONTINUE
10342*
10343 ii = ii + ib
10344*
10345 IF( iblk.EQ.1 ) THEN
10346*
10347* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10348*
10349 lcmtr = lcmtr - jmp( jmp_npimbloc )
10350 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10351*
10352 ELSE
10353*
10354* Jump NPROW * MB rows
10355*
10356 lcmtr = lcmtr - jmp( jmp_npmb )
10357 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10358*
10359 END IF
10360*
10361 ib1( 1 ) = ib0( 1 )
10362 ib1( 2 ) = ib0( 2 )
10363 ib2( 1 ) = ib0( 1 )
10364 ib2( 2 ) = ib0( 2 )
10365 ib3( 1 ) = ib0( 1 )
10366 ib3( 2 ) = ib0( 2 )
10367*
10368 250 CONTINUE
10369*
10370 END IF
10371*
10372 END IF
10373*
10374 RETURN
10375*
10376* End of PB_SLAGEN
10377*
10378 END
10379 REAL function pb_srand( idumm )
10380*
10381* -- PBLAS test routine (version 2.0) --
10382* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10383* and University of California, Berkeley.
10384* April 1, 1998
10385*
10386* .. Scalar Arguments ..
10387 INTEGER idumm
10388* ..
10389*
10390* Purpose
10391* =======
10392*
10393* PB_SRAND generates the next number in the random sequence. This func-
10394* tion ensures that this number will be in the interval ( -1.0, 1.0 ).
10395*
10396* Arguments
10397* =========
10398*
10399* IDUMM (local input) INTEGER
10400* This argument is ignored, but necessary to a FORTRAN 77 func-
10401* tion.
10402*
10403* Further Details
10404* ===============
10405*
10406* On entry, the array IRAND stored in the common block RANCOM contains
10407* the information (2 integers) required to generate the next number in
10408* the sequence X( n ). This number is computed as
10409*
10410* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10411*
10412* where the constant d is the largest 32 bit positive integer. The
10413* array IRAND is then updated for the generation of the next number
10414* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10415* The constants a and c should have been preliminarily stored in the
10416* array IACS as 2 pairs of integers. The initial set up of IRAND and
10417* IACS is performed by the routine PB_SETRAN.
10418*
10419* -- Written on April 1, 1998 by
10420* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10421*
10422* =====================================================================
10423*
10424* .. Parameters ..
10425 REAL one, two
10426 PARAMETER ( one = 1.0e+0, two = 2.0e+0 )
10427* ..
10428* .. External Functions ..
10429 REAL pb_sran
10430 EXTERNAL pb_sran
10431* ..
10432* .. Executable Statements ..
10433*
10434 pb_srand = one - two * pb_sran( idumm )
10435*
10436 RETURN
10437*
10438* End of PB_SRAND
10439*
10440 END
10441 REAL function pb_sran( idumm )
10442*
10443* -- PBLAS test routine (version 2.0) --
10444* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10445* and University of California, Berkeley.
10446* April 1, 1998
10447*
10448* .. Scalar Arguments ..
10449 INTEGER idumm
10450* ..
10451*
10452* Purpose
10453* =======
10454*
10455* PB_SRAN generates the next number in the random sequence.
10456*
10457* Arguments
10458* =========
10459*
10460* IDUMM (local input) INTEGER
10461* This argument is ignored, but necessary to a FORTRAN 77 func-
10462* tion.
10463*
10464* Further Details
10465* ===============
10466*
10467* On entry, the array IRAND stored in the common block RANCOM contains
10468* the information (2 integers) required to generate the next number in
10469* the sequence X( n ). This number is computed as
10470*
10471* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10472*
10473* where the constant d is the largest 32 bit positive integer. The
10474* array IRAND is then updated for the generation of the next number
10475* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10476* The constants a and c should have been preliminarily stored in the
10477* array IACS as 2 pairs of integers. The initial set up of IRAND and
10478* IACS is performed by the routine PB_SETRAN.
10479*
10480* -- Written on April 1, 1998 by
10481* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10482*
10483* =====================================================================
10484*
10485* .. Parameters ..
10486 REAL divfac, pow16
10487 PARAMETER ( divfac = 2.147483648e+9,
10488 $ pow16 = 6.5536e+4 )
10489* ..
10490* .. Local Arrays ..
10491 INTEGER j( 2 )
10492* ..
10493* .. External Subroutines ..
10494 EXTERNAL pb_ladd, pb_lmul
10495* ..
10496* .. Intrinsic Functions ..
10497 INTRINSIC real
10498* ..
10499* .. Common Blocks ..
10500 INTEGER iacs( 4 ), irand( 2 )
10501 common /rancom/ irand, iacs
10502* ..
10503* .. Save Statements ..
10504 SAVE /rancom/
10505* ..
10506* .. Executable Statements ..
10507*
10508 pb_sran = ( real( irand( 1 ) ) + pow16 * real( irand( 2 ) ) ) /
10509 $ divfac
10510*
10511 CALL pb_lmul( irand, iacs, j )
10512 CALL pb_ladd( j, iacs( 3 ), irand )
10513*
10514 RETURN
10515*
10516* End of PB_SRAN
10517*
10518 END
subroutine pb_ainfog2l(m, n, i, j, desc, nprow, npcol, myrow, mycol, imb1, inb1, mp, nq, ii, jj, prow, pcol, rprow, rpcol)
Definition pblastst.f:2023
subroutine pb_descset2(desc, m, n, imb, inb, mb, nb, rsrc, csrc, ctxt, lld)
Definition pblastst.f:3172
subroutine pb_ladd(j, k, i)
Definition pblastst.f:4480
subroutine pb_binfo(offd, m, n, imb1, inb1, mb, nb, mrrow, mrcol, lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, iupp, upp)
Definition pblastst.f:3577
subroutine pb_setran(iran, iac)
Definition pblastst.f:4759
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
subroutine pb_locinfo(i, inb, nb, myroc, srcproc, nprocs, ilocblk, ilocoff, mydist)
Definition pblastst.f:3910
subroutine pchkpbe(ictxt, nout, sname, infot)
Definition pblastst.f:1084
subroutine pb_chkmat(ictxt, m, mpos0, n, npos0, ia, ja, desca, dpos0, info)
Definition pblastst.f:2742
subroutine pb_lmul(k, j, i)
Definition pblastst.f:4559
subroutine pb_jump(k, muladd, irann, iranm, ima)
Definition pblastst.f:4648
subroutine pb_setlocran(seed, ilocblk, jlocblk, ilocoff, jlocoff, myrdist, mycdist, nprow, npcol, jmp, imuladd, iran)
Definition pblastst.f:4302
subroutine pb_initmuladd(muladd0, jmp, imuladd)
Definition pblastst.f:4196
subroutine pb_desctrans(descin, descout)
Definition pblastst.f:2964
subroutine pb_initjmp(colmaj, nvir, imbvir, inbvir, imbloc, inbloc, mb, nb, rsrc, csrc, nprow, npcol, stride, jmp)
Definition pblastst.f:4045
subroutine pb_jumpit(muladd, irann, iranm)
Definition pblastst.f:4822
real function pb_sran(idumm)
real function pb_srand(idumm)
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pssetpblas(ictxt)
Definition psblastst.f:1478
subroutine psdimee(ictxt, nout, subptr, scode, sname)
Definition psblastst.f:455
subroutine pschkmin(errmax, m, n, a, pa, ia, ja, desca, info)
Definition psblastst.f:3326
subroutine pschkmout(m, n, a, pa, ia, ja, desca, info)
Definition psblastst.f:3627
subroutine pb_slascal(uplo, m, n, ioffd, alpha, a, lda)
Definition psblastst.f:9558
subroutine pschkopt(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition psblastst.f:266
subroutine pschkvin(errmax, n, x, px, ix, jx, descx, incx, info)
Definition psblastst.f:2576
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition psblastst.f:6863
subroutine pb_pslaprn2(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, prow, pcol, work)
Definition psblastst.f:8850
subroutine psmmch(ictxt, transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition psblastst.f:5272
subroutine pb_sfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psblastst.f:9081
subroutine pb_slaset(uplo, m, n, ioffd, alpha, beta, a, lda)
Definition psblastst.f:9361
subroutine psvmch(ictxt, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition psblastst.f:4570
subroutine pserraxpby(errbnd, alpha, x, beta, y, prec)
Definition psblastst.f:6684
subroutine pschkmat(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition psblastst.f:1674
subroutine psvecee(ictxt, nout, subptr, scode, sname)
Definition psblastst.f:936
subroutine psoptee(ictxt, nout, subptr, scode, sname)
Definition psblastst.f:2
subroutine psmmch2(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition psblastst.f:5996
subroutine psvprnt(ictxt, nout, n, x, incx, irprnt, icprnt, cvecnm)
Definition psblastst.f:4056
subroutine psladom(inplace, n, alpha, a, ia, ja, desca)
Definition psblastst.f:8244
subroutine pslagen(inplace, aform, diag, offa, m, n, ia, ja, desca, iaseed, a, lda)
Definition psblastst.f:7846
subroutine pslascal(type, m, n, alpha, a, ia, ja, desca)
Definition psblastst.f:7338
subroutine pb_schekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition psblastst.f:9194
subroutine psmatee(ictxt, nout, subptr, scode, sname)
Definition psblastst.f:1190
subroutine pserrset(err, errmax, xtrue, x)
Definition psblastst.f:2456
subroutine psmvch(ictxt, trans, m, n, alpha, a, ia, ja, desca, x, ix, jx, descx, incx, beta, y, py, iy, jy, descy, incy, g, err, info)
Definition psblastst.f:4157
subroutine pb_pslaprnt(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, work)
Definition psblastst.f:8636
subroutine psmprnt(ictxt, nout, m, n, a, lda, irprnt, icprnt, cmatnm)
Definition psblastst.f:3949
subroutine pschkdim(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition psblastst.f:759
subroutine psmmch1(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition psblastst.f:5649
subroutine pb_slagen(uplo, aform, a, lda, lcmt00, iran, mblks, imbloc, mb, lmbloc, nblks, inbloc, nb, lnbloc, jmp, imuladd)
Definition psblastst.f:9739
subroutine pschkvout(n, x, px, ix, jx, descx, incx, info)
Definition psblastst.f:2870
subroutine psmmch3(uplo, trans, m, n, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, err, info)
Definition psblastst.f:6372
subroutine psvmch2(ictxt, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition psblastst.f:4919
subroutine pscallsub(subptr, scode)
Definition psblastst.f:2180
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
real function slamch(cmach)
Definition tools.f:867