ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psludriver.f
Go to the documentation of this file.
1  PROGRAM psludriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * ========
10 *
11 * PSLUDRIVER is the main test program for the REAL
12 * SCALAPACK LU routines. This test driver performs an LU factorization
13 * and solve. If the input matrix is non-square, only the factorization
14 * is performed. Condition estimation and iterative refinement are
15 * optionally performed.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 18 lines:
20 * 'SCALAPACK, Version 2.0, LU factorization input file'
21 * 'Intel iPSC/860 hypercube, gamma model.'
22 * 'LU.out' output file name (if any)
23 * 6 device out
24 * 1 number of problems sizes
25 * 31 201 values of M
26 * 31 201 values of N
27 * 1 number of NB's
28 * 2 values of NB
29 * 1 number of NRHS's
30 * 1 values of NRHS
31 * 1 number of NBRHS's
32 * 1 values of NBRHS
33 * 1 number of process grids (ordered pairs of P & Q)
34 * 2 1 4 2 3 8 values of P
35 * 2 4 1 3 2 1 values of Q
36 * 1.0 threshold
37 * T (T or F) Test Cond. Est. and Iter. Ref. Routines
38 *
39 *
40 * Internal Parameters
41 * ===================
42 *
43 * TOTMEM INTEGER, default = 2000000
44 * TOTMEM is a machine-specific parameter indicating the
45 * maximum amount of available memory in bytes.
46 * The user should customize TOTMEM to his platform. Remember
47 * to leave room in memory for the operating system, the BLACS
48 * buffer, etc. For example, on a system with 8 MB of memory
49 * per process (e.g., one processor on an Intel iPSC/860), the
50 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51 * code, BLACS buffer, etc). However, for PVM, we usually set
52 * TOTMEM = 2000000. Some experimenting with the maximum value
53 * of TOTMEM may be required.
54 *
55 * INTGSZ INTEGER, default = 4 bytes.
56 * REALSZ INTEGER, default = 4 bytes.
57 * INTGSZ and REALSZ indicate the length in bytes on the
58 * given platform for an integer and a single precision real.
59 * MEM REAL array, dimension ( TOTMEM / REALSZ )
60 *
61 * All arrays used by SCALAPACK routines are allocated from
62 * this array and referenced by pointers. The integer IPA,
63 * for example, is a pointer to the starting element of MEM for
64 * the matrix A.
65 *
66 * =====================================================================
67 *
68 * .. Parameters ..
69  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70  $ lld_, mb_, m_, nb_, n_, rsrc_
71  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74  INTEGER intgsz, memsiz, ntests, realsz, totmem
75  REAL padval, zero
76  parameter( intgsz = 4, realsz = 4, totmem = 2000000,
77  $ memsiz = totmem / realsz, ntests = 20,
78  $ padval = -9923.0e+0, zero = 0.0e+0 )
79 * ..
80 * .. Local Scalars ..
81  LOGICAL check, est
82  CHARACTER*6 passed
83  CHARACTER*80 outfile
84  INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85  $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86  $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87  $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88  $ lipiv, liwork, lwork, lw2, m, maxmn,
89  $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90  $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91  $ nprocs, nprow, nq, nrhs, worksiz
92  REAL anorm, anorm1, fresid, rcond, sresid, sresid2,
93  $ thresh
94  DOUBLE PRECISION nops, tmflops
95 * ..
96 * .. Local Arrays ..
97  INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98  $ mval( ntests ), nbrval( ntests ),
99  $ nbval( ntests ), nrval( ntests ),
100  $ nval( ntests ), pval( ntests ),
101  $ qval( ntests )
102  REAL mem( memsiz )
103  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
104 * ..
105 * .. External Subroutines ..
106  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
108  $ blacs_pinfo, descinit, igsum2d, pschekpad,
113 * ..
114 * .. External Functions ..
115  INTEGER iceil, ilcm, numroc
116  REAL pslange
117  EXTERNAL iceil, ilcm, numroc, pslange
118 * ..
119 * .. Intrinsic Functions ..
120  INTRINSIC dble, max, min
121 * ..
122 * .. Data Statements ..
123  DATA kfail, kpass, kskip, ktests / 4*0 /
124 * ..
125 * .. Executable Statements ..
126 *
127 * Get starting information
128 *
129  CALL blacs_pinfo( iam, nprocs )
130  iaseed = 100
131  ibseed = 200
132  CALL psluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
133  $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
134  $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
135  $ est, mem, iam, nprocs )
136  check = ( thresh.GE.0.0e+0 )
137 *
138 * Print headings
139 *
140  IF( iam.EQ.0 ) THEN
141  WRITE( nout, fmt = * )
142  WRITE( nout, fmt = 9995 )
143  WRITE( nout, fmt = 9994 )
144  WRITE( nout, fmt = * )
145  END IF
146 *
147 * Loop over different process grids
148 *
149  DO 50 i = 1, ngrids
150 *
151  nprow = pval( i )
152  npcol = qval( i )
153 *
154 * Make sure grid information is correct
155 *
156  ierr( 1 ) = 0
157  IF( nprow.LT.1 ) THEN
158  IF( iam.EQ.0 )
159  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
160  ierr( 1 ) = 1
161  ELSE IF( npcol.LT.1 ) THEN
162  IF( iam.EQ.0 )
163  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
164  ierr( 1 ) = 1
165  ELSE IF( nprow*npcol.GT.nprocs ) THEN
166  IF( iam.EQ.0 )
167  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
168  ierr( 1 ) = 1
169  END IF
170 *
171  IF( ierr( 1 ).GT.0 ) THEN
172  IF( iam.EQ.0 )
173  $ WRITE( nout, fmt = 9997 ) 'grid'
174  kskip = kskip + 1
175  GO TO 50
176  END IF
177 *
178 * Define process grid
179 *
180  CALL blacs_get( -1, 0, ictxt )
181  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
182  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
183 *
184 * Go to bottom of process grid loop if this case doesn't use my
185 * process
186 *
187  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
188  $ GO TO 50
189 *
190  DO 40 j = 1, nmat
191 *
192  m = mval( j )
193  n = nval( j )
194 *
195 * Make sure matrix information is correct
196 *
197  ierr( 1 ) = 0
198  IF( m.LT.1 ) THEN
199  IF( iam.EQ.0 )
200  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
201  ierr( 1 ) = 1
202  ELSE IF( n.LT.1 ) THEN
203  IF( iam.EQ.0 )
204  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
205  ierr( 1 ) = 1
206  END IF
207 *
208 * Check all processes for an error
209 *
210  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
211 *
212  IF( ierr( 1 ).GT.0 ) THEN
213  IF( iam.EQ.0 )
214  $ WRITE( nout, fmt = 9997 ) 'matrix'
215  kskip = kskip + 1
216  GO TO 40
217  END IF
218 *
219  DO 30 k = 1, nnb
220 *
221  nb = nbval( k )
222 *
223 * Make sure nb is legal
224 *
225  ierr( 1 ) = 0
226  IF( nb.LT.1 ) THEN
227  ierr( 1 ) = 1
228  IF( iam.EQ.0 )
229  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
230  END IF
231 *
232 * Check all processes for an error
233 *
234  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
235 *
236  IF( ierr( 1 ).GT.0 ) THEN
237  IF( iam.EQ.0 )
238  $ WRITE( nout, fmt = 9997 ) 'NB'
239  kskip = kskip + 1
240  GO TO 30
241  END IF
242 *
243 * Padding constants
244 *
245  mp = numroc( m, nb, myrow, 0, nprow )
246  np = numroc( n, nb, myrow, 0, nprow )
247  nq = numroc( n, nb, mycol, 0, npcol )
248  IF( check ) THEN
249  iprepad = max( nb, mp )
250  imidpad = nb
251  ipostpad = max( nb, nq )
252  ELSE
253  iprepad = 0
254  imidpad = 0
255  ipostpad = 0
256  END IF
257 *
258 * Initialize the array descriptor for the matrix A
259 *
260  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
261  $ max( 1, mp )+imidpad, ierr( 1 ) )
262 *
263 * Check all processes for an error
264 *
265  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
266 *
267  IF( ierr( 1 ).LT.0 ) THEN
268  IF( iam.EQ.0 )
269  $ WRITE( nout, fmt = 9997 ) 'descriptor'
270  kskip = kskip + 1
271  GO TO 30
272  END IF
273 *
274 * Assign pointers into MEM for SCALAPACK arrays, A is
275 * allocated starting at position MEM( IPREPAD+1 )
276 *
277  ipa = iprepad+1
278  IF( est .AND. m.EQ.n ) THEN
279  ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
280  ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
281  ELSE
282  ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
283  END IF
284  lipiv = iceil( intgsz*( mp+nb ), realsz )
285  ipw = ippiv + lipiv + ipostpad + iprepad
286 *
287  IF( check ) THEN
288 *
289 * Calculate the amount of workspace required by the
290 * checking routines PSLANGE, PSGETRRV, and
291 * PSLAFCHK
292 *
293  worksiz = max( 2, nq )
294 *
295  worksiz = max( worksiz, mp*desca( nb_ )+
296  $ nq*desca( mb_ ) )
297 *
298  worksiz = max( worksiz, mp * desca( nb_ ) )
299 *
300  worksiz = worksiz + ipostpad
301 *
302  ELSE
303 *
304  worksiz = ipostpad
305 *
306  END IF
307 *
308 * Check for adequate memory for problem size
309 *
310  ierr( 1 ) = 0
311  IF( ipw+worksiz.GT.memsiz ) THEN
312  IF( iam.EQ.0 )
313  $ WRITE( nout, fmt = 9996 ) 'factorization',
314  $ ( ipw+worksiz )*realsz
315  ierr( 1 ) = 1
316  END IF
317 *
318 * Check all processes for an error
319 *
320  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
321 *
322  IF( ierr( 1 ).GT.0 ) THEN
323  IF( iam.EQ.0 )
324  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
325  kskip = kskip + 1
326  GO TO 30
327  END IF
328 *
329 * Generate matrix A of Ax = b
330 *
331  CALL psmatgen( ictxt, 'No transpose', 'No transpose',
332  $ desca( m_ ), desca( n_ ), desca( mb_ ),
333  $ desca( nb_ ), mem( ipa ), desca( lld_ ),
334  $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
335  $ mp, 0, nq, myrow, mycol, nprow, npcol )
336 *
337 * Calculate inf-norm of A for residual error-checking
338 *
339  IF( check ) THEN
340  CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
341  $ desca( lld_ ), iprepad, ipostpad,
342  $ padval )
343  CALL psfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
344  $ lipiv, iprepad, ipostpad, padval )
345  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
346  $ mem( ipw-iprepad ), worksiz-ipostpad,
347  $ iprepad, ipostpad, padval )
348  anorm = pslange( 'I', m, n, mem( ipa ), 1, 1, desca,
349  $ mem( ipw ) )
350  anorm1 = pslange( '1', m, n, mem( ipa ), 1, 1, desca,
351  $ mem( ipw ) )
352  CALL pschekpad( ictxt, 'PSLANGE', mp, nq,
353  $ mem( ipa-iprepad ), desca( lld_ ),
354  $ iprepad, ipostpad, padval )
355  CALL pschekpad( ictxt, 'PSLANGE', worksiz-ipostpad,
356  $ 1, mem( ipw-iprepad ),
357  $ worksiz-ipostpad, iprepad, ipostpad,
358  $ padval )
359  END IF
360 *
361  IF( est .AND. m.EQ.n ) THEN
362  CALL psmatgen( ictxt, 'No transpose', 'No transpose',
363  $ desca( m_ ), desca( n_ ), desca( mb_ ),
364  $ desca( nb_ ), mem( ipa0 ),
365  $ desca( lld_ ), desca( rsrc_ ),
366  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
367  $ myrow, mycol, nprow, npcol )
368  IF( check )
369  $ CALL psfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
370  $ desca( lld_ ), iprepad, ipostpad,
371  $ padval )
372  END IF
373 *
374  CALL slboot()
375  CALL blacs_barrier( ictxt, 'All' )
376  CALL sltimer( 1 )
377 *
378 * Perform LU factorization
379 *
380  CALL psgetrf( m, n, mem( ipa ), 1, 1, desca,
381  $ mem( ippiv ), info )
382 *
383  CALL sltimer( 1 )
384 *
385  IF( info.NE.0 ) THEN
386  IF( iam.EQ.0 )
387  $ WRITE( nout, fmt = * ) 'PSGETRF INFO=', info
388  kfail = kfail + 1
389  rcond = zero
390  GO TO 30
391  END IF
392 *
393  IF( check ) THEN
394 *
395 * Check for memory overwrite in LU factorization
396 *
397  CALL pschekpad( ictxt, 'PSGETRF', mp, nq,
398  $ mem( ipa-iprepad ), desca( lld_ ),
399  $ iprepad, ipostpad, padval )
400  CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
401  $ mem( ippiv-iprepad ), lipiv, iprepad,
402  $ ipostpad, padval )
403  END IF
404 *
405  IF( m.NE.n ) THEN
406 *
407 * For non-square matrices, factorization only
408 *
409  nrhs = 0
410  nbrhs = 0
411 *
412  IF( check ) THEN
413 *
414 * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
415 *
416  CALL psgetrrv( m, n, mem( ipa ), 1, 1, desca,
417  $ mem( ippiv ), mem( ipw ) )
418  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
419  $ desca, iaseed, anorm, fresid,
420  $ mem( ipw ) )
421 *
422 * Check for memory overwrite
423 *
424  CALL pschekpad( ictxt, 'PSGETRRV', mp, nq,
425  $ mem( ipa-iprepad ), desca( lld_ ),
426  $ iprepad, ipostpad, padval )
427  CALL pschekpad( ictxt, 'PSGETRRV', lipiv, 1,
428  $ mem( ippiv-iprepad ), lipiv,
429  $ iprepad, ipostpad, padval )
430  CALL pschekpad( ictxt, 'PSGETRRV',
431  $ worksiz-ipostpad, 1,
432  $ mem( ipw-iprepad ),
433  $ worksiz-ipostpad, iprepad,
434  $ ipostpad, padval )
435 *
436 * Test residual and detect NaN result
437 *
438  IF( ( fresid.LE.thresh ) .AND.
439  $ ( (fresid-fresid).EQ.0.0e+0 ) ) THEN
440  kpass = kpass + 1
441  passed = 'PASSED'
442  ELSE
443  kfail = kfail + 1
444  passed = 'FAILED'
445  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
446  $ WRITE( nout, fmt = 9986 ) fresid
447  END IF
448 *
449  ELSE
450 *
451 * Don't perform the checking, only timing
452 *
453  kpass = kpass + 1
454  fresid = fresid - fresid
455  passed = 'BYPASS'
456 *
457  END IF
458 *
459 * Gather maximum of all CPU and WALL clock timings
460 *
461  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
462  $ wtime )
463  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
464  $ ctime )
465 *
466 * Print results
467 *
468  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
469 *
470  maxmn = max( m, n )
471  minmn = min( m, n )
472 *
473 * M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
474 * factorization when M >= N
475 *
476  nops = dble( maxmn )*( dble( minmn )**2 ) -
477  $ (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -
478  $ (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )
479 *
480 * Calculate total megaflops -- factorization only,
481 * -- for WALL and CPU time, and print output
482 *
483 * Print WALL time if machine supports it
484 *
485  IF( wtime( 1 ).GT.0.0d+0 ) THEN
486  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
487  ELSE
488  tmflops = 0.0d+0
489  END IF
490 *
491  wtime( 2 ) = 0.0d+0
492  IF( wtime( 1 ).GE.0.0d+0 )
493  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
494  $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
495  $ wtime( 2 ), tmflops, passed
496 *
497 * Print CPU time if machine supports it
498 *
499  IF( ctime( 1 ).GT.0.0d+0 ) THEN
500  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
501  ELSE
502  tmflops = 0.0d+0
503  END IF
504 *
505  ctime( 2 ) = 0.0d+0
506  IF( ctime( 1 ).GE.0.0d+0 )
507  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
508  $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
509  $ ctime( 2 ), tmflops, passed
510  END IF
511 *
512  ELSE
513 *
514 * If M = N
515 *
516  IF( est ) THEN
517 *
518 * Calculate workspace required for PSGECON
519 *
520  lwork = max( 1, 2*np ) + max( 1, 2*nq ) +
521  $ max( 2, desca( nb_ )*
522  $ max( 1, iceil( nprow-1, npcol ) ),
523  $ nq + desca( nb_ )*
524  $ max( 1, iceil( npcol-1, nprow ) ) )
525  ipw2 = ipw + lwork + ipostpad + iprepad
526  liwork = max( 1, np )
527  lw2 = iceil( liwork*intgsz, realsz ) + ipostpad
528 *
529  ierr( 1 ) = 0
530  IF( ipw2+lw2.GT.memsiz ) THEN
531  IF( iam.EQ.0 )
532  $ WRITE( nout, fmt = 9996 )'cond est',
533  $ ( ipw2+lw2 )*realsz
534  ierr( 1 ) = 1
535  END IF
536 *
537 * Check all processes for an error
538 *
539  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
540  $ -1, 0 )
541 *
542  IF( ierr( 1 ).GT.0 ) THEN
543  IF( iam.EQ.0 )
544  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
545  kskip = kskip + 1
546  GO TO 30
547  END IF
548 *
549  IF( check ) THEN
550  CALL psfillpad( ictxt, lwork, 1,
551  $ mem( ipw-iprepad ), lwork,
552  $ iprepad, ipostpad, padval )
553  CALL psfillpad( ictxt, lw2-ipostpad, 1,
554  $ mem( ipw2-iprepad ),
555  $ lw2-ipostpad, iprepad,
556  $ ipostpad, padval )
557  END IF
558 *
559 * Compute condition number of the matrix
560 *
561  CALL psgecon( '1', n, mem( ipa ), 1, 1, desca,
562  $ anorm1, rcond, mem( ipw ), lwork,
563  $ mem( ipw2 ), liwork, info )
564 *
565  IF( check ) THEN
566  CALL pschekpad( ictxt, 'PSGECON', np, nq,
567  $ mem( ipa-iprepad ),
568  $ desca( lld_ ), iprepad,
569  $ ipostpad, padval )
570  CALL pschekpad( ictxt, 'PSGECON', lwork, 1,
571  $ mem( ipw-iprepad ), lwork,
572  $ iprepad, ipostpad, padval )
573  CALL pschekpad( ictxt, 'PSGECON',
574  $ lw2-ipostpad, 1,
575  $ mem( ipw2-iprepad ),
576  $ lw2-ipostpad, iprepad,
577  $ ipostpad, padval )
578  END IF
579  END IF
580 *
581 * Loop over the different values for NRHS
582 *
583  DO 20 hh = 1, nnr
584 *
585  nrhs = nrval( hh )
586 *
587  DO 10 kk = 1, nnbr
588 *
589  nbrhs = nbrval( kk )
590 *
591 * Initialize Array Descriptor for rhs
592 *
593  CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
594  $ ictxt, max( 1, np )+imidpad,
595  $ ierr( 1 ) )
596 *
597 * Check all processes for an error
598 *
599  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
600  $ -1, 0 )
601 *
602  IF( ierr( 1 ).LT.0 ) THEN
603  IF( iam.EQ.0 )
604  $ WRITE( nout, fmt = 9997 ) 'descriptor'
605  kskip = kskip + 1
606  GO TO 10
607  END IF
608 *
609 * move IPW to allow room for RHS
610 *
611  myrhs = numroc( descb( n_ ), descb( nb_ ),
612  $ mycol, descb( csrc_ ), npcol )
613  ipb = ipw
614 *
615  IF( est ) THEN
616  ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
617  $ iprepad
618  ipferr = ipb0 + descb( lld_ )*myrhs +
619  $ ipostpad + iprepad
620  ipberr = myrhs + ipferr + ipostpad + iprepad
621  ipw = myrhs + ipberr + ipostpad + iprepad
622  ELSE
623  ipw = ipb + descb( lld_ )*myrhs + ipostpad +
624  $ iprepad
625  END IF
626 *
627 * Set worksiz: routines requiring most workspace
628 * is PSLASCHK
629 *
630  IF( check ) THEN
631  lcm = ilcm( nprow, npcol )
632  lcmq = lcm / npcol
633  worksiz = max( worksiz-ipostpad,
634  $ nq * nbrhs + np * nbrhs +
635  $ max( max( nq*nb, 2*nbrhs ),
636  $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
637  $ 0,0,lcmq ) ) )
638  worksiz = ipostpad + worksiz
639  ELSE
640  worksiz = ipostpad
641  END IF
642 *
643  ierr( 1 ) = 0
644  IF( ipw+worksiz.GT.memsiz ) THEN
645  IF( iam.EQ.0 )
646  $ WRITE( nout, fmt = 9996 )'solve',
647  $ ( ipw+worksiz )*realsz
648  ierr( 1 ) = 1
649  END IF
650 *
651 * Check all processes for an error
652 *
653  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
654  $ -1, 0 )
655 *
656  IF( ierr( 1 ).GT.0 ) THEN
657  IF( iam.EQ.0 )
658  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
659  kskip = kskip + 1
660  GO TO 10
661  END IF
662 *
663 * Generate RHS
664 *
665  CALL psmatgen( ictxt, 'No', 'No', descb( m_ ),
666  $ descb( n_ ), descb( mb_ ),
667  $ descb( nb_ ), mem( ipb ),
668  $ descb( lld_ ), descb( rsrc_ ),
669  $ descb( csrc_ ), ibseed, 0, np, 0,
670  $ myrhs, myrow, mycol, nprow,
671  $ npcol )
672 *
673  IF( check )
674  $ CALL psfillpad( ictxt, np, myrhs,
675  $ mem( ipb-iprepad ),
676  $ descb( lld_ ), iprepad,
677  $ ipostpad, padval )
678 *
679  IF( est ) THEN
680  CALL psmatgen( ictxt, 'No', 'No',
681  $ descb( m_ ), descb( n_ ),
682  $ descb( mb_ ), descb( nb_ ),
683  $ mem( ipb0 ), descb( lld_ ),
684  $ descb( rsrc_ ),
685  $ descb( csrc_ ), ibseed, 0, np,
686  $ 0, myrhs, myrow, mycol, nprow,
687  $ npcol )
688  IF( check ) THEN
689  CALL psfillpad( ictxt, np, myrhs,
690  $ mem( ipb0-iprepad ),
691  $ descb( lld_ ), iprepad,
692  $ ipostpad, padval )
693  CALL psfillpad( ictxt, 1, myrhs,
694  $ mem( ipferr-iprepad ), 1,
695  $ iprepad, ipostpad,
696  $ padval )
697  CALL psfillpad( ictxt, 1, myrhs,
698  $ mem( ipberr-iprepad ), 1,
699  $ iprepad, ipostpad,
700  $ padval )
701  END IF
702  END IF
703 *
704  CALL blacs_barrier( ictxt, 'All' )
705  CALL sltimer( 2 )
706 *
707 * Solve linear sytem via LU factorization
708 *
709  CALL psgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
710  $ desca, mem( ippiv ), mem( ipb ),
711  $ 1, 1, descb, info )
712 *
713  CALL sltimer( 2 )
714 *
715  IF( check ) THEN
716 *
717 * check for memory overwrite
718 *
719  CALL pschekpad( ictxt, 'PSGETRS', np, nq,
720  $ mem( ipa-iprepad ),
721  $ desca( lld_ ), iprepad,
722  $ ipostpad, padval )
723  CALL pschekpad( ictxt, 'PSGETRS', lipiv, 1,
724  $ mem( ippiv-iprepad ), lipiv,
725  $ iprepad, ipostpad, padval )
726  CALL pschekpad( ictxt, 'PSGETRS', np,
727  $ myrhs, mem( ipb-iprepad ),
728  $ descb( lld_ ), iprepad,
729  $ ipostpad, padval )
730 *
731  CALL psfillpad( ictxt, worksiz-ipostpad,
732  $ 1, mem( ipw-iprepad ),
733  $ worksiz-ipostpad, iprepad,
734  $ ipostpad, padval )
735 *
736 * check the solution to rhs
737 *
738  CALL pslaschk( 'No', 'N', n, nrhs,
739  $ mem( ipb ), 1, 1, descb,
740  $ iaseed, 1, 1, desca, ibseed,
741  $ anorm, sresid, mem( ipw ) )
742 *
743  IF( iam.EQ.0 .AND. sresid.GT.thresh )
744  $ WRITE( nout, fmt = 9985 ) sresid
745 *
746 * check for memory overwrite
747 *
748  CALL pschekpad( ictxt, 'PSLASCHK', np,
749  $ myrhs, mem( ipb-iprepad ),
750  $ descb( lld_ ), iprepad,
751  $ ipostpad, padval )
752  CALL pschekpad( ictxt, 'PSLASCHK',
753  $ worksiz-ipostpad, 1,
754  $ mem( ipw-iprepad ),
755  $ worksiz-ipostpad,
756  $ iprepad, ipostpad, padval )
757 *
758 * The second test is a NaN trap
759 *
760  IF( sresid.LE.thresh .AND.
761  $ ( sresid-sresid ).EQ.0.0e+0 ) THEN
762  kpass = kpass + 1
763  passed = 'PASSED'
764  ELSE
765  kfail = kfail + 1
766  passed = 'FAILED'
767  END IF
768  ELSE
769  kpass = kpass + 1
770  sresid = sresid - sresid
771  passed = 'BYPASS'
772  END IF
773 *
774  IF( est ) THEN
775 *
776 * Calculate workspace required for PSGERFS
777 *
778  lwork = max( 1, 3*np )
779  ipw2 = ipw + lwork + ipostpad + iprepad
780  liwork = max( 1, np )
781  lw2 = iceil( liwork*intgsz, realsz ) +
782  $ ipostpad
783 *
784  ierr( 1 ) = 0
785  IF( ipw2+lw2.GT.memsiz ) THEN
786  IF( iam.EQ.0 )
787  $ WRITE( nout, fmt = 9996 )
788  $ 'iter ref', ( ipw2+lw2 )*realsz
789  ierr( 1 ) = 1
790  END IF
791 *
792 * Check all processes for an error
793 *
794  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
795  $ ierr, 1, -1, 0 )
796 *
797  IF( ierr( 1 ).GT.0 ) THEN
798  IF( iam.EQ.0 )
799  $ WRITE( nout, fmt = 9997 )
800  $ 'MEMORY'
801  kskip = kskip + 1
802  GO TO 10
803  END IF
804 *
805  IF( check ) THEN
806  CALL psfillpad( ictxt, lwork, 1,
807  $ mem( ipw-iprepad ),
808  $ lwork, iprepad, ipostpad,
809  $ padval )
810  CALL psfillpad( ictxt, lw2-ipostpad, 1,
811  $ mem( ipw2-iprepad ),
812  $ lw2-ipostpad, iprepad,
813  $ ipostpad, padval )
814  END IF
815 *
816 * Use iterative refinement to improve the
817 * computed solution
818 *
819  CALL psgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
820  $ 1, desca, mem( ipa ), 1, 1,
821  $ desca, mem( ippiv ),
822  $ mem( ipb0 ), 1, 1, descb,
823  $ mem( ipb ), 1, 1, descb,
824  $ mem( ipferr ), mem( ipberr ),
825  $ mem( ipw ), lwork, mem( ipw2 ),
826  $ liwork, info )
827 *
828  IF( check ) THEN
829  CALL pschekpad( ictxt, 'PSGERFS', np,
830  $ nq, mem( ipa0-iprepad ),
831  $ desca( lld_ ), iprepad,
832  $ ipostpad, padval )
833  CALL pschekpad( ictxt, 'PSGERFS', np,
834  $ nq, mem( ipa-iprepad ),
835  $ desca( lld_ ), iprepad,
836  $ ipostpad, padval )
837  CALL pschekpad( ictxt, 'PSGERFS', lipiv,
838  $ 1, mem( ippiv-iprepad ),
839  $ lipiv, iprepad,
840  $ ipostpad, padval )
841  CALL pschekpad( ictxt, 'PSGERFS', np,
842  $ myrhs, mem( ipb-iprepad ),
843  $ descb( lld_ ), iprepad,
844  $ ipostpad, padval )
845  CALL pschekpad( ictxt, 'PSGERFS', np,
846  $ myrhs,
847  $ mem( ipb0-iprepad ),
848  $ descb( lld_ ), iprepad,
849  $ ipostpad, padval )
850  CALL pschekpad( ictxt, 'PSGERFS', 1,
851  $ myrhs,
852  $ mem( ipferr-iprepad ), 1,
853  $ iprepad, ipostpad,
854  $ padval )
855  CALL pschekpad( ictxt, 'PSGERFS', 1,
856  $ myrhs,
857  $ mem( ipberr-iprepad ), 1,
858  $ iprepad, ipostpad,
859  $ padval )
860  CALL pschekpad( ictxt, 'PSGERFS', lwork,
861  $ 1, mem( ipw-iprepad ),
862  $ lwork, iprepad, ipostpad,
863  $ padval )
864  CALL pschekpad( ictxt, 'PSGERFS',
865  $ lw2-ipostpad, 1,
866  $ mem( ipw2-iprepad ),
867  $ lw2-ipostpad, iprepad,
868  $ ipostpad, padval )
869 *
870  CALL psfillpad( ictxt, worksiz-ipostpad,
871  $ 1, mem( ipw-iprepad ),
872  $ worksiz-ipostpad, iprepad,
873  $ ipostpad, padval )
874 *
875 * check the solution to rhs
876 *
877  CALL pslaschk( 'No', 'N', n, nrhs,
878  $ mem( ipb ), 1, 1, descb,
879  $ iaseed, 1, 1, desca,
880  $ ibseed, anorm, sresid2,
881  $ mem( ipw ) )
882 *
883  IF( iam.EQ.0 .AND. sresid2.GT.thresh )
884  $ WRITE( nout, fmt = 9985 ) sresid2
885 *
886 * check for memory overwrite
887 *
888  CALL pschekpad( ictxt, 'PSLASCHK', np,
889  $ myrhs, mem( ipb-iprepad ),
890  $ descb( lld_ ), iprepad,
891  $ ipostpad, padval )
892  CALL pschekpad( ictxt, 'PSLASCHK',
893  $ worksiz-ipostpad, 1,
894  $ mem( ipw-iprepad ),
895  $ worksiz-ipostpad, iprepad,
896  $ ipostpad, padval )
897  END IF
898  END IF
899 *
900 * Gather max. of all CPU and WALL clock timings
901 *
902  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
903  $ wtime )
904  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
905  $ ctime )
906 *
907 * Print results
908 *
909  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
910 *
911 * 2/3 N^3 - 1/2 N^2 flops for LU factorization
912 *
913  nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -
914  $ (1.0d+0/2.0d+0)*( dble(n)**2 )
915 *
916 * nrhs * 2 N^2 flops for LU solve.
917 *
918  nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)
919 *
920 * Calculate total megaflops -- factorization
921 * and solve -- for WALL and CPU time, and print
922 * output
923 *
924 * Print WALL time if machine supports it
925 *
926  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
927  $ THEN
928  tmflops = nops /
929  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
930  ELSE
931  tmflops = 0.0d+0
932  END IF
933 *
934 * Print WALL time if supported
935 *
936  IF( wtime( 2 ).GE.0.0d+0 )
937  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
938  $ nb, nrhs, nbrhs, nprow, npcol,
939  $ wtime( 1 ), wtime( 2 ), tmflops,
940  $ passed
941 *
942 * Print CPU time if supported
943 *
944  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
945  $ THEN
946  tmflops = nops /
947  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
948  ELSE
949  tmflops = 0.0d+0
950  END IF
951 *
952  IF( ctime( 2 ).GE.0.0d+0 )
953  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
954  $ nb, nrhs, nbrhs, nprow, npcol,
955  $ ctime( 1 ), ctime( 2 ), tmflops,
956  $ passed
957  END IF
958  10 CONTINUE
959  20 CONTINUE
960 *
961  IF( check.AND.( sresid.GT.thresh ) ) THEN
962 *
963 * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
964 *
965  CALL psgetrrv( m, n, mem( ipa ), 1, 1, desca,
966  $ mem( ippiv ), mem( ipw ) )
967  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
968  $ 1, desca, iaseed, anorm, fresid,
969  $ mem( ipw ) )
970 *
971 * Check for memory overwrite
972 *
973  CALL pschekpad( ictxt, 'PSGETRRV', np, nq,
974  $ mem( ipa-iprepad ), desca( lld_ ),
975  $ iprepad, ipostpad, padval )
976  CALL pschekpad( ictxt, 'PSGETRRV', lipiv,
977  $ 1, mem( ippiv-iprepad ), lipiv,
978  $ iprepad, ipostpad, padval )
979  CALL pschekpad( ictxt, 'PSGETRRV',
980  $ worksiz-ipostpad, 1,
981  $ mem( ipw-iprepad ),
982  $ worksiz-ipostpad, iprepad,
983  $ ipostpad, padval )
984 *
985  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
986  $ WRITE( nout, fmt = 9986 ) fresid
987  END IF
988  END IF
989  30 CONTINUE
990  40 CONTINUE
991  CALL blacs_gridexit( ictxt )
992  50 CONTINUE
993 *
994 * Print ending messages and close output file
995 *
996  60 CONTINUE
997  IF( iam.EQ.0 ) THEN
998  ktests = kpass + kfail + kskip
999  WRITE( nout, fmt = * )
1000  WRITE( nout, fmt = 9992 ) ktests
1001  IF( check ) THEN
1002  WRITE( nout, fmt = 9991 ) kpass
1003  WRITE( nout, fmt = 9989 ) kfail
1004  ELSE
1005  WRITE( nout, fmt = 9990 ) kpass
1006  END IF
1007  WRITE( nout, fmt = 9988 ) kskip
1008  WRITE( nout, fmt = * )
1009  WRITE( nout, fmt = * )
1010  WRITE( nout, fmt = 9987 )
1011  IF( nout.NE.6 .AND. nout.NE.0 )
1012  $ CLOSE( nout )
1013  END IF
1014 *
1015  CALL blacs_exit( 0 )
1016 *
1017  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1018  $ '; It should be at least 1' )
1019  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1020  $ i4 )
1021  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1022  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1023  $ i11 )
1024  9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1025  $ 'Sol Time MFLOPS CHECK' )
1026  9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1027  $ '-------- -------- ------' )
1028  9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1029  $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1030  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1031  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1032  9990 FORMAT( i5, ' tests completed without checking.' )
1033  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1034  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1035  9987 FORMAT( 'END OF TESTS.' )
1036  9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1037  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1038 *
1039  stop
1040 *
1041 * End of PSLUDRIVER
1042 *
1043  END
pslafchk
subroutine pslafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pslafchk.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
psludriver
program psludriver
Definition: psludriver.f:1
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
psgetrs
subroutine psgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: psgetrs.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
psluinfo
subroutine psluinfo(SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, EST, WORK, IAM, NPROCS)
Definition: psluinfo.f:5
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
psgetrf
subroutine psgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: psgetrf.f:2
psgecon
subroutine psgecon(NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psgecon.f:3
psgetrrv
subroutine psgetrrv(M, N, A, IA, JA, DESCA, IPIV, WORK)
Definition: psgetrrv.f:2
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
psgerfs
subroutine psgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psgerfs.f:5
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pslaschk
subroutine pslaschk(SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK)
Definition: pslaschk.f:4
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2