ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdludriver.f
Go to the documentation of this file.
1  PROGRAM pdludriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * ========
10 *
11 * PDLUDRIVER is the main test program for the DOUBLE PRECISION
12 * SCALAPACK LU routines. This test driver performs an LU factorization
13 * and solve. If the input matrix is non-square, only the factorization
14 * is performed. Condition estimation and iterative refinement are
15 * optionally performed.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 18 lines:
20 * 'SCALAPACK, Version 2.0, LU factorization input file'
21 * 'Intel iPSC/860 hypercube, gamma model.'
22 * 'LU.out' output file name (if any)
23 * 6 device out
24 * 1 number of problems sizes
25 * 31 201 values of M
26 * 31 201 values of N
27 * 1 number of NB's
28 * 2 values of NB
29 * 1 number of NRHS's
30 * 1 values of NRHS
31 * 1 number of NBRHS's
32 * 1 values of NBRHS
33 * 1 number of process grids (ordered pairs of P & Q)
34 * 2 1 4 2 3 8 values of P
35 * 2 4 1 3 2 1 values of Q
36 * 1.0 threshold
37 * T (T or F) Test Cond. Est. and Iter. Ref. Routines
38 *
39 *
40 * Internal Parameters
41 * ===================
42 *
43 * TOTMEM INTEGER, default = 2000000
44 * TOTMEM is a machine-specific parameter indicating the
45 * maximum amount of available memory in bytes.
46 * The user should customize TOTMEM to his platform. Remember
47 * to leave room in memory for the operating system, the BLACS
48 * buffer, etc. For example, on a system with 8 MB of memory
49 * per process (e.g., one processor on an Intel iPSC/860), the
50 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51 * code, BLACS buffer, etc). However, for PVM, we usually set
52 * TOTMEM = 2000000. Some experimenting with the maximum value
53 * of TOTMEM may be required.
54 *
55 * INTGSZ INTEGER, default = 4 bytes.
56 * DBLESZ INTEGER, default = 8 bytes.
57 * INTGSZ and DBLESZ indicate the length in bytes on the
58 * given platform for an integer and a double precision real.
59 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
60 *
61 * All arrays used by SCALAPACK routines are allocated from
62 * this array and referenced by pointers. The integer IPA,
63 * for example, is a pointer to the starting element of MEM for
64 * the matrix A.
65 *
66 * =====================================================================
67 *
68 * .. Parameters ..
69  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70  $ lld_, mb_, m_, nb_, n_, rsrc_
71  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74  INTEGER dblesz, intgsz, memsiz, ntests, totmem
75  DOUBLE PRECISION padval, zero
76  parameter( dblesz = 8, intgsz = 4, totmem = 4000000,
77  $ memsiz = totmem / dblesz, ntests = 20,
78  $ padval = -9923.0d+0, zero = 0.0d+0 )
79 * ..
80 * .. Local Scalars ..
81  LOGICAL check, est
82  CHARACTER*6 passed
83  CHARACTER*80 outfile
84  INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85  $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86  $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87  $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88  $ lipiv, liwork, lwork, lw2, m, maxmn,
89  $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90  $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91  $ nprocs, nprow, nq, nrhs, worksiz
92  REAL thresh
93  DOUBLE PRECISION anorm, anorm1, fresid, nops, rcond,
94  $ sresid, sresid2, tmflops
95 * ..
96 * .. Local Arrays ..
97  INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98  $ mval( ntests ), nbrval( ntests ),
99  $ nbval( ntests ), nrval( ntests ),
100  $ nval( ntests ), pval( ntests ),
101  $ qval( ntests )
102  DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
103 * ..
104 * .. External Subroutines ..
105  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
107  $ blacs_pinfo, descinit, igsum2d, pdchekpad,
112 * ..
113 * .. External Functions ..
114  INTEGER iceil, ilcm, numroc
115  DOUBLE PRECISION pdlange
116  EXTERNAL iceil, ilcm, numroc, pdlange
117 * ..
118 * .. Intrinsic Functions ..
119  INTRINSIC dble, max, min
120 * ..
121 * .. Data Statements ..
122  DATA kfail, kpass, kskip, ktests / 4*0 /
123 * ..
124 * .. Executable Statements ..
125 *
126 * Get starting information
127 *
128  CALL blacs_pinfo( iam, nprocs )
129  iaseed = 100
130  ibseed = 200
131  CALL pdluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
132  $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
133  $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
134  $ est, mem, iam, nprocs )
135  check = ( thresh.GE.0.0e+0 )
136 *
137 * Print headings
138 *
139  IF( iam.EQ.0 ) THEN
140  WRITE( nout, fmt = * )
141  WRITE( nout, fmt = 9995 )
142  WRITE( nout, fmt = 9994 )
143  WRITE( nout, fmt = * )
144  END IF
145 *
146 * Loop over different process grids
147 *
148  DO 50 i = 1, ngrids
149 *
150  nprow = pval( i )
151  npcol = qval( i )
152 *
153 * Make sure grid information is correct
154 *
155  ierr( 1 ) = 0
156  IF( nprow.LT.1 ) THEN
157  IF( iam.EQ.0 )
158  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
159  ierr( 1 ) = 1
160  ELSE IF( npcol.LT.1 ) THEN
161  IF( iam.EQ.0 )
162  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
163  ierr( 1 ) = 1
164  ELSE IF( nprow*npcol.GT.nprocs ) THEN
165  IF( iam.EQ.0 )
166  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
167  ierr( 1 ) = 1
168  END IF
169 *
170  IF( ierr( 1 ).GT.0 ) THEN
171  IF( iam.EQ.0 )
172  $ WRITE( nout, fmt = 9997 ) 'grid'
173  kskip = kskip + 1
174  GO TO 50
175  END IF
176 *
177 * Define process grid
178 *
179  CALL blacs_get( -1, 0, ictxt )
180  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
181  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182 *
183 * Go to bottom of process grid loop if this case doesn't use my
184 * process
185 *
186  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
187  $ GO TO 50
188 *
189  DO 40 j = 1, nmat
190 *
191  m = mval( j )
192  n = nval( j )
193 *
194 * Make sure matrix information is correct
195 *
196  ierr( 1 ) = 0
197  IF( m.LT.1 ) THEN
198  IF( iam.EQ.0 )
199  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
200  ierr( 1 ) = 1
201  ELSE IF( n.LT.1 ) THEN
202  IF( iam.EQ.0 )
203  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
204  ierr( 1 ) = 1
205  END IF
206 *
207 * Check all processes for an error
208 *
209  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
210 *
211  IF( ierr( 1 ).GT.0 ) THEN
212  IF( iam.EQ.0 )
213  $ WRITE( nout, fmt = 9997 ) 'matrix'
214  kskip = kskip + 1
215  GO TO 40
216  END IF
217 *
218  DO 30 k = 1, nnb
219 *
220  nb = nbval( k )
221 *
222 * Make sure nb is legal
223 *
224  ierr( 1 ) = 0
225  IF( nb.LT.1 ) THEN
226  ierr( 1 ) = 1
227  IF( iam.EQ.0 )
228  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229  END IF
230 *
231 * Check all processes for an error
232 *
233  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234 *
235  IF( ierr( 1 ).GT.0 ) THEN
236  IF( iam.EQ.0 )
237  $ WRITE( nout, fmt = 9997 ) 'NB'
238  kskip = kskip + 1
239  GO TO 30
240  END IF
241 *
242 * Padding constants
243 *
244  mp = numroc( m, nb, myrow, 0, nprow )
245  np = numroc( n, nb, myrow, 0, nprow )
246  nq = numroc( n, nb, mycol, 0, npcol )
247  IF( check ) THEN
248  iprepad = max( nb, mp )
249  imidpad = nb
250  ipostpad = max( nb, nq )
251  ELSE
252  iprepad = 0
253  imidpad = 0
254  ipostpad = 0
255  END IF
256 *
257 * Initialize the array descriptor for the matrix A
258 *
259  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
260  $ max( 1, mp )+imidpad, ierr( 1 ) )
261 *
262 * Check all processes for an error
263 *
264  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
265 *
266  IF( ierr( 1 ).LT.0 ) THEN
267  IF( iam.EQ.0 )
268  $ WRITE( nout, fmt = 9997 ) 'descriptor'
269  kskip = kskip + 1
270  GO TO 30
271  END IF
272 *
273 * Assign pointers into MEM for SCALAPACK arrays, A is
274 * allocated starting at position MEM( IPREPAD+1 )
275 *
276  ipa = iprepad+1
277  IF( est .AND. m.EQ.n ) THEN
278  ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
279  ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
280  ELSE
281  ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
282  END IF
283  lipiv = iceil( intgsz*( mp+nb ), dblesz )
284  ipw = ippiv + lipiv + ipostpad + iprepad
285 *
286  IF( check ) THEN
287 *
288 * Calculate the amount of workspace required by the
289 * checking routines PDLANGE, PDGETRRV, and
290 * PDLAFCHK
291 *
292  worksiz = max( 2, nq )
293 *
294  worksiz = max( worksiz, mp*desca( nb_ )+
295  $ nq*desca( mb_ ) )
296 *
297  worksiz = max( worksiz, mp * desca( nb_ ) )
298 *
299  worksiz = worksiz + ipostpad
300 *
301  ELSE
302 *
303  worksiz = ipostpad
304 *
305  END IF
306 *
307 * Check for adequate memory for problem size
308 *
309  ierr( 1 ) = 0
310  IF( ipw+worksiz.GT.memsiz ) THEN
311  IF( iam.EQ.0 )
312  $ WRITE( nout, fmt = 9996 ) 'factorization',
313  $ ( ipw+worksiz )*dblesz
314  ierr( 1 ) = 1
315  END IF
316 *
317 * Check all processes for an error
318 *
319  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
320 *
321  IF( ierr( 1 ).GT.0 ) THEN
322  IF( iam.EQ.0 )
323  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
324  kskip = kskip + 1
325  GO TO 30
326  END IF
327 *
328 * Generate matrix A of Ax = b
329 *
330  CALL pdmatgen( ictxt, 'No transpose', 'No transpose',
331  $ desca( m_ ), desca( n_ ), desca( mb_ ),
332  $ desca( nb_ ), mem( ipa ), desca( lld_ ),
333  $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
334  $ mp, 0, nq, myrow, mycol, nprow, npcol )
335 *
336 * Calculate inf-norm of A for residual error-checking
337 *
338  IF( check ) THEN
339  CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
340  $ desca( lld_ ), iprepad, ipostpad,
341  $ padval )
342  CALL pdfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
343  $ lipiv, iprepad, ipostpad, padval )
344  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
345  $ mem( ipw-iprepad ), worksiz-ipostpad,
346  $ iprepad, ipostpad, padval )
347  anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1, desca,
348  $ mem( ipw ) )
349  anorm1 = pdlange( '1', m, n, mem( ipa ), 1, 1, desca,
350  $ mem( ipw ) )
351  CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,
352  $ mem( ipa-iprepad ), desca( lld_ ),
353  $ iprepad, ipostpad, padval )
354  CALL pdchekpad( ictxt, 'PDLANGE', worksiz-ipostpad,
355  $ 1, mem( ipw-iprepad ),
356  $ worksiz-ipostpad, iprepad, ipostpad,
357  $ padval )
358  END IF
359 *
360  IF( est .AND. m.EQ.n ) THEN
361  CALL pdmatgen( ictxt, 'No transpose', 'No transpose',
362  $ desca( m_ ), desca( n_ ), desca( mb_ ),
363  $ desca( nb_ ), mem( ipa0 ),
364  $ desca( lld_ ), desca( rsrc_ ),
365  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
366  $ myrow, mycol, nprow, npcol )
367  IF( check )
368  $ CALL pdfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
369  $ desca( lld_ ), iprepad, ipostpad,
370  $ padval )
371  END IF
372 *
373  CALL slboot()
374  CALL blacs_barrier( ictxt, 'All' )
375  CALL sltimer( 1 )
376 *
377 * Perform LU factorization
378 *
379  CALL pdgetrf( m, n, mem( ipa ), 1, 1, desca,
380  $ mem( ippiv ), info )
381 *
382  CALL sltimer( 1 )
383 *
384  IF( info.NE.0 ) THEN
385  IF( iam.EQ.0 )
386  $ WRITE( nout, fmt = * ) 'PDGETRF INFO=', info
387  kfail = kfail + 1
388  rcond = zero
389  GO TO 30
390  END IF
391 *
392  IF( check ) THEN
393 *
394 * Check for memory overwrite in LU factorization
395 *
396  CALL pdchekpad( ictxt, 'PDGETRF', mp, nq,
397  $ mem( ipa-iprepad ), desca( lld_ ),
398  $ iprepad, ipostpad, padval )
399  CALL pdchekpad( ictxt, 'PDGETRF', lipiv, 1,
400  $ mem( ippiv-iprepad ), lipiv, iprepad,
401  $ ipostpad, padval )
402  END IF
403 *
404  IF( m.NE.n ) THEN
405 *
406 * For non-square matrices, factorization only
407 *
408  nrhs = 0
409  nbrhs = 0
410 *
411  IF( check ) THEN
412 *
413 * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
414 *
415  CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
416  $ mem( ippiv ), mem( ipw ) )
417  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
418  $ desca, iaseed, anorm, fresid,
419  $ mem( ipw ) )
420 *
421 * Check for memory overwrite
422 *
423  CALL pdchekpad( ictxt, 'PDGETRRV', mp, nq,
424  $ mem( ipa-iprepad ), desca( lld_ ),
425  $ iprepad, ipostpad, padval )
426  CALL pdchekpad( ictxt, 'PDGETRRV', lipiv, 1,
427  $ mem( ippiv-iprepad ), lipiv,
428  $ iprepad, ipostpad, padval )
429  CALL pdchekpad( ictxt, 'PDGETRRV',
430  $ worksiz-ipostpad, 1,
431  $ mem( ipw-iprepad ),
432  $ worksiz-ipostpad, iprepad,
433  $ ipostpad, padval )
434 *
435 * Test residual and detect NaN result
436 *
437  IF( ( fresid.LE.thresh ) .AND.
438  $ ( (fresid-fresid).EQ.0.0d+0 ) ) THEN
439  kpass = kpass + 1
440  passed = 'PASSED'
441  ELSE
442  kfail = kfail + 1
443  passed = 'FAILED'
444  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
445  $ WRITE( nout, fmt = 9986 ) fresid
446  END IF
447 *
448  ELSE
449 *
450 * Don't perform the checking, only timing
451 *
452  kpass = kpass + 1
453  fresid = fresid - fresid
454  passed = 'BYPASS'
455 *
456  END IF
457 *
458 * Gather maximum of all CPU and WALL clock timings
459 *
460  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
461  $ wtime )
462  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
463  $ ctime )
464 *
465 * Print results
466 *
467  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
468 *
469  maxmn = max( m, n )
470  minmn = min( m, n )
471 *
472 * M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
473 * factorization when M >= N
474 *
475  nops = dble( maxmn )*( dble( minmn )**2 ) -
476  $ (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -
477  $ (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )
478 *
479 * Calculate total megaflops -- factorization only,
480 * -- for WALL and CPU time, and print output
481 *
482 * Print WALL time if machine supports it
483 *
484  IF( wtime( 1 ).GT.0.0d+0 ) THEN
485  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
486  ELSE
487  tmflops = 0.0d+0
488  END IF
489 *
490  wtime( 2 ) = 0.0d+0
491  IF( wtime( 1 ).GE.0.0d+0 )
492  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
493  $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
494  $ wtime( 2 ), tmflops, passed
495 *
496 * Print CPU time if machine supports it
497 *
498  IF( ctime( 1 ).GT.0.0d+0 ) THEN
499  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
500  ELSE
501  tmflops = 0.0d+0
502  END IF
503 *
504  ctime( 2 ) = 0.0d+0
505  IF( ctime( 1 ).GE.0.0d+0 )
506  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
507  $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
508  $ ctime( 2 ), tmflops, passed
509  END IF
510 *
511  ELSE
512 *
513 * If M = N
514 *
515  IF( est ) THEN
516 *
517 * Calculate workspace required for PDGECON
518 *
519  lwork = max( 1, 2*np ) + max( 1, 2*nq ) +
520  $ max( 2, desca( nb_ )*
521  $ max( 1, iceil( nprow-1, npcol ) ),
522  $ nq + desca( nb_ )*
523  $ max( 1, iceil( npcol-1, nprow ) ) )
524  ipw2 = ipw + lwork + ipostpad + iprepad
525  liwork = max( 1, np )
526  lw2 = iceil( liwork*intgsz, dblesz ) + ipostpad
527 *
528  ierr( 1 ) = 0
529  IF( ipw2+lw2.GT.memsiz ) THEN
530  IF( iam.EQ.0 )
531  $ WRITE( nout, fmt = 9996 )'cond est',
532  $ ( ipw2+lw2 )*dblesz
533  ierr( 1 ) = 1
534  END IF
535 *
536 * Check all processes for an error
537 *
538  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
539  $ -1, 0 )
540 *
541  IF( ierr( 1 ).GT.0 ) THEN
542  IF( iam.EQ.0 )
543  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
544  kskip = kskip + 1
545  GO TO 30
546  END IF
547 *
548  IF( check ) THEN
549  CALL pdfillpad( ictxt, lwork, 1,
550  $ mem( ipw-iprepad ), lwork,
551  $ iprepad, ipostpad, padval )
552  CALL pdfillpad( ictxt, lw2-ipostpad, 1,
553  $ mem( ipw2-iprepad ),
554  $ lw2-ipostpad, iprepad,
555  $ ipostpad, padval )
556  END IF
557 *
558 * Compute condition number of the matrix
559 *
560  CALL pdgecon( '1', n, mem( ipa ), 1, 1, desca,
561  $ anorm1, rcond, mem( ipw ), lwork,
562  $ mem( ipw2 ), liwork, info )
563 *
564  IF( check ) THEN
565  CALL pdchekpad( ictxt, 'PDGECON', np, nq,
566  $ mem( ipa-iprepad ),
567  $ desca( lld_ ), iprepad,
568  $ ipostpad, padval )
569  CALL pdchekpad( ictxt, 'PDGECON', lwork, 1,
570  $ mem( ipw-iprepad ), lwork,
571  $ iprepad, ipostpad, padval )
572  CALL pdchekpad( ictxt, 'PDGECON',
573  $ lw2-ipostpad, 1,
574  $ mem( ipw2-iprepad ),
575  $ lw2-ipostpad, iprepad,
576  $ ipostpad, padval )
577  END IF
578  END IF
579 *
580 * Loop over the different values for NRHS
581 *
582  DO 20 hh = 1, nnr
583 *
584  nrhs = nrval( hh )
585 *
586  DO 10 kk = 1, nnbr
587 *
588  nbrhs = nbrval( kk )
589 *
590 * Initialize Array Descriptor for rhs
591 *
592  CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
593  $ ictxt, max( 1, np )+imidpad,
594  $ ierr( 1 ) )
595 *
596 * Check all processes for an error
597 *
598  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
599  $ -1, 0 )
600 *
601  IF( ierr( 1 ).LT.0 ) THEN
602  IF( iam.EQ.0 )
603  $ WRITE( nout, fmt = 9997 ) 'descriptor'
604  kskip = kskip + 1
605  GO TO 10
606  END IF
607 *
608 * move IPW to allow room for RHS
609 *
610  myrhs = numroc( descb( n_ ), descb( nb_ ),
611  $ mycol, descb( csrc_ ), npcol )
612  ipb = ipw
613 *
614  IF( est ) THEN
615  ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
616  $ iprepad
617  ipferr = ipb0 + descb( lld_ )*myrhs +
618  $ ipostpad + iprepad
619  ipberr = myrhs + ipferr + ipostpad + iprepad
620  ipw = myrhs + ipberr + ipostpad + iprepad
621  ELSE
622  ipw = ipb + descb( lld_ )*myrhs + ipostpad +
623  $ iprepad
624  END IF
625 *
626 * Set worksiz: routines requiring most workspace
627 * is PDLASCHK
628 *
629  IF( check ) THEN
630  lcm = ilcm( nprow, npcol )
631  lcmq = lcm / npcol
632  worksiz = max( worksiz-ipostpad,
633  $ nq * nbrhs + np * nbrhs +
634  $ max( max( nq*nb, 2*nbrhs ),
635  $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
636  $ 0,0,lcmq ) ) )
637  worksiz = ipostpad + worksiz
638  ELSE
639  worksiz = ipostpad
640  END IF
641 *
642  ierr( 1 ) = 0
643  IF( ipw+worksiz.GT.memsiz ) THEN
644  IF( iam.EQ.0 )
645  $ WRITE( nout, fmt = 9996 )'solve',
646  $ ( ipw+worksiz )*dblesz
647  ierr( 1 ) = 1
648  END IF
649 *
650 * Check all processes for an error
651 *
652  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
653  $ -1, 0 )
654 *
655  IF( ierr( 1 ).GT.0 ) THEN
656  IF( iam.EQ.0 )
657  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
658  kskip = kskip + 1
659  GO TO 10
660  END IF
661 *
662 * Generate RHS
663 *
664  CALL pdmatgen( ictxt, 'No', 'No', descb( m_ ),
665  $ descb( n_ ), descb( mb_ ),
666  $ descb( nb_ ), mem( ipb ),
667  $ descb( lld_ ), descb( rsrc_ ),
668  $ descb( csrc_ ), ibseed, 0, np, 0,
669  $ myrhs, myrow, mycol, nprow,
670  $ npcol )
671 *
672  IF( check )
673  $ CALL pdfillpad( ictxt, np, myrhs,
674  $ mem( ipb-iprepad ),
675  $ descb( lld_ ), iprepad,
676  $ ipostpad, padval )
677 *
678  IF( est ) THEN
679  CALL pdmatgen( ictxt, 'No', 'No',
680  $ descb( m_ ), descb( n_ ),
681  $ descb( mb_ ), descb( nb_ ),
682  $ mem( ipb0 ), descb( lld_ ),
683  $ descb( rsrc_ ),
684  $ descb( csrc_ ), ibseed, 0, np,
685  $ 0, myrhs, myrow, mycol, nprow,
686  $ npcol )
687  IF( check ) THEN
688  CALL pdfillpad( ictxt, np, myrhs,
689  $ mem( ipb0-iprepad ),
690  $ descb( lld_ ), iprepad,
691  $ ipostpad, padval )
692  CALL pdfillpad( ictxt, 1, myrhs,
693  $ mem( ipferr-iprepad ), 1,
694  $ iprepad, ipostpad,
695  $ padval )
696  CALL pdfillpad( ictxt, 1, myrhs,
697  $ mem( ipberr-iprepad ), 1,
698  $ iprepad, ipostpad,
699  $ padval )
700  END IF
701  END IF
702 *
703  CALL blacs_barrier( ictxt, 'All' )
704  CALL sltimer( 2 )
705 *
706 * Solve linear sytem via LU factorization
707 *
708  CALL pdgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
709  $ desca, mem( ippiv ), mem( ipb ),
710  $ 1, 1, descb, info )
711 *
712  CALL sltimer( 2 )
713 *
714  IF( check ) THEN
715 *
716 * check for memory overwrite
717 *
718  CALL pdchekpad( ictxt, 'PDGETRS', np, nq,
719  $ mem( ipa-iprepad ),
720  $ desca( lld_ ), iprepad,
721  $ ipostpad, padval )
722  CALL pdchekpad( ictxt, 'PDGETRS', lipiv, 1,
723  $ mem( ippiv-iprepad ), lipiv,
724  $ iprepad, ipostpad, padval )
725  CALL pdchekpad( ictxt, 'PDGETRS', np,
726  $ myrhs, mem( ipb-iprepad ),
727  $ descb( lld_ ), iprepad,
728  $ ipostpad, padval )
729 *
730  CALL pdfillpad( ictxt, worksiz-ipostpad,
731  $ 1, mem( ipw-iprepad ),
732  $ worksiz-ipostpad, iprepad,
733  $ ipostpad, padval )
734 *
735 * check the solution to rhs
736 *
737  CALL pdlaschk( 'No', 'N', n, nrhs,
738  $ mem( ipb ), 1, 1, descb,
739  $ iaseed, 1, 1, desca, ibseed,
740  $ anorm, sresid, mem( ipw ) )
741 *
742  IF( iam.EQ.0 .AND. sresid.GT.thresh )
743  $ WRITE( nout, fmt = 9985 ) sresid
744 *
745 * check for memory overwrite
746 *
747  CALL pdchekpad( ictxt, 'PDLASCHK', np,
748  $ myrhs, mem( ipb-iprepad ),
749  $ descb( lld_ ), iprepad,
750  $ ipostpad, padval )
751  CALL pdchekpad( ictxt, 'PDLASCHK',
752  $ worksiz-ipostpad, 1,
753  $ mem( ipw-iprepad ),
754  $ worksiz-ipostpad,
755  $ iprepad, ipostpad, padval )
756 *
757 * The second test is a NaN trap
758 *
759  IF( sresid.LE.thresh .AND.
760  $ ( sresid-sresid ).EQ.0.0d+0 ) THEN
761  kpass = kpass + 1
762  passed = 'PASSED'
763  ELSE
764  kfail = kfail + 1
765  passed = 'FAILED'
766  END IF
767  ELSE
768  kpass = kpass + 1
769  sresid = sresid - sresid
770  passed = 'BYPASS'
771  END IF
772 *
773  IF( est ) THEN
774 *
775 * Calculate workspace required for PDGERFS
776 *
777  lwork = max( 1, 3*np )
778  ipw2 = ipw + lwork + ipostpad + iprepad
779  liwork = max( 1, np )
780  lw2 = iceil( liwork*intgsz, dblesz ) +
781  $ ipostpad
782 *
783  ierr( 1 ) = 0
784  IF( ipw2+lw2.GT.memsiz ) THEN
785  IF( iam.EQ.0 )
786  $ WRITE( nout, fmt = 9996 )
787  $ 'iter ref', ( ipw2+lw2 )*dblesz
788  ierr( 1 ) = 1
789  END IF
790 *
791 * Check all processes for an error
792 *
793  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
794  $ ierr, 1, -1, 0 )
795 *
796  IF( ierr( 1 ).GT.0 ) THEN
797  IF( iam.EQ.0 )
798  $ WRITE( nout, fmt = 9997 )
799  $ 'MEMORY'
800  kskip = kskip + 1
801  GO TO 10
802  END IF
803 *
804  IF( check ) THEN
805  CALL pdfillpad( ictxt, lwork, 1,
806  $ mem( ipw-iprepad ),
807  $ lwork, iprepad, ipostpad,
808  $ padval )
809  CALL pdfillpad( ictxt, lw2-ipostpad, 1,
810  $ mem( ipw2-iprepad ),
811  $ lw2-ipostpad, iprepad,
812  $ ipostpad, padval )
813  END IF
814 *
815 * Use iterative refinement to improve the
816 * computed solution
817 *
818  CALL pdgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
819  $ 1, desca, mem( ipa ), 1, 1,
820  $ desca, mem( ippiv ),
821  $ mem( ipb0 ), 1, 1, descb,
822  $ mem( ipb ), 1, 1, descb,
823  $ mem( ipferr ), mem( ipberr ),
824  $ mem( ipw ), lwork, mem( ipw2 ),
825  $ liwork, info )
826 *
827  IF( check ) THEN
828  CALL pdchekpad( ictxt, 'PDGERFS', np,
829  $ nq, mem( ipa0-iprepad ),
830  $ desca( lld_ ), iprepad,
831  $ ipostpad, padval )
832  CALL pdchekpad( ictxt, 'PDGERFS', np,
833  $ nq, mem( ipa-iprepad ),
834  $ desca( lld_ ), iprepad,
835  $ ipostpad, padval )
836  CALL pdchekpad( ictxt, 'PDGERFS', lipiv,
837  $ 1, mem( ippiv-iprepad ),
838  $ lipiv, iprepad,
839  $ ipostpad, padval )
840  CALL pdchekpad( ictxt, 'PDGERFS', np,
841  $ myrhs, mem( ipb-iprepad ),
842  $ descb( lld_ ), iprepad,
843  $ ipostpad, padval )
844  CALL pdchekpad( ictxt, 'PDGERFS', np,
845  $ myrhs,
846  $ mem( ipb0-iprepad ),
847  $ descb( lld_ ), iprepad,
848  $ ipostpad, padval )
849  CALL pdchekpad( ictxt, 'PDGERFS', 1,
850  $ myrhs,
851  $ mem( ipferr-iprepad ), 1,
852  $ iprepad, ipostpad,
853  $ padval )
854  CALL pdchekpad( ictxt, 'PDGERFS', 1,
855  $ myrhs,
856  $ mem( ipberr-iprepad ), 1,
857  $ iprepad, ipostpad,
858  $ padval )
859  CALL pdchekpad( ictxt, 'PDGERFS', lwork,
860  $ 1, mem( ipw-iprepad ),
861  $ lwork, iprepad, ipostpad,
862  $ padval )
863  CALL pdchekpad( ictxt, 'PDGERFS',
864  $ lw2-ipostpad, 1,
865  $ mem( ipw2-iprepad ),
866  $ lw2-ipostpad, iprepad,
867  $ ipostpad, padval )
868 *
869  CALL pdfillpad( ictxt, worksiz-ipostpad,
870  $ 1, mem( ipw-iprepad ),
871  $ worksiz-ipostpad, iprepad,
872  $ ipostpad, padval )
873 *
874 * check the solution to rhs
875 *
876  CALL pdlaschk( 'No', 'N', n, nrhs,
877  $ mem( ipb ), 1, 1, descb,
878  $ iaseed, 1, 1, desca,
879  $ ibseed, anorm, sresid2,
880  $ mem( ipw ) )
881 *
882  IF( iam.EQ.0 .AND. sresid2.GT.thresh )
883  $ WRITE( nout, fmt = 9985 ) sresid2
884 *
885 * check for memory overwrite
886 *
887  CALL pdchekpad( ictxt, 'PDLASCHK', np,
888  $ myrhs, mem( ipb-iprepad ),
889  $ descb( lld_ ), iprepad,
890  $ ipostpad, padval )
891  CALL pdchekpad( ictxt, 'PDLASCHK',
892  $ worksiz-ipostpad, 1,
893  $ mem( ipw-iprepad ),
894  $ worksiz-ipostpad, iprepad,
895  $ ipostpad, padval )
896  END IF
897  END IF
898 *
899 * Gather max. of all CPU and WALL clock timings
900 *
901  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
902  $ wtime )
903  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
904  $ ctime )
905 *
906 * Print results
907 *
908  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
909 *
910 * 2/3 N^3 - 1/2 N^2 flops for LU factorization
911 *
912  nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -
913  $ (1.0d+0/2.0d+0)*( dble(n)**2 )
914 *
915 * nrhs * 2 N^2 flops for LU solve.
916 *
917  nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)
918 *
919 * Calculate total megaflops -- factorization
920 * and solve -- for WALL and CPU time, and print
921 * output
922 *
923 * Print WALL time if machine supports it
924 *
925  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
926  $ THEN
927  tmflops = nops /
928  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
929  ELSE
930  tmflops = 0.0d+0
931  END IF
932 *
933 * Print WALL time if supported
934 *
935  IF( wtime( 2 ).GE.0.0d+0 )
936  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
937  $ nb, nrhs, nbrhs, nprow, npcol,
938  $ wtime( 1 ), wtime( 2 ), tmflops,
939  $ passed
940 *
941 * Print CPU time if supported
942 *
943  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
944  $ THEN
945  tmflops = nops /
946  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
947  ELSE
948  tmflops = 0.0d+0
949  END IF
950 *
951  IF( ctime( 2 ).GE.0.0d+0 )
952  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
953  $ nb, nrhs, nbrhs, nprow, npcol,
954  $ ctime( 1 ), ctime( 2 ), tmflops,
955  $ passed
956  END IF
957  10 CONTINUE
958  20 CONTINUE
959 *
960  IF( check.AND.( sresid.GT.thresh ) ) THEN
961 *
962 * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
963 *
964  CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
965  $ mem( ippiv ), mem( ipw ) )
966  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
967  $ 1, desca, iaseed, anorm, fresid,
968  $ mem( ipw ) )
969 *
970 * Check for memory overwrite
971 *
972  CALL pdchekpad( ictxt, 'PDGETRRV', np, nq,
973  $ mem( ipa-iprepad ), desca( lld_ ),
974  $ iprepad, ipostpad, padval )
975  CALL pdchekpad( ictxt, 'PDGETRRV', lipiv,
976  $ 1, mem( ippiv-iprepad ), lipiv,
977  $ iprepad, ipostpad, padval )
978  CALL pdchekpad( ictxt, 'PDGETRRV',
979  $ worksiz-ipostpad, 1,
980  $ mem( ipw-iprepad ),
981  $ worksiz-ipostpad, iprepad,
982  $ ipostpad, padval )
983 *
984  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
985  $ WRITE( nout, fmt = 9986 ) fresid
986  END IF
987  END IF
988  30 CONTINUE
989  40 CONTINUE
990  CALL blacs_gridexit( ictxt )
991  50 CONTINUE
992 *
993 * Print ending messages and close output file
994 *
995  60 CONTINUE
996  IF( iam.EQ.0 ) THEN
997  ktests = kpass + kfail + kskip
998  WRITE( nout, fmt = * )
999  WRITE( nout, fmt = 9992 ) ktests
1000  IF( check ) THEN
1001  WRITE( nout, fmt = 9991 ) kpass
1002  WRITE( nout, fmt = 9989 ) kfail
1003  ELSE
1004  WRITE( nout, fmt = 9990 ) kpass
1005  END IF
1006  WRITE( nout, fmt = 9988 ) kskip
1007  WRITE( nout, fmt = * )
1008  WRITE( nout, fmt = * )
1009  WRITE( nout, fmt = 9987 )
1010  IF( nout.NE.6 .AND. nout.NE.0 )
1011  $ CLOSE( nout )
1012  END IF
1013 *
1014  CALL blacs_exit( 0 )
1015 *
1016  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1017  $ '; It should be at least 1' )
1018  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1019  $ i4 )
1020  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1021  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1022  $ i11 )
1023  9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1024  $ 'Sol Time MFLOPS CHECK' )
1025  9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1026  $ '-------- -------- ------' )
1027  9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1028  $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1029  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1030  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1031  9990 FORMAT( i5, ' tests completed without checking.' )
1032  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1033  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1034  9987 FORMAT( 'END OF TESTS.' )
1035  9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1036  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1037 *
1038  stop
1039 *
1040 * End of PDLUDRIVER
1041 *
1042  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pdgerfs
subroutine pdgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pdgerfs.f:5
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pdlaschk
subroutine pdlaschk(SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK)
Definition: pdlaschk.f:4
pdgecon
subroutine pdgecon(NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pdgecon.f:3
pdgetrs
subroutine pdgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: pdgetrs.f:3
pdludriver
program pdludriver
Definition: pdludriver.f:1
pdgetrf
subroutine pdgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: pdgetrf.f:2
pdgetrrv
subroutine pdgetrrv(M, N, A, IA, JA, DESCA, IPIV, WORK)
Definition: pdgetrrv.f:2
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pdlafchk
subroutine pdlafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pdlafchk.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pdluinfo
subroutine pdluinfo(SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, EST, WORK, IAM, NPROCS)
Definition: pdluinfo.f:5
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2