SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psludriver.f
Go to the documentation of this file.
1 PROGRAM psludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PSLUDRIVER is the main test program for the REAL
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* REALSZ INTEGER, default = 4 bytes.
57* INTGSZ and REALSZ indicate the length in bytes on the
58* given platform for an integer and a single precision real.
59* MEM REAL array, dimension ( TOTMEM / REALSZ )
60*
61* All arrays used by SCALAPACK routines are allocated from
62* this array and referenced by pointers. The integer IPA,
63* for example, is a pointer to the starting element of MEM for
64* the matrix A.
65*
66* =====================================================================
67*
68* .. Parameters ..
69 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70 $ lld_, mb_, m_, nb_, n_, rsrc_
71 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74 INTEGER intgsz, memsiz, ntests, realsz, totmem
75 REAL padval, zero
76 parameter( intgsz = 4, realsz = 4, totmem = 2000000,
77 $ memsiz = totmem / realsz, ntests = 20,
78 $ padval = -9923.0e+0, zero = 0.0e+0 )
79* ..
80* .. Local Scalars ..
81 LOGICAL check, est
82 CHARACTER*6 passed
83 CHARACTER*80 outfile
84 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88 $ lipiv, liwork, lwork, lw2, m, maxmn,
89 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91 $ nprocs, nprow, nq, nrhs, worksiz
92 REAL anorm, anorm1, fresid, rcond, sresid, sresid2,
93 $ thresh
94 DOUBLE PRECISION nops, tmflops
95* ..
96* .. Local Arrays ..
97 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98 $ mval( ntests ), nbrval( ntests ),
99 $ nbval( ntests ), nrval( ntests ),
100 $ nval( ntests ), pval( ntests ),
101 $ qval( ntests )
102 REAL mem( memsiz )
103 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
104* ..
105* .. External Subroutines ..
106 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
108 $ blacs_pinfo, descinit, igsum2d, pschekpad,
113* ..
114* .. External Functions ..
115 INTEGER iceil, ilcm, numroc
116 REAL pslange
117 EXTERNAL iceil, ilcm, numroc, pslange
118* ..
119* .. Intrinsic Functions ..
120 INTRINSIC dble, max, min
121* ..
122* .. Data Statements ..
123 DATA kfail, kpass, kskip, ktests / 4*0 /
124* ..
125* .. Executable Statements ..
126*
127* Get starting information
128*
129 CALL blacs_pinfo( iam, nprocs )
130 iaseed = 100
131 ibseed = 200
132 CALL psluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
133 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
134 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
135 $ est, mem, iam, nprocs )
136 check = ( thresh.GE.0.0e+0 )
137*
138* Print headings
139*
140 IF( iam.EQ.0 ) THEN
141 WRITE( nout, fmt = * )
142 WRITE( nout, fmt = 9995 )
143 WRITE( nout, fmt = 9994 )
144 WRITE( nout, fmt = * )
145 END IF
146*
147* Loop over different process grids
148*
149 DO 50 i = 1, ngrids
150*
151 nprow = pval( i )
152 npcol = qval( i )
153*
154* Make sure grid information is correct
155*
156 ierr( 1 ) = 0
157 IF( nprow.LT.1 ) THEN
158 IF( iam.EQ.0 )
159 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
160 ierr( 1 ) = 1
161 ELSE IF( npcol.LT.1 ) THEN
162 IF( iam.EQ.0 )
163 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
164 ierr( 1 ) = 1
165 ELSE IF( nprow*npcol.GT.nprocs ) THEN
166 IF( iam.EQ.0 )
167 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
168 ierr( 1 ) = 1
169 END IF
170*
171 IF( ierr( 1 ).GT.0 ) THEN
172 IF( iam.EQ.0 )
173 $ WRITE( nout, fmt = 9997 ) 'grid'
174 kskip = kskip + 1
175 GO TO 50
176 END IF
177*
178* Define process grid
179*
180 CALL blacs_get( -1, 0, ictxt )
181 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
182 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
183*
184* Go to bottom of process grid loop if this case doesn't use my
185* process
186*
187 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
188 $ GO TO 50
189*
190 DO 40 j = 1, nmat
191*
192 m = mval( j )
193 n = nval( j )
194*
195* Make sure matrix information is correct
196*
197 ierr( 1 ) = 0
198 IF( m.LT.1 ) THEN
199 IF( iam.EQ.0 )
200 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
201 ierr( 1 ) = 1
202 ELSE IF( n.LT.1 ) THEN
203 IF( iam.EQ.0 )
204 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
205 ierr( 1 ) = 1
206 END IF
207*
208* Check all processes for an error
209*
210 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
211*
212 IF( ierr( 1 ).GT.0 ) THEN
213 IF( iam.EQ.0 )
214 $ WRITE( nout, fmt = 9997 ) 'matrix'
215 kskip = kskip + 1
216 GO TO 40
217 END IF
218*
219 DO 30 k = 1, nnb
220*
221 nb = nbval( k )
222*
223* Make sure nb is legal
224*
225 ierr( 1 ) = 0
226 IF( nb.LT.1 ) THEN
227 ierr( 1 ) = 1
228 IF( iam.EQ.0 )
229 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
230 END IF
231*
232* Check all processes for an error
233*
234 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
235*
236 IF( ierr( 1 ).GT.0 ) THEN
237 IF( iam.EQ.0 )
238 $ WRITE( nout, fmt = 9997 ) 'NB'
239 kskip = kskip + 1
240 GO TO 30
241 END IF
242*
243* Padding constants
244*
245 mp = numroc( m, nb, myrow, 0, nprow )
246 np = numroc( n, nb, myrow, 0, nprow )
247 nq = numroc( n, nb, mycol, 0, npcol )
248 IF( check ) THEN
249 iprepad = max( nb, mp )
250 imidpad = nb
251 ipostpad = max( nb, nq )
252 ELSE
253 iprepad = 0
254 imidpad = 0
255 ipostpad = 0
256 END IF
257*
258* Initialize the array descriptor for the matrix A
259*
260 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
261 $ max( 1, mp )+imidpad, ierr( 1 ) )
262*
263* Check all processes for an error
264*
265 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
266*
267 IF( ierr( 1 ).LT.0 ) THEN
268 IF( iam.EQ.0 )
269 $ WRITE( nout, fmt = 9997 ) 'descriptor'
270 kskip = kskip + 1
271 GO TO 30
272 END IF
273*
274* Assign pointers into MEM for SCALAPACK arrays, A is
275* allocated starting at position MEM( IPREPAD+1 )
276*
277 ipa = iprepad+1
278 IF( est .AND. m.EQ.n ) THEN
279 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
280 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
281 ELSE
282 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
283 END IF
284 lipiv = iceil( intgsz*( mp+nb ), realsz )
285 ipw = ippiv + lipiv + ipostpad + iprepad
286*
287 IF( check ) THEN
288*
289* Calculate the amount of workspace required by the
290* checking routines PSLANGE, PSGETRRV, and
291* PSLAFCHK
292*
293 worksiz = max( 2, nq )
294*
295 worksiz = max( worksiz, mp*desca( nb_ )+
296 $ nq*desca( mb_ ) )
297*
298 worksiz = max( worksiz, mp * desca( nb_ ) )
299*
300 worksiz = worksiz + ipostpad
301*
302 ELSE
303*
304 worksiz = ipostpad
305*
306 END IF
307*
308* Check for adequate memory for problem size
309*
310 ierr( 1 ) = 0
311 IF( ipw+worksiz.GT.memsiz ) THEN
312 IF( iam.EQ.0 )
313 $ WRITE( nout, fmt = 9996 ) 'factorization',
314 $ ( ipw+worksiz )*realsz
315 ierr( 1 ) = 1
316 END IF
317*
318* Check all processes for an error
319*
320 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
321*
322 IF( ierr( 1 ).GT.0 ) THEN
323 IF( iam.EQ.0 )
324 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
325 kskip = kskip + 1
326 GO TO 30
327 END IF
328*
329* Generate matrix A of Ax = b
330*
331 CALL psmatgen( ictxt, 'No transpose', 'No transpose',
332 $ desca( m_ ), desca( n_ ), desca( mb_ ),
333 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
334 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
335 $ mp, 0, nq, myrow, mycol, nprow, npcol )
336*
337* Calculate inf-norm of A for residual error-checking
338*
339 IF( check ) THEN
340 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
341 $ desca( lld_ ), iprepad, ipostpad,
342 $ padval )
343 CALL psfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
344 $ lipiv, iprepad, ipostpad, padval )
345 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
346 $ mem( ipw-iprepad ), worksiz-ipostpad,
347 $ iprepad, ipostpad, padval )
348 anorm = pslange( 'I', m, n, mem( ipa ), 1, 1, desca,
349 $ mem( ipw ) )
350 anorm1 = pslange( '1', m, n, mem( ipa ), 1, 1, desca,
351 $ mem( ipw ) )
352 CALL pschekpad( ictxt, 'PSLANGE', mp, nq,
353 $ mem( ipa-iprepad ), desca( lld_ ),
354 $ iprepad, ipostpad, padval )
355 CALL pschekpad( ictxt, 'PSLANGE', worksiz-ipostpad,
356 $ 1, mem( ipw-iprepad ),
357 $ worksiz-ipostpad, iprepad, ipostpad,
358 $ padval )
359 END IF
360*
361 IF( est .AND. m.EQ.n ) THEN
362 CALL psmatgen( ictxt, 'No transpose', 'No transpose',
363 $ desca( m_ ), desca( n_ ), desca( mb_ ),
364 $ desca( nb_ ), mem( ipa0 ),
365 $ desca( lld_ ), desca( rsrc_ ),
366 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
367 $ myrow, mycol, nprow, npcol )
368 IF( check )
369 $ CALL psfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
370 $ desca( lld_ ), iprepad, ipostpad,
371 $ padval )
372 END IF
373*
374 CALL slboot()
375 CALL blacs_barrier( ictxt, 'All' )
376 CALL sltimer( 1 )
377*
378* Perform LU factorization
379*
380 CALL psgetrf( m, n, mem( ipa ), 1, 1, desca,
381 $ mem( ippiv ), info )
382*
383 CALL sltimer( 1 )
384*
385 IF( info.NE.0 ) THEN
386 IF( iam.EQ.0 )
387 $ WRITE( nout, fmt = * ) 'PSGETRF INFO=', info
388 kfail = kfail + 1
389 rcond = zero
390 GO TO 30
391 END IF
392*
393 IF( check ) THEN
394*
395* Check for memory overwrite in LU factorization
396*
397 CALL pschekpad( ictxt, 'PSGETRF', mp, nq,
398 $ mem( ipa-iprepad ), desca( lld_ ),
399 $ iprepad, ipostpad, padval )
400 CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
401 $ mem( ippiv-iprepad ), lipiv, iprepad,
402 $ ipostpad, padval )
403 END IF
404*
405 IF( m.NE.n ) THEN
406*
407* For non-square matrices, factorization only
408*
409 nrhs = 0
410 nbrhs = 0
411*
412 IF( check ) THEN
413*
414* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
415*
416 CALL psgetrrv( m, n, mem( ipa ), 1, 1, desca,
417 $ mem( ippiv ), mem( ipw ) )
418 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
419 $ desca, iaseed, anorm, fresid,
420 $ mem( ipw ) )
421*
422* Check for memory overwrite
423*
424 CALL pschekpad( ictxt, 'PSGETRRV', mp, nq,
425 $ mem( ipa-iprepad ), desca( lld_ ),
426 $ iprepad, ipostpad, padval )
427 CALL pschekpad( ictxt, 'PSGETRRV', lipiv, 1,
428 $ mem( ippiv-iprepad ), lipiv,
429 $ iprepad, ipostpad, padval )
430 CALL pschekpad( ictxt, 'PSGETRRV',
431 $ worksiz-ipostpad, 1,
432 $ mem( ipw-iprepad ),
433 $ worksiz-ipostpad, iprepad,
434 $ ipostpad, padval )
435*
436* Test residual and detect NaN result
437*
438 IF( ( fresid.LE.thresh ) .AND.
439 $ ( (fresid-fresid).EQ.0.0e+0 ) ) THEN
440 kpass = kpass + 1
441 passed = 'PASSED'
442 ELSE
443 kfail = kfail + 1
444 passed = 'FAILED'
445 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
446 $ WRITE( nout, fmt = 9986 ) fresid
447 END IF
448*
449 ELSE
450*
451* Don't perform the checking, only timing
452*
453 kpass = kpass + 1
454 fresid = fresid - fresid
455 passed = 'BYPASS'
456*
457 END IF
458*
459* Gather maximum of all CPU and WALL clock timings
460*
461 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
462 $ wtime )
463 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
464 $ ctime )
465*
466* Print results
467*
468 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
469*
470 maxmn = max( m, n )
471 minmn = min( m, n )
472*
473* M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
474* factorization when M >= N
475*
476 nops = dble( maxmn )*( dble( minmn )**2 ) -
477 $ (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -
478 $ (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )
479*
480* Calculate total megaflops -- factorization only,
481* -- for WALL and CPU time, and print output
482*
483* Print WALL time if machine supports it
484*
485 IF( wtime( 1 ).GT.0.0d+0 ) THEN
486 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
487 ELSE
488 tmflops = 0.0d+0
489 END IF
490*
491 wtime( 2 ) = 0.0d+0
492 IF( wtime( 1 ).GE.0.0d+0 )
493 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
494 $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
495 $ wtime( 2 ), tmflops, passed
496*
497* Print CPU time if machine supports it
498*
499 IF( ctime( 1 ).GT.0.0d+0 ) THEN
500 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
501 ELSE
502 tmflops = 0.0d+0
503 END IF
504*
505 ctime( 2 ) = 0.0d+0
506 IF( ctime( 1 ).GE.0.0d+0 )
507 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
508 $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
509 $ ctime( 2 ), tmflops, passed
510 END IF
511*
512 ELSE
513*
514* If M = N
515*
516 IF( est ) THEN
517*
518* Calculate workspace required for PSGECON
519*
520 lwork = max( 1, 2*np ) + max( 1, 2*nq ) +
521 $ max( 2, desca( nb_ )*
522 $ max( 1, iceil( nprow-1, npcol ) ),
523 $ nq + desca( nb_ )*
524 $ max( 1, iceil( npcol-1, nprow ) ) )
525 ipw2 = ipw + lwork + ipostpad + iprepad
526 liwork = max( 1, np )
527 lw2 = iceil( liwork*intgsz, realsz ) + ipostpad
528*
529 ierr( 1 ) = 0
530 IF( ipw2+lw2.GT.memsiz ) THEN
531 IF( iam.EQ.0 )
532 $ WRITE( nout, fmt = 9996 )'cond est',
533 $ ( ipw2+lw2 )*realsz
534 ierr( 1 ) = 1
535 END IF
536*
537* Check all processes for an error
538*
539 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
540 $ -1, 0 )
541*
542 IF( ierr( 1 ).GT.0 ) THEN
543 IF( iam.EQ.0 )
544 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
545 kskip = kskip + 1
546 GO TO 30
547 END IF
548*
549 IF( check ) THEN
550 CALL psfillpad( ictxt, lwork, 1,
551 $ mem( ipw-iprepad ), lwork,
552 $ iprepad, ipostpad, padval )
553 CALL psfillpad( ictxt, lw2-ipostpad, 1,
554 $ mem( ipw2-iprepad ),
555 $ lw2-ipostpad, iprepad,
556 $ ipostpad, padval )
557 END IF
558*
559* Compute condition number of the matrix
560*
561 CALL psgecon( '1', n, mem( ipa ), 1, 1, desca,
562 $ anorm1, rcond, mem( ipw ), lwork,
563 $ mem( ipw2 ), liwork, info )
564*
565 IF( check ) THEN
566 CALL pschekpad( ictxt, 'PSGECON', np, nq,
567 $ mem( ipa-iprepad ),
568 $ desca( lld_ ), iprepad,
569 $ ipostpad, padval )
570 CALL pschekpad( ictxt, 'PSGECON', lwork, 1,
571 $ mem( ipw-iprepad ), lwork,
572 $ iprepad, ipostpad, padval )
573 CALL pschekpad( ictxt, 'PSGECON',
574 $ lw2-ipostpad, 1,
575 $ mem( ipw2-iprepad ),
576 $ lw2-ipostpad, iprepad,
577 $ ipostpad, padval )
578 END IF
579 END IF
580*
581* Loop over the different values for NRHS
582*
583 DO 20 hh = 1, nnr
584*
585 nrhs = nrval( hh )
586*
587 DO 10 kk = 1, nnbr
588*
589 nbrhs = nbrval( kk )
590*
591* Initialize Array Descriptor for rhs
592*
593 CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
594 $ ictxt, max( 1, np )+imidpad,
595 $ ierr( 1 ) )
596*
597* Check all processes for an error
598*
599 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
600 $ -1, 0 )
601*
602 IF( ierr( 1 ).LT.0 ) THEN
603 IF( iam.EQ.0 )
604 $ WRITE( nout, fmt = 9997 ) 'descriptor'
605 kskip = kskip + 1
606 GO TO 10
607 END IF
608*
609* move IPW to allow room for RHS
610*
611 myrhs = numroc( descb( n_ ), descb( nb_ ),
612 $ mycol, descb( csrc_ ), npcol )
613 ipb = ipw
614*
615 IF( est ) THEN
616 ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
617 $ iprepad
618 ipferr = ipb0 + descb( lld_ )*myrhs +
619 $ ipostpad + iprepad
620 ipberr = myrhs + ipferr + ipostpad + iprepad
621 ipw = myrhs + ipberr + ipostpad + iprepad
622 ELSE
623 ipw = ipb + descb( lld_ )*myrhs + ipostpad +
624 $ iprepad
625 END IF
626*
627* Set worksiz: routines requiring most workspace
628* is PSLASCHK
629*
630 IF( check ) THEN
631 lcm = ilcm( nprow, npcol )
632 lcmq = lcm / npcol
633 worksiz = max( worksiz-ipostpad,
634 $ nq * nbrhs + np * nbrhs +
635 $ max( max( nq*nb, 2*nbrhs ),
636 $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
637 $ 0,0,lcmq ) ) )
638 worksiz = ipostpad + worksiz
639 ELSE
640 worksiz = ipostpad
641 END IF
642*
643 ierr( 1 ) = 0
644 IF( ipw+worksiz.GT.memsiz ) THEN
645 IF( iam.EQ.0 )
646 $ WRITE( nout, fmt = 9996 )'solve',
647 $ ( ipw+worksiz )*realsz
648 ierr( 1 ) = 1
649 END IF
650*
651* Check all processes for an error
652*
653 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
654 $ -1, 0 )
655*
656 IF( ierr( 1 ).GT.0 ) THEN
657 IF( iam.EQ.0 )
658 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
659 kskip = kskip + 1
660 GO TO 10
661 END IF
662*
663* Generate RHS
664*
665 CALL psmatgen( ictxt, 'No', 'No', descb( m_ ),
666 $ descb( n_ ), descb( mb_ ),
667 $ descb( nb_ ), mem( ipb ),
668 $ descb( lld_ ), descb( rsrc_ ),
669 $ descb( csrc_ ), ibseed, 0, np, 0,
670 $ myrhs, myrow, mycol, nprow,
671 $ npcol )
672*
673 IF( check )
674 $ CALL psfillpad( ictxt, np, myrhs,
675 $ mem( ipb-iprepad ),
676 $ descb( lld_ ), iprepad,
677 $ ipostpad, padval )
678*
679 IF( est ) THEN
680 CALL psmatgen( ictxt, 'No', 'No',
681 $ descb( m_ ), descb( n_ ),
682 $ descb( mb_ ), descb( nb_ ),
683 $ mem( ipb0 ), descb( lld_ ),
684 $ descb( rsrc_ ),
685 $ descb( csrc_ ), ibseed, 0, np,
686 $ 0, myrhs, myrow, mycol, nprow,
687 $ npcol )
688 IF( check ) THEN
689 CALL psfillpad( ictxt, np, myrhs,
690 $ mem( ipb0-iprepad ),
691 $ descb( lld_ ), iprepad,
692 $ ipostpad, padval )
693 CALL psfillpad( ictxt, 1, myrhs,
694 $ mem( ipferr-iprepad ), 1,
695 $ iprepad, ipostpad,
696 $ padval )
697 CALL psfillpad( ictxt, 1, myrhs,
698 $ mem( ipberr-iprepad ), 1,
699 $ iprepad, ipostpad,
700 $ padval )
701 END IF
702 END IF
703*
704 CALL blacs_barrier( ictxt, 'All' )
705 CALL sltimer( 2 )
706*
707* Solve linear sytem via LU factorization
708*
709 CALL psgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
710 $ desca, mem( ippiv ), mem( ipb ),
711 $ 1, 1, descb, info )
712*
713 CALL sltimer( 2 )
714*
715 IF( check ) THEN
716*
717* check for memory overwrite
718*
719 CALL pschekpad( ictxt, 'PSGETRS', np, nq,
720 $ mem( ipa-iprepad ),
721 $ desca( lld_ ), iprepad,
722 $ ipostpad, padval )
723 CALL pschekpad( ictxt, 'PSGETRS', lipiv, 1,
724 $ mem( ippiv-iprepad ), lipiv,
725 $ iprepad, ipostpad, padval )
726 CALL pschekpad( ictxt, 'PSGETRS', np,
727 $ myrhs, mem( ipb-iprepad ),
728 $ descb( lld_ ), iprepad,
729 $ ipostpad, padval )
730*
731 CALL psfillpad( ictxt, worksiz-ipostpad,
732 $ 1, mem( ipw-iprepad ),
733 $ worksiz-ipostpad, iprepad,
734 $ ipostpad, padval )
735*
736* check the solution to rhs
737*
738 CALL pslaschk( 'No', 'N', n, nrhs,
739 $ mem( ipb ), 1, 1, descb,
740 $ iaseed, 1, 1, desca, ibseed,
741 $ anorm, sresid, mem( ipw ) )
742*
743 IF( iam.EQ.0 .AND. sresid.GT.thresh )
744 $ WRITE( nout, fmt = 9985 ) sresid
745*
746* check for memory overwrite
747*
748 CALL pschekpad( ictxt, 'PSLASCHK', np,
749 $ myrhs, mem( ipb-iprepad ),
750 $ descb( lld_ ), iprepad,
751 $ ipostpad, padval )
752 CALL pschekpad( ictxt, 'PSLASCHK',
753 $ worksiz-ipostpad, 1,
754 $ mem( ipw-iprepad ),
755 $ worksiz-ipostpad,
756 $ iprepad, ipostpad, padval )
757*
758* The second test is a NaN trap
759*
760 IF( sresid.LE.thresh .AND.
761 $ ( sresid-sresid ).EQ.0.0e+0 ) THEN
762 kpass = kpass + 1
763 passed = 'PASSED'
764 ELSE
765 kfail = kfail + 1
766 passed = 'FAILED'
767 END IF
768 ELSE
769 kpass = kpass + 1
770 sresid = sresid - sresid
771 passed = 'BYPASS'
772 END IF
773*
774 IF( est ) THEN
775*
776* Calculate workspace required for PSGERFS
777*
778 lwork = max( 1, 3*np )
779 ipw2 = ipw + lwork + ipostpad + iprepad
780 liwork = max( 1, np )
781 lw2 = iceil( liwork*intgsz, realsz ) +
782 $ ipostpad
783*
784 ierr( 1 ) = 0
785 IF( ipw2+lw2.GT.memsiz ) THEN
786 IF( iam.EQ.0 )
787 $ WRITE( nout, fmt = 9996 )
788 $ 'iter ref', ( ipw2+lw2 )*realsz
789 ierr( 1 ) = 1
790 END IF
791*
792* Check all processes for an error
793*
794 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
795 $ ierr, 1, -1, 0 )
796*
797 IF( ierr( 1 ).GT.0 ) THEN
798 IF( iam.EQ.0 )
799 $ WRITE( nout, fmt = 9997 )
800 $ 'MEMORY'
801 kskip = kskip + 1
802 GO TO 10
803 END IF
804*
805 IF( check ) THEN
806 CALL psfillpad( ictxt, lwork, 1,
807 $ mem( ipw-iprepad ),
808 $ lwork, iprepad, ipostpad,
809 $ padval )
810 CALL psfillpad( ictxt, lw2-ipostpad, 1,
811 $ mem( ipw2-iprepad ),
812 $ lw2-ipostpad, iprepad,
813 $ ipostpad, padval )
814 END IF
815*
816* Use iterative refinement to improve the
817* computed solution
818*
819 CALL psgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
820 $ 1, desca, mem( ipa ), 1, 1,
821 $ desca, mem( ippiv ),
822 $ mem( ipb0 ), 1, 1, descb,
823 $ mem( ipb ), 1, 1, descb,
824 $ mem( ipferr ), mem( ipberr ),
825 $ mem( ipw ), lwork, mem( ipw2 ),
826 $ liwork, info )
827*
828 IF( check ) THEN
829 CALL pschekpad( ictxt, 'PSGERFS', np,
830 $ nq, mem( ipa0-iprepad ),
831 $ desca( lld_ ), iprepad,
832 $ ipostpad, padval )
833 CALL pschekpad( ictxt, 'PSGERFS', np,
834 $ nq, mem( ipa-iprepad ),
835 $ desca( lld_ ), iprepad,
836 $ ipostpad, padval )
837 CALL pschekpad( ictxt, 'PSGERFS', lipiv,
838 $ 1, mem( ippiv-iprepad ),
839 $ lipiv, iprepad,
840 $ ipostpad, padval )
841 CALL pschekpad( ictxt, 'PSGERFS', np,
842 $ myrhs, mem( ipb-iprepad ),
843 $ descb( lld_ ), iprepad,
844 $ ipostpad, padval )
845 CALL pschekpad( ictxt, 'PSGERFS', np,
846 $ myrhs,
847 $ mem( ipb0-iprepad ),
848 $ descb( lld_ ), iprepad,
849 $ ipostpad, padval )
850 CALL pschekpad( ictxt, 'PSGERFS', 1,
851 $ myrhs,
852 $ mem( ipferr-iprepad ), 1,
853 $ iprepad, ipostpad,
854 $ padval )
855 CALL pschekpad( ictxt, 'PSGERFS', 1,
856 $ myrhs,
857 $ mem( ipberr-iprepad ), 1,
858 $ iprepad, ipostpad,
859 $ padval )
860 CALL pschekpad( ictxt, 'PSGERFS', lwork,
861 $ 1, mem( ipw-iprepad ),
862 $ lwork, iprepad, ipostpad,
863 $ padval )
864 CALL pschekpad( ictxt, 'PSGERFS',
865 $ lw2-ipostpad, 1,
866 $ mem( ipw2-iprepad ),
867 $ lw2-ipostpad, iprepad,
868 $ ipostpad, padval )
869*
870 CALL psfillpad( ictxt, worksiz-ipostpad,
871 $ 1, mem( ipw-iprepad ),
872 $ worksiz-ipostpad, iprepad,
873 $ ipostpad, padval )
874*
875* check the solution to rhs
876*
877 CALL pslaschk( 'No', 'N', n, nrhs,
878 $ mem( ipb ), 1, 1, descb,
879 $ iaseed, 1, 1, desca,
880 $ ibseed, anorm, sresid2,
881 $ mem( ipw ) )
882*
883 IF( iam.EQ.0 .AND. sresid2.GT.thresh )
884 $ WRITE( nout, fmt = 9985 ) sresid2
885*
886* check for memory overwrite
887*
888 CALL pschekpad( ictxt, 'PSLASCHK', np,
889 $ myrhs, mem( ipb-iprepad ),
890 $ descb( lld_ ), iprepad,
891 $ ipostpad, padval )
892 CALL pschekpad( ictxt, 'PSLASCHK',
893 $ worksiz-ipostpad, 1,
894 $ mem( ipw-iprepad ),
895 $ worksiz-ipostpad, iprepad,
896 $ ipostpad, padval )
897 END IF
898 END IF
899*
900* Gather max. of all CPU and WALL clock timings
901*
902 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
903 $ wtime )
904 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
905 $ ctime )
906*
907* Print results
908*
909 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
910*
911* 2/3 N^3 - 1/2 N^2 flops for LU factorization
912*
913 nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -
914 $ (1.0d+0/2.0d+0)*( dble(n)**2 )
915*
916* nrhs * 2 N^2 flops for LU solve.
917*
918 nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)
919*
920* Calculate total megaflops -- factorization
921* and solve -- for WALL and CPU time, and print
922* output
923*
924* Print WALL time if machine supports it
925*
926 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
927 $ THEN
928 tmflops = nops /
929 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
930 ELSE
931 tmflops = 0.0d+0
932 END IF
933*
934* Print WALL time if supported
935*
936 IF( wtime( 2 ).GE.0.0d+0 )
937 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
938 $ nb, nrhs, nbrhs, nprow, npcol,
939 $ wtime( 1 ), wtime( 2 ), tmflops,
940 $ passed
941*
942* Print CPU time if supported
943*
944 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
945 $ THEN
946 tmflops = nops /
947 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
948 ELSE
949 tmflops = 0.0d+0
950 END IF
951*
952 IF( ctime( 2 ).GE.0.0d+0 )
953 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
954 $ nb, nrhs, nbrhs, nprow, npcol,
955 $ ctime( 1 ), ctime( 2 ), tmflops,
956 $ passed
957 END IF
958 10 CONTINUE
959 20 CONTINUE
960*
961 IF( check.AND.( sresid.GT.thresh ) ) THEN
962*
963* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
964*
965 CALL psgetrrv( m, n, mem( ipa ), 1, 1, desca,
966 $ mem( ippiv ), mem( ipw ) )
967 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
968 $ 1, desca, iaseed, anorm, fresid,
969 $ mem( ipw ) )
970*
971* Check for memory overwrite
972*
973 CALL pschekpad( ictxt, 'PSGETRRV', np, nq,
974 $ mem( ipa-iprepad ), desca( lld_ ),
975 $ iprepad, ipostpad, padval )
976 CALL pschekpad( ictxt, 'PSGETRRV', lipiv,
977 $ 1, mem( ippiv-iprepad ), lipiv,
978 $ iprepad, ipostpad, padval )
979 CALL pschekpad( ictxt, 'PSGETRRV',
980 $ worksiz-ipostpad, 1,
981 $ mem( ipw-iprepad ),
982 $ worksiz-ipostpad, iprepad,
983 $ ipostpad, padval )
984*
985 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
986 $ WRITE( nout, fmt = 9986 ) fresid
987 END IF
988 END IF
989 30 CONTINUE
990 40 CONTINUE
991 CALL blacs_gridexit( ictxt )
992 50 CONTINUE
993*
994* Print ending messages and close output file
995*
996 60 CONTINUE
997 IF( iam.EQ.0 ) THEN
998 ktests = kpass + kfail + kskip
999 WRITE( nout, fmt = * )
1000 WRITE( nout, fmt = 9992 ) ktests
1001 IF( check ) THEN
1002 WRITE( nout, fmt = 9991 ) kpass
1003 WRITE( nout, fmt = 9989 ) kfail
1004 ELSE
1005 WRITE( nout, fmt = 9990 ) kpass
1006 END IF
1007 WRITE( nout, fmt = 9988 ) kskip
1008 WRITE( nout, fmt = * )
1009 WRITE( nout, fmt = * )
1010 WRITE( nout, fmt = 9987 )
1011 IF( nout.NE.6 .AND. nout.NE.0 )
1012 $ CLOSE( nout )
1013 END IF
1014*
1015 CALL blacs_exit( 0 )
1016*
1017 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1018 $ '; It should be at least 1' )
1019 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1020 $ i4 )
1021 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1022 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1023 $ i11 )
1024 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1025 $ 'Sol Time MFLOPS CHECK' )
1026 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1027 $ '-------- -------- ------' )
1028 9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1029 $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1030 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1031 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1032 9990 FORMAT( i5, ' tests completed without checking.' )
1033 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1034 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1035 9987 FORMAT( 'END OF TESTS.' )
1036 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1037 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1038*
1039 stop
1040*
1041* End of PSLUDRIVER
1042*
1043 END
subroutine pslafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pslafchk.f:3
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition psgecon.f:3
subroutine psgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition psgerfs.f:5
subroutine psgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition psgetrf.f:2
subroutine psgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition psgetrrv.f:2
subroutine psgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition psgetrs.f:3
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
subroutine pslaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pslaschk.f:4
program psludriver
Definition psludriver.f:1
subroutine psluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition psluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267