SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcludriver.f
Go to the documentation of this file.
1 PROGRAM pcludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PCLUDRIVER is the main test program for the COMPLEX
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* CPLXSZ INTEGER, default = 8 bytes.
57* INTGSZ and CPLXSZ indicate the length in bytes on the
58* given platform for an integer and a single precision
59* complex.
60* MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
61*
62* All arrays used by SCALAPACK routines are allocated from
63* this array and referenced by pointers. The integer IPA,
64* for example, is a pointer to the starting element of MEM for
65* the matrix A.
66*
67* =====================================================================
68*
69* .. Parameters ..
70 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
71 $ lld_, mb_, m_, nb_, n_, rsrc_
72 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
73 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
74 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
75 INTEGER cplxsz, intgsz, memsiz, ntests, realsz, totmem
76 REAL zero
77 COMPLEX padval
78 parameter( cplxsz = 8, intgsz = 4, realsz = 4,
79 $ totmem = 4000000,
80 $ memsiz = totmem / cplxsz, ntests = 20,
81 $ padval = ( -9923.0e+0, -9923.0e+0 ),
82 $ zero = 0.0e+0 )
83* ..
84* .. Local Scalars ..
85 LOGICAL check, est
86 CHARACTER*6 passed
87 CHARACTER*80 outfile
88 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
89 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
90 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
91 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
92 $ lipiv, lrwork, lwork, lw2, m, maxmn,
93 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
94 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
95 $ nprocs, nprow, nq, nrhs, worksiz
96 REAL anorm, anorm1, fresid, rcond, sresid, sresid2,
97 $ thresh
98 DOUBLE PRECISION nops, tmflops
99* ..
100* .. Local Arrays ..
101 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
102 $ mval( ntests ), nbrval( ntests ),
103 $ nbval( ntests ), nrval( ntests ),
104 $ nval( ntests ), pval( ntests ),
105 $ qval( ntests )
106 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
107 COMPLEX mem( memsiz )
108* ..
109* .. External Subroutines ..
110 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
111 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
112 $ blacs_pinfo, descinit, igsum2d, pcchekpad,
117* ..
118* .. External Functions ..
119 INTEGER iceil, ilcm, numroc
120 REAL pclange
121 EXTERNAL iceil, ilcm, numroc, pclange
122* ..
123* .. Intrinsic Functions ..
124 INTRINSIC dble, max, min
125* ..
126* .. Data Statements ..
127 DATA kfail, kpass, kskip, ktests / 4*0 /
128* ..
129* .. Executable Statements ..
130*
131* Get starting information
132*
133 CALL blacs_pinfo( iam, nprocs )
134 iaseed = 100
135 ibseed = 200
136 CALL pcluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
137 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
138 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
139 $ est, mem, iam, nprocs )
140 check = ( thresh.GE.0.0e+0 )
141*
142* Print headings
143*
144 IF( iam.EQ.0 ) THEN
145 WRITE( nout, fmt = * )
146 WRITE( nout, fmt = 9995 )
147 WRITE( nout, fmt = 9994 )
148 WRITE( nout, fmt = * )
149 END IF
150*
151* Loop over different process grids
152*
153 DO 50 i = 1, ngrids
154*
155 nprow = pval( i )
156 npcol = qval( i )
157*
158* Make sure grid information is correct
159*
160 ierr( 1 ) = 0
161 IF( nprow.LT.1 ) THEN
162 IF( iam.EQ.0 )
163 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
164 ierr( 1 ) = 1
165 ELSE IF( npcol.LT.1 ) THEN
166 IF( iam.EQ.0 )
167 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
168 ierr( 1 ) = 1
169 ELSE IF( nprow*npcol.GT.nprocs ) THEN
170 IF( iam.EQ.0 )
171 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
172 ierr( 1 ) = 1
173 END IF
174*
175 IF( ierr( 1 ).GT.0 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9997 ) 'grid'
178 kskip = kskip + 1
179 GO TO 50
180 END IF
181*
182* Define process grid
183*
184 CALL blacs_get( -1, 0, ictxt )
185 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
186 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187*
188* Go to bottom of process grid loop if this case doesn't use my
189* process
190*
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
192 $ GO TO 50
193*
194 DO 40 j = 1, nmat
195*
196 m = mval( j )
197 n = nval( j )
198*
199* Make sure matrix information is correct
200*
201 ierr( 1 ) = 0
202 IF( m.LT.1 ) THEN
203 IF( iam.EQ.0 )
204 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
205 ierr( 1 ) = 1
206 ELSE IF( n.LT.1 ) THEN
207 IF( iam.EQ.0 )
208 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
209 ierr( 1 ) = 1
210 END IF
211*
212* Check all processes for an error
213*
214 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
215*
216 IF( ierr( 1 ).GT.0 ) THEN
217 IF( iam.EQ.0 )
218 $ WRITE( nout, fmt = 9997 ) 'matrix'
219 kskip = kskip + 1
220 GO TO 40
221 END IF
222*
223 DO 30 k = 1, nnb
224*
225 nb = nbval( k )
226*
227* Make sure nb is legal
228*
229 ierr( 1 ) = 0
230 IF( nb.LT.1 ) THEN
231 ierr( 1 ) = 1
232 IF( iam.EQ.0 )
233 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
234 END IF
235*
236* Check all processes for an error
237*
238 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
239*
240 IF( ierr( 1 ).GT.0 ) THEN
241 IF( iam.EQ.0 )
242 $ WRITE( nout, fmt = 9997 ) 'NB'
243 kskip = kskip + 1
244 GO TO 30
245 END IF
246*
247* Padding constants
248*
249 mp = numroc( m, nb, myrow, 0, nprow )
250 np = numroc( n, nb, myrow, 0, nprow )
251 nq = numroc( n, nb, mycol, 0, npcol )
252 IF( check ) THEN
253 iprepad = max( nb, mp )
254 imidpad = nb
255 ipostpad = max( nb, nq )
256 ELSE
257 iprepad = 0
258 imidpad = 0
259 ipostpad = 0
260 END IF
261*
262* Initialize the array descriptor for the matrix A
263*
264 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
265 $ max( 1, mp )+imidpad, ierr( 1 ) )
266*
267* Check all processes for an error
268*
269 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
270*
271 IF( ierr( 1 ).LT.0 ) THEN
272 IF( iam.EQ.0 )
273 $ WRITE( nout, fmt = 9997 ) 'descriptor'
274 kskip = kskip + 1
275 GO TO 30
276 END IF
277*
278* Assign pointers into MEM for SCALAPACK arrays, A is
279* allocated starting at position MEM( IPREPAD+1 )
280*
281 ipa = iprepad+1
282 IF( est .AND. m.EQ.n ) THEN
283 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
284 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
285 ELSE
286 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
287 END IF
288 lipiv = iceil( intgsz*( mp+nb ), cplxsz )
289 ipw = ippiv + lipiv + ipostpad + iprepad
290*
291 IF( check ) THEN
292*
293* Calculate the amount of workspace required by the
294* checking routines PCLANGE, PCGETRRV, and
295* PCLAFCHK
296*
297 worksiz = max( 2, nq )
298*
299 worksiz = max( worksiz, mp*desca( nb_ )+
300 $ nq*desca( mb_ ) )
301*
302 worksiz = max( worksiz, mp * desca( nb_ ) )
303*
304 worksiz = worksiz + ipostpad
305*
306 ELSE
307*
308 worksiz = ipostpad
309*
310 END IF
311*
312* Check for adequate memory for problem size
313*
314 ierr( 1 ) = 0
315 IF( ipw+worksiz.GT.memsiz ) THEN
316 IF( iam.EQ.0 )
317 $ WRITE( nout, fmt = 9996 ) 'factorization',
318 $ ( ipw+worksiz )*cplxsz
319 ierr( 1 ) = 1
320 END IF
321*
322* Check all processes for an error
323*
324 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
325*
326 IF( ierr( 1 ).GT.0 ) THEN
327 IF( iam.EQ.0 )
328 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
329 kskip = kskip + 1
330 GO TO 30
331 END IF
332*
333* Generate matrix A of Ax = b
334*
335 CALL pcmatgen( ictxt, 'No transpose', 'No transpose',
336 $ desca( m_ ), desca( n_ ), desca( mb_ ),
337 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
338 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
339 $ mp, 0, nq, myrow, mycol, nprow, npcol )
340*
341* Calculate inf-norm of A for residual error-checking
342*
343 IF( check ) THEN
344 CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
345 $ desca( lld_ ), iprepad, ipostpad,
346 $ padval )
347 CALL pcfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
348 $ lipiv, iprepad, ipostpad, padval )
349 CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
350 $ mem( ipw-iprepad ), worksiz-ipostpad,
351 $ iprepad, ipostpad, padval )
352 anorm = pclange( 'I', m, n, mem( ipa ), 1, 1, desca,
353 $ mem( ipw ) )
354 anorm1 = pclange( '1', m, n, mem( ipa ), 1, 1, desca,
355 $ mem( ipw ) )
356 CALL pcchekpad( ictxt, 'PCLANGE', mp, nq,
357 $ mem( ipa-iprepad ), desca( lld_ ),
358 $ iprepad, ipostpad, padval )
359 CALL pcchekpad( ictxt, 'PCLANGE', worksiz-ipostpad,
360 $ 1, mem( ipw-iprepad ),
361 $ worksiz-ipostpad, iprepad, ipostpad,
362 $ padval )
363 END IF
364*
365 IF( est .AND. m.EQ.n ) THEN
366 CALL pcmatgen( ictxt, 'No transpose', 'No transpose',
367 $ desca( m_ ), desca( n_ ), desca( mb_ ),
368 $ desca( nb_ ), mem( ipa0 ),
369 $ desca( lld_ ), desca( rsrc_ ),
370 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
371 $ myrow, mycol, nprow, npcol )
372 IF( check )
373 $ CALL pcfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
374 $ desca( lld_ ), iprepad, ipostpad,
375 $ padval )
376 END IF
377*
378 CALL slboot()
379 CALL blacs_barrier( ictxt, 'All' )
380 CALL sltimer( 1 )
381*
382* Perform LU factorization
383*
384 CALL pcgetrf( m, n, mem( ipa ), 1, 1, desca,
385 $ mem( ippiv ), info )
386*
387 CALL sltimer( 1 )
388*
389 IF( info.NE.0 ) THEN
390 IF( iam.EQ.0 )
391 $ WRITE( nout, fmt = * ) 'PCGETRF INFO=', info
392 kfail = kfail + 1
393 rcond = zero
394 GO TO 30
395 END IF
396*
397 IF( check ) THEN
398*
399* Check for memory overwrite in LU factorization
400*
401 CALL pcchekpad( ictxt, 'PCGETRF', mp, nq,
402 $ mem( ipa-iprepad ), desca( lld_ ),
403 $ iprepad, ipostpad, padval )
404 CALL pcchekpad( ictxt, 'PCGETRF', lipiv, 1,
405 $ mem( ippiv-iprepad ), lipiv, iprepad,
406 $ ipostpad, padval )
407 END IF
408*
409 IF( m.NE.n ) THEN
410*
411* For non-square matrices, factorization only
412*
413 nrhs = 0
414 nbrhs = 0
415*
416 IF( check ) THEN
417*
418* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
419*
420 CALL pcgetrrv( m, n, mem( ipa ), 1, 1, desca,
421 $ mem( ippiv ), mem( ipw ) )
422 CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
423 $ desca, iaseed, anorm, fresid,
424 $ mem( ipw ) )
425*
426* Check for memory overwrite
427*
428 CALL pcchekpad( ictxt, 'PCGETRRV', mp, nq,
429 $ mem( ipa-iprepad ), desca( lld_ ),
430 $ iprepad, ipostpad, padval )
431 CALL pcchekpad( ictxt, 'PCGETRRV', lipiv, 1,
432 $ mem( ippiv-iprepad ), lipiv,
433 $ iprepad, ipostpad, padval )
434 CALL pcchekpad( ictxt, 'PCGETRRV',
435 $ worksiz-ipostpad, 1,
436 $ mem( ipw-iprepad ),
437 $ worksiz-ipostpad, iprepad,
438 $ ipostpad, padval )
439*
440* Test residual and detect NaN result
441*
442 IF( ( fresid.LE.thresh ) .AND.
443 $ ( (fresid-fresid).EQ.0.0e+0 ) ) THEN
444 kpass = kpass + 1
445 passed = 'PASSED'
446 ELSE
447 kfail = kfail + 1
448 passed = 'FAILED'
449 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
450 $ WRITE( nout, fmt = 9986 ) fresid
451 END IF
452*
453 ELSE
454*
455* Don't perform the checking, only timing
456*
457 kpass = kpass + 1
458 fresid = fresid - fresid
459 passed = 'BYPASS'
460*
461 END IF
462*
463* Gather maximum of all CPU and WALL clock timings
464*
465 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
466 $ wtime )
467 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
468 $ ctime )
469*
470* Print results
471*
472 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
473*
474 maxmn = max( m, n )
475 minmn = min( m, n )
476*
477* 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU
478* factorization M >= N
479*
480 nops = 4.0d+0*dble(maxmn)*(dble(minmn)**2) -
481 $ (4.0d+0 / 3.0d+0)*( dble( minmn )**3 ) +
482 $ (2.0d+0)*dble( maxmn )*dble( minmn ) -
483 $ (3.0d+0)*( dble( minmn )**2 )
484*
485* Calculate total megaflops -- factorization only,
486* -- for WALL and CPU time, and print output
487*
488* Print WALL time if machine supports it
489*
490 IF( wtime( 1 ).GT.0.0d+0 ) THEN
491 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
492 ELSE
493 tmflops = 0.0d+0
494 END IF
495*
496 wtime( 2 ) = 0.0d+0
497 IF( wtime( 1 ).GE.0.0d+0 )
498 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
499 $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
500 $ wtime( 2 ), tmflops, passed
501*
502* Print CPU time if machine supports it
503*
504 IF( ctime( 1 ).GT.0.0d+0 ) THEN
505 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
506 ELSE
507 tmflops = 0.0d+0
508 END IF
509*
510 ctime( 2 ) = 0.0d+0
511 IF( ctime( 1 ).GE.0.0d+0 )
512 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
513 $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
514 $ ctime( 2 ), tmflops, passed
515 END IF
516*
517 ELSE
518*
519* If M = N
520*
521 IF( est ) THEN
522*
523* Calculate workspace required for PCGECON
524*
525 lwork = max( 1, 2*np ) +
526 $ max( 2, desca( nb_ )*
527 $ max( 1, iceil( nprow-1, npcol ) ),
528 $ nq + desca( nb_ )*
529 $ max( 1, iceil( npcol-1, nprow ) ) )
530 ipw2 = ipw + lwork + ipostpad + iprepad
531 lrwork = max( 1, 2*nq )
532 lw2 = iceil( lrwork*realsz, cplxsz ) + ipostpad
533*
534 ierr( 1 ) = 0
535 IF( ipw2+lw2.GT.memsiz ) THEN
536 IF( iam.EQ.0 )
537 $ WRITE( nout, fmt = 9996 )'cond est',
538 $ ( ipw2+lw2 )*cplxsz
539 ierr( 1 ) = 1
540 END IF
541*
542* Check all processes for an error
543*
544 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
545 $ -1, 0 )
546*
547 IF( ierr( 1 ).GT.0 ) THEN
548 IF( iam.EQ.0 )
549 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
550 kskip = kskip + 1
551 GO TO 30
552 END IF
553*
554 IF( check ) THEN
555 CALL pcfillpad( ictxt, lwork, 1,
556 $ mem( ipw-iprepad ), lwork,
557 $ iprepad, ipostpad, padval )
558 CALL pcfillpad( ictxt, lw2-ipostpad, 1,
559 $ mem( ipw2-iprepad ),
560 $ lw2-ipostpad, iprepad,
561 $ ipostpad, padval )
562 END IF
563*
564* Compute condition number of the matrix
565*
566 CALL pcgecon( '1', n, mem( ipa ), 1, 1, desca,
567 $ anorm1, rcond, mem( ipw ), lwork,
568 $ mem( ipw2 ), lrwork, info )
569*
570 IF( check ) THEN
571 CALL pcchekpad( ictxt, 'PCGECON', np, nq,
572 $ mem( ipa-iprepad ),
573 $ desca( lld_ ), iprepad,
574 $ ipostpad, padval )
575 CALL pcchekpad( ictxt, 'PCGECON', lwork, 1,
576 $ mem( ipw-iprepad ), lwork,
577 $ iprepad, ipostpad, padval )
578 CALL pcchekpad( ictxt, 'PCGECON',
579 $ lw2-ipostpad, 1,
580 $ mem( ipw2-iprepad ),
581 $ lw2-ipostpad, iprepad,
582 $ ipostpad, padval )
583 END IF
584 END IF
585*
586* Loop over the different values for NRHS
587*
588 DO 20 hh = 1, nnr
589*
590 nrhs = nrval( hh )
591*
592 DO 10 kk = 1, nnbr
593*
594 nbrhs = nbrval( kk )
595*
596* Initialize Array Descriptor for rhs
597*
598 CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
599 $ ictxt, max( 1, np )+imidpad,
600 $ ierr( 1 ) )
601*
602* Check all processes for an error
603*
604 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
605 $ -1, 0 )
606*
607 IF( ierr( 1 ).LT.0 ) THEN
608 IF( iam.EQ.0 )
609 $ WRITE( nout, fmt = 9997 ) 'descriptor'
610 kskip = kskip + 1
611 GO TO 10
612 END IF
613*
614* move IPW to allow room for RHS
615*
616 myrhs = numroc( descb( n_ ), descb( nb_ ),
617 $ mycol, descb( csrc_ ), npcol )
618 ipb = ipw
619*
620 IF( est ) THEN
621 ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
622 $ iprepad
623 ipferr = ipb0 + descb( lld_ )*myrhs +
624 $ ipostpad + iprepad
625 ipberr = myrhs + ipferr + ipostpad + iprepad
626 ipw = myrhs + ipberr + ipostpad + iprepad
627 ELSE
628 ipw = ipb + descb( lld_ )*myrhs + ipostpad +
629 $ iprepad
630 END IF
631*
632* Set worksiz: routines requiring most workspace
633* is PCLASCHK
634*
635 IF( check ) THEN
636 lcm = ilcm( nprow, npcol )
637 lcmq = lcm / npcol
638 worksiz = max( worksiz-ipostpad,
639 $ nq * nbrhs + np * nbrhs +
640 $ max( max( nq*nb, 2*nbrhs ),
641 $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
642 $ 0,0,lcmq ) ) )
643 worksiz = ipostpad + worksiz
644 ELSE
645 worksiz = ipostpad
646 END IF
647*
648 ierr( 1 ) = 0
649 IF( ipw+worksiz.GT.memsiz ) THEN
650 IF( iam.EQ.0 )
651 $ WRITE( nout, fmt = 9996 )'solve',
652 $ ( ipw+worksiz )*cplxsz
653 ierr( 1 ) = 1
654 END IF
655*
656* Check all processes for an error
657*
658 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
659 $ -1, 0 )
660*
661 IF( ierr( 1 ).GT.0 ) THEN
662 IF( iam.EQ.0 )
663 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
664 kskip = kskip + 1
665 GO TO 10
666 END IF
667*
668* Generate RHS
669*
670 CALL pcmatgen( ictxt, 'No', 'No', descb( m_ ),
671 $ descb( n_ ), descb( mb_ ),
672 $ descb( nb_ ), mem( ipb ),
673 $ descb( lld_ ), descb( rsrc_ ),
674 $ descb( csrc_ ), ibseed, 0, np, 0,
675 $ myrhs, myrow, mycol, nprow,
676 $ npcol )
677*
678 IF( check )
679 $ CALL pcfillpad( ictxt, np, myrhs,
680 $ mem( ipb-iprepad ),
681 $ descb( lld_ ), iprepad,
682 $ ipostpad, padval )
683*
684 IF( est ) THEN
685 CALL pcmatgen( ictxt, 'No', 'No',
686 $ descb( m_ ), descb( n_ ),
687 $ descb( mb_ ), descb( nb_ ),
688 $ mem( ipb0 ), descb( lld_ ),
689 $ descb( rsrc_ ),
690 $ descb( csrc_ ), ibseed, 0, np,
691 $ 0, myrhs, myrow, mycol, nprow,
692 $ npcol )
693 IF( check ) THEN
694 CALL pcfillpad( ictxt, np, myrhs,
695 $ mem( ipb0-iprepad ),
696 $ descb( lld_ ), iprepad,
697 $ ipostpad, padval )
698 CALL pcfillpad( ictxt, 1, myrhs,
699 $ mem( ipferr-iprepad ), 1,
700 $ iprepad, ipostpad,
701 $ padval )
702 CALL pcfillpad( ictxt, 1, myrhs,
703 $ mem( ipberr-iprepad ), 1,
704 $ iprepad, ipostpad,
705 $ padval )
706 END IF
707 END IF
708*
709 CALL blacs_barrier( ictxt, 'All' )
710 CALL sltimer( 2 )
711*
712* Solve linear sytem via LU factorization
713*
714 CALL pcgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
715 $ desca, mem( ippiv ), mem( ipb ),
716 $ 1, 1, descb, info )
717*
718 CALL sltimer( 2 )
719*
720 IF( check ) THEN
721*
722* check for memory overwrite
723*
724 CALL pcchekpad( ictxt, 'PCGETRS', np, nq,
725 $ mem( ipa-iprepad ),
726 $ desca( lld_ ), iprepad,
727 $ ipostpad, padval )
728 CALL pcchekpad( ictxt, 'PCGETRS', lipiv, 1,
729 $ mem( ippiv-iprepad ), lipiv,
730 $ iprepad, ipostpad, padval )
731 CALL pcchekpad( ictxt, 'PCGETRS', np,
732 $ myrhs, mem( ipb-iprepad ),
733 $ descb( lld_ ), iprepad,
734 $ ipostpad, padval )
735*
736 CALL pcfillpad( ictxt, worksiz-ipostpad,
737 $ 1, mem( ipw-iprepad ),
738 $ worksiz-ipostpad, iprepad,
739 $ ipostpad, padval )
740*
741* check the solution to rhs
742*
743 CALL pclaschk( 'No', 'N', n, nrhs,
744 $ mem( ipb ), 1, 1, descb,
745 $ iaseed, 1, 1, desca, ibseed,
746 $ anorm, sresid, mem( ipw ) )
747*
748 IF( iam.EQ.0 .AND. sresid.GT.thresh )
749 $ WRITE( nout, fmt = 9985 ) sresid
750*
751* check for memory overwrite
752*
753 CALL pcchekpad( ictxt, 'PCLASCHK', np,
754 $ myrhs, mem( ipb-iprepad ),
755 $ descb( lld_ ), iprepad,
756 $ ipostpad, padval )
757 CALL pcchekpad( ictxt, 'PCLASCHK',
758 $ worksiz-ipostpad, 1,
759 $ mem( ipw-iprepad ),
760 $ worksiz-ipostpad,
761 $ iprepad, ipostpad, padval )
762*
763* The second test is a NaN trap
764*
765 IF( sresid.LE.thresh .AND.
766 $ ( sresid-sresid ).EQ.0.0e+0 ) THEN
767 kpass = kpass + 1
768 passed = 'PASSED'
769 ELSE
770 kfail = kfail + 1
771 passed = 'FAILED'
772 END IF
773 ELSE
774 kpass = kpass + 1
775 sresid = sresid - sresid
776 passed = 'BYPASS'
777 END IF
778*
779 IF( est ) THEN
780*
781* Calculate workspace required for PCGERFS
782*
783 lwork = max( 1, 2*np )
784 ipw2 = ipw + lwork + ipostpad + iprepad
785 lrwork = max( 1, np )
786 lw2 = iceil( lrwork*realsz, cplxsz ) +
787 $ ipostpad
788*
789 ierr( 1 ) = 0
790 IF( ipw2+lw2.GT.memsiz ) THEN
791 IF( iam.EQ.0 )
792 $ WRITE( nout, fmt = 9996 )
793 $ 'iter ref', ( ipw2+lw2 )*cplxsz
794 ierr( 1 ) = 1
795 END IF
796*
797* Check all processes for an error
798*
799 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
800 $ ierr, 1, -1, 0 )
801*
802 IF( ierr( 1 ).GT.0 ) THEN
803 IF( iam.EQ.0 )
804 $ WRITE( nout, fmt = 9997 )
805 $ 'MEMORY'
806 kskip = kskip + 1
807 GO TO 10
808 END IF
809*
810 IF( check ) THEN
811 CALL pcfillpad( ictxt, lwork, 1,
812 $ mem( ipw-iprepad ),
813 $ lwork, iprepad, ipostpad,
814 $ padval )
815 CALL pcfillpad( ictxt, lw2-ipostpad, 1,
816 $ mem( ipw2-iprepad ),
817 $ lw2-ipostpad, iprepad,
818 $ ipostpad, padval )
819 END IF
820*
821* Use iterative refinement to improve the
822* computed solution
823*
824 CALL pcgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
825 $ 1, desca, mem( ipa ), 1, 1,
826 $ desca, mem( ippiv ),
827 $ mem( ipb0 ), 1, 1, descb,
828 $ mem( ipb ), 1, 1, descb,
829 $ mem( ipferr ), mem( ipberr ),
830 $ mem( ipw ), lwork, mem( ipw2 ),
831 $ lrwork, info )
832*
833 IF( check ) THEN
834 CALL pcchekpad( ictxt, 'PCGERFS', np,
835 $ nq, mem( ipa0-iprepad ),
836 $ desca( lld_ ), iprepad,
837 $ ipostpad, padval )
838 CALL pcchekpad( ictxt, 'PCGERFS', np,
839 $ nq, mem( ipa-iprepad ),
840 $ desca( lld_ ), iprepad,
841 $ ipostpad, padval )
842 CALL pcchekpad( ictxt, 'PCGERFS', lipiv,
843 $ 1, mem( ippiv-iprepad ),
844 $ lipiv, iprepad,
845 $ ipostpad, padval )
846 CALL pcchekpad( ictxt, 'PCGERFS', np,
847 $ myrhs, mem( ipb-iprepad ),
848 $ descb( lld_ ), iprepad,
849 $ ipostpad, padval )
850 CALL pcchekpad( ictxt, 'PCGERFS', np,
851 $ myrhs,
852 $ mem( ipb0-iprepad ),
853 $ descb( lld_ ), iprepad,
854 $ ipostpad, padval )
855 CALL pcchekpad( ictxt, 'PCGERFS', 1,
856 $ myrhs,
857 $ mem( ipferr-iprepad ), 1,
858 $ iprepad, ipostpad,
859 $ padval )
860 CALL pcchekpad( ictxt, 'PCGERFS', 1,
861 $ myrhs,
862 $ mem( ipberr-iprepad ), 1,
863 $ iprepad, ipostpad,
864 $ padval )
865 CALL pcchekpad( ictxt, 'PCGERFS', lwork,
866 $ 1, mem( ipw-iprepad ),
867 $ lwork, iprepad, ipostpad,
868 $ padval )
869 CALL pcchekpad( ictxt, 'PCGERFS',
870 $ lw2-ipostpad, 1,
871 $ mem( ipw2-iprepad ),
872 $ lw2-ipostpad, iprepad,
873 $ ipostpad, padval )
874*
875 CALL pcfillpad( ictxt, worksiz-ipostpad,
876 $ 1, mem( ipw-iprepad ),
877 $ worksiz-ipostpad, iprepad,
878 $ ipostpad, padval )
879*
880* check the solution to rhs
881*
882 CALL pclaschk( 'No', 'N', n, nrhs,
883 $ mem( ipb ), 1, 1, descb,
884 $ iaseed, 1, 1, desca,
885 $ ibseed, anorm, sresid2,
886 $ mem( ipw ) )
887*
888 IF( iam.EQ.0 .AND. sresid2.GT.thresh )
889 $ WRITE( nout, fmt = 9985 ) sresid2
890*
891* check for memory overwrite
892*
893 CALL pcchekpad( ictxt, 'PCLASCHK', np,
894 $ myrhs, mem( ipb-iprepad ),
895 $ descb( lld_ ), iprepad,
896 $ ipostpad, padval )
897 CALL pcchekpad( ictxt, 'PCLASCHK',
898 $ worksiz-ipostpad, 1,
899 $ mem( ipw-iprepad ),
900 $ worksiz-ipostpad, iprepad,
901 $ ipostpad, padval )
902 END IF
903 END IF
904*
905* Gather max. of all CPU and WALL clock timings
906*
907 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
908 $ wtime )
909 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
910 $ ctime )
911*
912* Print results
913*
914 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
915*
916* 8/3 N^3 - N^2 flops for LU factorization
917*
918 nops = (8.0d+0/3.0d+0)*( dble(n)**3 ) -
919 $ dble(n)**2
920*
921* nrhs * 8 N^2 flops for LU solve.
922*
923 nops = nops + 8.0d+0*(dble(n)**2)*dble(nrhs)
924*
925* Calculate total megaflops -- factorization
926* and solve -- for WALL and CPU time, and print
927* output
928*
929* Print WALL time if machine supports it
930*
931 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
932 $ THEN
933 tmflops = nops /
934 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
935 ELSE
936 tmflops = 0.0d+0
937 END IF
938*
939* Print WALL time if supported
940*
941 IF( wtime( 2 ).GE.0.0d+0 )
942 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
943 $ nb, nrhs, nbrhs, nprow, npcol,
944 $ wtime( 1 ), wtime( 2 ), tmflops,
945 $ passed
946*
947* Print CPU time if supported
948*
949 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
950 $ THEN
951 tmflops = nops /
952 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
953 ELSE
954 tmflops = 0.0d+0
955 END IF
956*
957 IF( ctime( 2 ).GE.0.0d+0 )
958 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
959 $ nb, nrhs, nbrhs, nprow, npcol,
960 $ ctime( 1 ), ctime( 2 ), tmflops,
961 $ passed
962 END IF
963 10 CONTINUE
964 20 CONTINUE
965*
966 IF( check.AND.( sresid.GT.thresh ) ) THEN
967*
968* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
969*
970 CALL pcgetrrv( m, n, mem( ipa ), 1, 1, desca,
971 $ mem( ippiv ), mem( ipw ) )
972 CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
973 $ 1, desca, iaseed, anorm, fresid,
974 $ mem( ipw ) )
975*
976* Check for memory overwrite
977*
978 CALL pcchekpad( ictxt, 'PCGETRRV', np, nq,
979 $ mem( ipa-iprepad ), desca( lld_ ),
980 $ iprepad, ipostpad, padval )
981 CALL pcchekpad( ictxt, 'PCGETRRV', lipiv,
982 $ 1, mem( ippiv-iprepad ), lipiv,
983 $ iprepad, ipostpad, padval )
984 CALL pcchekpad( ictxt, 'PCGETRRV',
985 $ worksiz-ipostpad, 1,
986 $ mem( ipw-iprepad ),
987 $ worksiz-ipostpad, iprepad,
988 $ ipostpad, padval )
989*
990 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
991 $ WRITE( nout, fmt = 9986 ) fresid
992 END IF
993 END IF
994 30 CONTINUE
995 40 CONTINUE
996 CALL blacs_gridexit( ictxt )
997 50 CONTINUE
998*
999* Print ending messages and close output file
1000*
1001 60 CONTINUE
1002 IF( iam.EQ.0 ) THEN
1003 ktests = kpass + kfail + kskip
1004 WRITE( nout, fmt = * )
1005 WRITE( nout, fmt = 9992 ) ktests
1006 IF( check ) THEN
1007 WRITE( nout, fmt = 9991 ) kpass
1008 WRITE( nout, fmt = 9989 ) kfail
1009 ELSE
1010 WRITE( nout, fmt = 9990 ) kpass
1011 END IF
1012 WRITE( nout, fmt = 9988 ) kskip
1013 WRITE( nout, fmt = * )
1014 WRITE( nout, fmt = * )
1015 WRITE( nout, fmt = 9987 )
1016 IF( nout.NE.6 .AND. nout.NE.0 )
1017 $ CLOSE( nout )
1018 END IF
1019*
1020 CALL blacs_exit( 0 )
1021*
1022 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1023 $ '; It should be at least 1' )
1024 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1025 $ i4 )
1026 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1027 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1028 $ i11 )
1029 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1030 $ 'Sol Time MFLOPS CHECK' )
1031 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1032 $ '-------- -------- ------' )
1033 9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1034 $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1035 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1036 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1037 9990 FORMAT( i5, ' tests completed without checking.' )
1038 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1039 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1040 9987 FORMAT( 'END OF TESTS.' )
1041 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1042 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1043*
1044 stop
1045*
1046* End of PCLUDRIVER
1047*
1048 END
subroutine pclafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pclafchk.f:3
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
subroutine pcgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, rwork, lrwork, info)
Definition pcgecon.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pcgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, rwork, lrwork, info)
Definition pcgerfs.f:5
subroutine pcgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition pcgetrf.f:2
subroutine pcgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition pcgetrrv.f:2
subroutine pcgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pcgetrs.f:3
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
subroutine pclaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pclaschk.f:4
program pcludriver
Definition pcludriver.f:1
subroutine pcluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition pcluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267