SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcgbdriver.f
Go to the documentation of this file.
1 PROGRAM pcgbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PCGBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by CGB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTMEM INTEGER, default = 2048.
55* INTMEM is the size of the integer workspace used in this
56* driver as input as the IPIV vector. It represents an
57* upper bound on NB, the blocksize of the data
58* distribution.
59*
60* INTGSZ INTEGER, default = 4 bytes.
61* CPLXSZ INTEGER, default = 8 bytes.
62* INTGSZ and CPLXSZ indicate the length in bytes on the
63* given platform for an integer and a single precision
64* complex.
65* MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
66* All arrays used by ScaLAPACK routines are allocated from
67* this array and referenced by pointers. The integer IPB,
68* for example, is a pointer to the starting element of MEM for
69* the solution vector(s) B.
70*
71* =====================================================================
72*
73* Code Developer: Andrew J. Cleary, University of Tennessee.
74* Current address: Lawrence Livermore National Labs.
75* This version released: August, 2001.
76*
77* =====================================================================
78*
79* .. Parameters ..
80 INTEGER totmem
81 parameter( totmem = 3000000 )
82 INTEGER intmem
83 parameter( intmem = 2048 )
84 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
85 $ lld_, mb_, m_, nb_, n_, rsrc_
86 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
87 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
88 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
89*
90 REAL zero
91 INTEGER cplxsz, memsiz, ntests
92 COMPLEX padval
93 parameter( cplxsz = 8,
94 $ memsiz = totmem / cplxsz, ntests = 20,
95 $ padval = ( -9923.0e+0, -9923.0e+0 ),
96 $ zero = 0.0e+0 )
97 INTEGER int_one
98 parameter( int_one = 1 )
99* ..
100* .. Local Scalars ..
101 LOGICAL check
102 CHARACTER trans
103 CHARACTER*6 passed
104 CHARACTER*80 outfile
105 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
106 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
107 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
108 $ iprepad, ipw, ipw_size, ipw_solve,
109 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
110 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
111 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
112 $ nnr, nout, np, npcol, nprocs, nprocs_real,
113 $ nprow, nq, nrhs, n_first, n_last, worksiz
114 REAL anorm, sresid, thresh
115 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
116* ..
117* .. Local Arrays ..
118 INTEGER ipiv(intmem)
119 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
120 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
121 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
122 $ nrval( ntests ), nval( ntests ),
123 $ pval( ntests ), qval( ntests )
124 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
125 COMPLEX mem( memsiz )
126* ..
127* .. External Subroutines ..
128 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
129 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
130 $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
133 $ sltimer
134* ..
135* .. External Functions ..
136 INTEGER numroc
137 LOGICAL lsame
138 REAL pclange
139 EXTERNAL lsame, numroc, pclange
140* ..
141* .. Intrinsic Functions ..
142 INTRINSIC dble, max, min, mod
143* ..
144* .. Data Statements ..
145 DATA kfail, kpass, kskip, ktests / 4*0 /
146* ..
147*
148*
149*
150* .. Executable Statements ..
151*
152* Get starting information
153*
154 CALL blacs_pinfo( iam, nprocs )
155 iaseed = 100
156 ibseed = 200
157*
158 CALL pcgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
159 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
160 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
161 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
162*
163 check = ( thresh.GE.0.0e+0 )
164*
165* Print headings
166*
167 IF( iam.EQ.0 ) THEN
168 WRITE( nout, fmt = * )
169 WRITE( nout, fmt = 9995 )
170 WRITE( nout, fmt = 9994 )
171 WRITE( nout, fmt = * )
172 END IF
173*
174* Loop over different process grids
175*
176 DO 60 i = 1, ngrids
177*
178 nprow = pval( i )
179 npcol = qval( i )
180*
181* Make sure grid information is correct
182*
183 ierr( 1 ) = 0
184 IF( nprow.LT.1 ) THEN
185 IF( iam.EQ.0 )
186 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
187 ierr( 1 ) = 1
188 ELSE IF( npcol.LT.1 ) THEN
189 IF( iam.EQ.0 )
190 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
191 ierr( 1 ) = 1
192 ELSE IF( nprow*npcol.GT.nprocs ) THEN
193 IF( iam.EQ.0 )
194 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
195 ierr( 1 ) = 1
196 END IF
197*
198 IF( ierr( 1 ).GT.0 ) THEN
199 IF( iam.EQ.0 )
200 $ WRITE( nout, fmt = 9997 ) 'grid'
201 kskip = kskip + 1
202 GO TO 50
203 END IF
204*
205* Define process grid
206*
207 CALL blacs_get( -1, 0, ictxt )
208 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
209*
210*
211* Define transpose process grid
212*
213 CALL blacs_get( -1, 0, ictxtb )
214 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
215*
216* Go to bottom of process grid loop if this case doesn't use my
217* process
218*
219 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
220*
221 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
222 GO TO 50
223 ENDIF
224*
225 DO 40 j = 1, nmat
226*
227 ierr( 1 ) = 0
228*
229 n = nval( j )
230*
231* Make sure matrix information is correct
232*
233 IF( n.LT.1 ) THEN
234 IF( iam.EQ.0 )
235 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
236 ierr( 1 ) = 1
237 END IF
238*
239* Check all processes for an error
240*
241 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
242 $ -1, 0 )
243*
244 IF( ierr( 1 ).GT.0 ) THEN
245 IF( iam.EQ.0 )
246 $ WRITE( nout, fmt = 9997 ) 'size'
247 kskip = kskip + 1
248 GO TO 40
249 END IF
250*
251*
252 DO 45 bw_num = 1, nbw
253*
254 ierr( 1 ) = 0
255*
256 bwl = bwlval( bw_num )
257 IF( bwl.LT.1 ) THEN
258 IF( iam.EQ.0 )
259 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
260 ierr( 1 ) = 1
261 END IF
262*
263 bwu = bwuval( bw_num )
264 IF( bwu.LT.1 ) THEN
265 IF( iam.EQ.0 )
266 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
267 ierr( 1 ) = 1
268 END IF
269*
270 IF( bwl.GT.n-1 ) THEN
271 IF( iam.EQ.0 ) THEN
272 ierr( 1 ) = 1
273 ENDIF
274 END IF
275*
276 IF( bwu.GT.n-1 ) THEN
277 IF( iam.EQ.0 ) THEN
278 ierr( 1 ) = 1
279 ENDIF
280 END IF
281*
282* Check all processes for an error
283*
284 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
285 $ -1, 0 )
286*
287 IF( ierr( 1 ).GT.0 ) THEN
288 kskip = kskip + 1
289 GO TO 45
290 END IF
291*
292 DO 30 k = 1, nnb
293*
294 ierr( 1 ) = 0
295*
296 nb = nbval( k )
297 IF( nb.LT.0 ) THEN
298 nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
299 $ + (bwl+bwu)
300 nb = max( nb, 2*(bwl+bwu) )
301 nb = min( n, nb )
302 END IF
303*
304* Make sure NB is legal
305*
306 ierr( 1 ) = 0
307*
308 IF( nb.GT.intmem ) THEN
309 ierr( 1 ) = 1
310 IF( iam.EQ.0 ) THEN
311 WRITE( nout,* )'You have chosen an '
312 $ ,'NB > INTMEM in the driver.'
313 WRITE(nout, *)'Please edit the driver '
314 $ ,'and increase the value of INTMEM'
315 ENDIF
316 END IF
317*
318* Check all processes for an error
319*
320 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
321 $ -1, 0 )
322*
323 IF( ierr( 1 ).GT.0 ) THEN
324 kskip = kskip + 1
325 GO TO 30
326 END IF
327*
328* Padding constants
329*
330 np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
331 $ myrow, 0, nprow )
332 nq = numroc( n, nb, mycol, 0, npcol )
333*
334 IF( check ) THEN
335 iprepad = ((2*bwl+2*bwu+1)+10)
336 imidpad = 10
337 ipostpad = ((2*bwl+2*bwu+1)+10)
338 ELSE
339 iprepad = 0
340 imidpad = 0
341 ipostpad = 0
342 END IF
343*
344* Initialize the array descriptor for the matrix A
345*
346 CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
347 $ (2*bwl+2*bwu+1), nb, 0, 0,
348 $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
349*
350* Convert this to 1D descriptor
351*
352 desca( 1 ) = 501
353 desca( 3 ) = n
354 desca( 4 ) = nb
355 desca( 5 ) = 0
356 desca( 2 ) = ictxt
357 desca( 6 ) = ((2*bwl+2*bwu+1)+10)
358 desca( 7 ) = 0
359*
360 ierr_temp = ierr( 1 )
361 ierr( 1 ) = 0
362 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
363*
364* Check all processes for an error
365*
366 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
367*
368 IF( ierr( 1 ).LT.0 ) THEN
369 IF( iam.EQ.0 )
370 $ WRITE( nout, fmt = 9997 ) 'descriptor'
371 kskip = kskip + 1
372 GO TO 30
373 END IF
374*
375* Assign pointers into MEM for SCALAPACK arrays, A is
376* allocated starting at position MEM( IPREPAD+1 )
377*
378 free_ptr = 1
379 ipb = 0
380*
381* Save room for prepadding
382 free_ptr = free_ptr + iprepad
383*
384 ipa = free_ptr
385 free_ptr = free_ptr + desca2d( lld_ )*
386 $ desca2d( nb_ )
387 $ + ipostpad
388*
389* Add memory for fillin
390* Fillin space needs to store:
391* Fillin spike:
392* Contribution to previous proc's diagonal block of
393* reduced system:
394* Off-diagonal block of reduced system:
395* Diagonal block of reduced system:
396*
397 fillin_size =
398 $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
399*
400* Claim memory for fillin
401*
402 free_ptr = free_ptr + iprepad
403 ip_fillin = free_ptr
404 free_ptr = free_ptr + fillin_size
405*
406* Workspace needed by computational routines:
407*
408 ipw_size = 0
409*
410* factorization:
411*
412 ipw_size = 1
413*
414* Claim memory for IPW
415*
416 ipw = free_ptr
417 free_ptr = free_ptr + ipw_size
418*
419* Check for adequate memory for problem size
420*
421 ierr( 1 ) = 0
422 IF( free_ptr.GT.memsiz ) THEN
423 IF( iam.EQ.0 )
424 $ WRITE( nout, fmt = 9996 )
425 $ 'divide and conquer factorization',
426 $ (free_ptr )*cplxsz
427 ierr( 1 ) = 1
428 END IF
429*
430* Check all processes for an error
431*
432 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
433 $ 1, -1, 0 )
434*
435 IF( ierr( 1 ).GT.0 ) THEN
436 IF( iam.EQ.0 )
437 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
438 kskip = kskip + 1
439 GO TO 30
440 END IF
441*
442* Worksize needed for LAPRNT
443 worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
444*
445 IF( check ) THEN
446*
447* Calculate the amount of workspace required by
448* the checking routines.
449*
450* PCLANGE
451 worksiz = max( worksiz, desca2d( nb_ ) )
452*
453* PCDBLASCHK
454 worksiz = max( worksiz,
455 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
456 END IF
457*
458 free_ptr = free_ptr + iprepad
459 ip_driver_w = free_ptr
460 free_ptr = free_ptr + worksiz + ipostpad
461*
462*
463* Check for adequate memory for problem size
464*
465 ierr( 1 ) = 0
466 IF( free_ptr.GT.memsiz ) THEN
467 IF( iam.EQ.0 )
468 $ WRITE( nout, fmt = 9996 ) 'factorization',
469 $ ( free_ptr )*cplxsz
470 ierr( 1 ) = 1
471 END IF
472*
473* Check all processes for an error
474*
475 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
476 $ 1, -1, 0 )
477*
478 IF( ierr( 1 ).GT.0 ) THEN
479 IF( iam.EQ.0 )
480 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
481 kskip = kskip + 1
482 GO TO 30
483 END IF
484*
485 CALL pcbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
486 $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
487 $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
488 $ myrow, mycol, nprow, npcol )
489*
490 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
491 $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
492 $ padval )
493*
494 CALL pcfillpad( ictxt, worksiz, 1,
495 $ mem( ip_driver_w-iprepad ), worksiz,
496 $ iprepad, ipostpad, padval )
497*
498* Calculate norm of A for residual error-checking
499*
500 IF( check ) THEN
501*
502 anorm = pclange( '1', (2*bwl+2*bwu+1),
503 $ n, mem( ipa ), 1, 1,
504 $ desca2d, mem( ip_driver_w ) )
505 CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
506 $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
507 $ iprepad, ipostpad, padval )
508 CALL pcchekpad( ictxt, 'PCLANGE',
509 $ worksiz, 1,
510 $ mem( ip_driver_w-iprepad ), worksiz,
511 $ iprepad, ipostpad, padval )
512 END IF
513*
514*
515 CALL slboot()
516 CALL blacs_barrier( ictxt, 'All' )
517*
518* Perform factorization
519*
520 CALL sltimer( 1 )
521*
522 CALL pcgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
523 $ mem( ip_fillin ), fillin_size, mem( ipw ),
524 $ ipw_size, info )
525*
526 CALL sltimer( 1 )
527*
528 IF( info.NE.0 ) THEN
529 IF( iam.EQ.0 ) THEN
530 WRITE( nout, fmt = * ) 'PCGBTRF INFO=', info
531 ENDIF
532 kfail = kfail + 1
533 GO TO 30
534 END IF
535*
536 IF( check ) THEN
537*
538* Check for memory overwrite in factorization
539*
540 CALL pcchekpad( ictxt, 'PCGBTRF', np,
541 $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
542 $ iprepad, ipostpad, padval )
543 END IF
544*
545*
546* Loop over the different values for NRHS
547*
548 DO 20 hh = 1, nnr
549*
550 ierr( 1 ) = 0
551*
552 nrhs = nrval( hh )
553*
554* Initialize Array Descriptor for rhs
555*
556 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
557 $ ictxtb, nb+10, ierr( 1 ) )
558*
559* Convert this to 1D descriptor
560*
561 descb( 1 ) = 502
562 descb( 3 ) = n
563 descb( 4 ) = nb
564 descb( 5 ) = 0
565 descb( 2 ) = ictxt
566 descb( 6 ) = descb2d( lld_ )
567 descb( 7 ) = 0
568*
569* reset free_ptr to reuse space for right hand sides
570*
571 IF( ipb .GT. 0 ) THEN
572 free_ptr = ipb
573 ENDIF
574*
575 free_ptr = free_ptr + iprepad
576 ipb = free_ptr
577 free_ptr = free_ptr + nrhs*descb2d( lld_ )
578 $ + ipostpad
579*
580* Allocate workspace for workspace in TRS routine:
581*
582 ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
583*
584 ipw_solve = free_ptr
585 free_ptr = free_ptr + ipw_solve_size
586*
587 ierr( 1 ) = 0
588 IF( free_ptr.GT.memsiz ) THEN
589 IF( iam.EQ.0 )
590 $ WRITE( nout, fmt = 9996 )'solve',
591 $ ( free_ptr )*cplxsz
592 ierr( 1 ) = 1
593 END IF
594*
595* Check all processes for an error
596*
597 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
598 $ ierr, 1, -1, 0 )
599*
600 IF( ierr( 1 ).GT.0 ) THEN
601 IF( iam.EQ.0 )
602 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
603 kskip = kskip + 1
604 GO TO 15
605 END IF
606*
607 myrhs_size = numroc( n, nb, mycol, 0, npcol )
608*
609* Generate RHS
610*
611 CALL pcmatgen(ictxtb, 'No', 'No',
612 $ descb2d( m_ ), descb2d( n_ ),
613 $ descb2d( mb_ ), descb2d( nb_ ),
614 $ mem( ipb ),
615 $ descb2d( lld_ ), descb2d( rsrc_ ),
616 $ descb2d( csrc_ ),
617 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
618 $ myrow, npcol, nprow )
619*
620 IF( check ) THEN
621 CALL pcfillpad( ictxtb, nb, nrhs,
622 $ mem( ipb-iprepad ),
623 $ descb2d( lld_ ),
624 $ iprepad, ipostpad,
625 $ padval )
626 CALL pcfillpad( ictxt, worksiz, 1,
627 $ mem( ip_driver_w-iprepad ),
628 $ worksiz, iprepad,
629 $ ipostpad, padval )
630 END IF
631*
632*
633 CALL blacs_barrier( ictxt, 'All')
634 CALL sltimer( 2 )
635*
636* Solve linear system via factorization
637*
638 CALL pcgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
639 $ 1, desca, ipiv, mem( ipb ), 1, descb,
640 $ mem( ip_fillin ), fillin_size,
641 $ mem( ipw_solve ), ipw_solve_size,
642 $ info )
643*
644 CALL sltimer( 2 )
645*
646 IF( info.NE.0 ) THEN
647 IF( iam.EQ.0 )
648 $ WRITE( nout, fmt = * ) 'PCGBTRS INFO=', info
649 kfail = kfail + 1
650 passed = 'FAILED'
651 GO TO 20
652 END IF
653*
654 IF( check ) THEN
655*
656* check for memory overwrite
657*
658 CALL pcchekpad( ictxt, 'PCGBTRS-work',
659 $ worksiz, 1,
660 $ mem( ip_driver_w-iprepad ),
661 $ worksiz, iprepad,
662 $ ipostpad, padval )
663*
664* check the solution to rhs
665*
666 sresid = zero
667*
668 CALL pcdblaschk( 'N', 'N', trans,
669 $ n, bwl, bwu, nrhs,
670 $ mem( ipb ), 1, 1, descb2d,
671 $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
672 $ ibseed, anorm, sresid,
673 $ mem( ip_driver_w ), worksiz )
674*
675 IF( iam.EQ.0 ) THEN
676 IF( sresid.GT.thresh )
677 $ WRITE( nout, fmt = 9985 ) sresid
678 END IF
679*
680* The second test is a NaN trap
681*
682 IF( ( sresid.LE.thresh ).AND.
683 $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
684 kpass = kpass + 1
685 passed = 'PASSED'
686 ELSE
687 kfail = kfail + 1
688 passed = 'FAILED'
689 END IF
690*
691 END IF
692*
693 15 CONTINUE
694* Skipped tests jump to here to print out "SKIPPED"
695*
696* Gather maximum of all CPU and WALL clock timings
697*
698 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
699 $ wtime )
700 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
701 $ ctime )
702*
703* Print results
704*
705 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
706*
707 nops = 0
708 nops2 = 0
709*
710 n_first = nb
711 nprocs_real = ( n-1 )/nb + 1
712 n_last = mod( n-1, nb ) + 1
713*
714* 2 N bwl (bwl+bwu) + N (bwl) flops
715* for LU factorization
716*
717 nops = 2*(dble(n)*dble(bwl)*
718 $ dble((bwl+bwu))) +
719 $ (dble(n)*dble(bwl))
720*
721* nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
722*
723 nops = nops +
724 $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
725 $ *dble(nrhs))
726*
727* Multiply by 4 to get complex count
728*
729 nops = nops * dble(4)
730*
731* Second calc to represent actual hardware speed
732*
733* 2*N_FIRST (bwl+bwu)*bwu Flops for LU
734* factorization in proc 1
735*
736 nops2 = 2*( (dble(n_first)*
737 $ dble((bwl+bwu))*dble(bwu)))
738*
739 IF ( nprocs_real .GT. 1) THEN
740* 8 N_LAST (bwl+bwu)*(bwl+bwu)
741* flops for LU and spike
742* calc in last processor
743*
744 nops2 = nops2 +
745 $ 8*( (dble(n_last)*dble((bwl+bwu))
746 $ *dble((bwl+bwu))) )
747 ENDIF
748*
749 IF ( nprocs_real .GT. 2) THEN
750* 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
751* calc in other processors
752*
753 nops2 = nops2 + (nprocs_real-2)*
754 $ 8*( (dble(nb)*dble((bwl+bwu))
755 $ *dble((bwl+bwu))) )
756 ENDIF
757*
758* Reduced system
759*
760 nops2 = nops2 +
761 $ 2*( nprocs_real-1 ) *
762 $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
763 IF( nprocs_real .GT. 1 ) THEN
764 nops2 = nops2 +
765 $ 2*( nprocs_real-2 ) *
766 $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
767 ENDIF
768*
769* Solve stage
770*
771* nrhs*2 n_first*
772* (bwl+(bwl+bwu))
773* flops for L,U solve in proc 1.
774*
775 nops2 = nops2 +
776 $ 2*
777 $ dble(n_first)*
778 $ dble(nrhs) *
779 $ ( dble(bwl)+dble((bwl+bwu)))
780*
781 IF ( nprocs_real .GT. 1 ) THEN
782*
783* 2*nrhs*2 n_last
784* ((bwl+bwu)+(bwl+bwu))
785* flops for LU solve in other procs
786*
787 nops2 = nops2 +
788 $ 4*
789 $ (dble(n_last)*(dble((bwl+bwu))+
790 $ dble((bwl+bwu))))*dble(nrhs)
791 ENDIF
792*
793 IF ( nprocs_real .GT. 2 ) THEN
794*
795* 2*nrhs*2 NB
796* ((bwl+bwu)+(bwl+bwu))
797* flops for LU solve in other procs
798*
799 nops2 = nops2 +
800 $ ( nprocs_real-2)*2*
801 $ ( (dble(nb)*(dble((bwl+bwu))+
802 $ dble((bwl+bwu))))*dble(nrhs) )
803 ENDIF
804*
805* Reduced system
806*
807 nops2 = nops2 +
808 $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
809 IF( nprocs_real .GT. 1 ) THEN
810 nops2 = nops2 +
811 $ nrhs*( nprocs_real-2 ) *
812 $ ( 6 * (bwl+bwu)*(bwl+bwu) )
813 ENDIF
814*
815*
816* Multiply by 4 to get complex count
817*
818 nops2 = nops2 * dble(4)
819*
820* Calculate total megaflops - factorization and/or
821* solve -- for WALL and CPU time, and print output
822*
823* Print WALL time if machine supports it
824*
825 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
826 tmflops = nops /
827 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
828 ELSE
829 tmflops = 0.0d+0
830 END IF
831*
832 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
833 tmflops2 = nops2 /
834 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
835 ELSE
836 tmflops2 = 0.0d+0
837 END IF
838*
839 IF( wtime( 2 ).GE.0.0d+0 )
840 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
841 $ n,
842 $ bwl, bwu,
843 $ nb, nrhs, nprow, npcol,
844 $ wtime( 1 ), wtime( 2 ), tmflops,
845 $ tmflops2, passed
846*
847* Print CPU time if machine supports it
848*
849 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
850 tmflops = nops /
851 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
852 ELSE
853 tmflops = 0.0d+0
854 END IF
855*
856 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
857 tmflops2 = nops2 /
858 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
859 ELSE
860 tmflops2 = 0.0d+0
861 END IF
862*
863 IF( ctime( 2 ).GE.0.0d+0 )
864 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
865 $ n,
866 $ bwl, bwu,
867 $ nb, nrhs, nprow, npcol,
868 $ ctime( 1 ), ctime( 2 ), tmflops,
869 $ tmflops2, passed
870*
871 END IF
872 20 CONTINUE
873*
874*
875 30 CONTINUE
876* NNB loop
877*
878 45 CONTINUE
879* BW[] loop
880*
881 40 CONTINUE
882* NMAT loop
883*
884 CALL blacs_gridexit( ictxt )
885 CALL blacs_gridexit( ictxtb )
886*
887 50 CONTINUE
888* NGRIDS DROPOUT
889 60 CONTINUE
890* NGRIDS loop
891*
892* Print ending messages and close output file
893*
894 IF( iam.EQ.0 ) THEN
895 ktests = kpass + kfail + kskip
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = 9992 ) ktests
898 IF( check ) THEN
899 WRITE( nout, fmt = 9991 ) kpass
900 WRITE( nout, fmt = 9989 ) kfail
901 ELSE
902 WRITE( nout, fmt = 9990 ) kpass
903 END IF
904 WRITE( nout, fmt = 9988 ) kskip
905 WRITE( nout, fmt = * )
906 WRITE( nout, fmt = * )
907 WRITE( nout, fmt = 9987 )
908 IF( nout.NE.6 .AND. nout.NE.0 )
909 $ CLOSE ( nout )
910 END IF
911*
912 CALL blacs_exit( 0 )
913*
914 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
915 $ '; It should be at least 1' )
916 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
917 $ i4 )
918 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
919 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
920 $ i11 )
921 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
922 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
923 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
924 $ '-------- -------- -------- ------' )
925 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
926 $ 1x,i4,1x,i4,1x,f9.3,
927 $ f9.4, f9.2, f9.2, 1x, a6 )
928 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
929 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
930 9990 FORMAT( i5, ' tests completed without checking.' )
931 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
932 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
933 9987 FORMAT( 'END OF TESTS.' )
934 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
935 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
936*
937 stop
938*
939* End of PCGBTRS_DRIVER
940*
941 END
942*
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcdblaschk.f:4
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
program pcgbdriver
Definition pcgbdriver.f:1
subroutine pcgbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcgbinfo.f:6
subroutine pcgbtrf(n, bwl, bwu, a, ja, desca, ipiv, af, laf, work, lwork, info)
Definition pcgbtrf.f:3
subroutine pcgbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, ipiv, b, ib, descb, af, laf, work, lwork, info)
Definition pcgbtrs.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724