SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcdtdriver.f
Go to the documentation of this file.
1 PROGRAM pcdtdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PCDTDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by CDT. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* CPLXSZ INTEGER, default = 8 bytes.
56* INTGSZ and CPLXSZ indicate the length in bytes on the
57* given platform for an integer and a single precision
58* complex.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 REAL zero
83 INTEGER cplxsz, memsiz, ntests
84 COMPLEX padval
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
88 $ zero = 0.0e+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER trans
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108* ..
109* .. Local Arrays ..
110 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
117* ..
118* .. External Subroutines ..
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121 $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
125* ..
126* .. External Functions ..
127 INTEGER numroc
128 LOGICAL lsame
129 REAL pclange
130 EXTERNAL lsame, numroc, pclange
131* ..
132* .. Intrinsic Functions ..
133 INTRINSIC dble, max, min, mod
134* ..
135* .. Data Statements ..
136 DATA kfail, kpass, kskip, ktests / 4*0 /
137* ..
138*
139*
140*
141* .. Executable Statements ..
142*
143* Get starting information
144*
145 CALL blacs_pinfo( iam, nprocs )
146 iaseed = 100
147 ibseed = 200
148*
149 CALL pcdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
153*
154 check = ( thresh.GE.0.0e+0 )
155*
156* Print headings
157*
158 IF( iam.EQ.0 ) THEN
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
163 END IF
164*
165* Loop over different process grids
166*
167 DO 60 i = 1, ngrids
168*
169 nprow = pval( i )
170 npcol = qval( i )
171*
172* Make sure grid information is correct
173*
174 ierr( 1 ) = 0
175 IF( nprow.LT.1 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178 ierr( 1 ) = 1
179 ELSE IF( npcol.LT.1 ) THEN
180 IF( iam.EQ.0 )
181 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182 ierr( 1 ) = 1
183 ELSE IF( nprow*npcol.GT.nprocs ) THEN
184 IF( iam.EQ.0 )
185 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186 ierr( 1 ) = 1
187 END IF
188*
189 IF( ierr( 1 ).GT.0 ) THEN
190 IF( iam.EQ.0 )
191 $ WRITE( nout, fmt = 9997 ) 'grid'
192 kskip = kskip + 1
193 GO TO 50
194 END IF
195*
196* Define process grid
197*
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200*
201*
202* Define transpose process grid
203*
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206*
207* Go to bottom of process grid loop if this case doesn't use my
208* process
209*
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211*
212 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213 GO TO 50
214 ENDIF
215*
216 DO 40 j = 1, nmat
217*
218 ierr( 1 ) = 0
219*
220 n = nval( j )
221*
222* Make sure matrix information is correct
223*
224 IF( n.LT.1 ) THEN
225 IF( iam.EQ.0 )
226 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227 ierr( 1 ) = 1
228 END IF
229*
230* Check all processes for an error
231*
232 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233 $ -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'size'
238 kskip = kskip + 1
239 GO TO 40
240 END IF
241*
242*
243 DO 45 bw_num = 1, nbw
244*
245 ierr( 1 ) = 0
246*
247 bwl = 1
248 IF( bwl.LT.1 ) THEN
249 IF( iam.EQ.0 )
250 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
251 ierr( 1 ) = 1
252 END IF
253*
254 bwu = 1
255 IF( bwu.LT.1 ) THEN
256 IF( iam.EQ.0 )
257 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
258 ierr( 1 ) = 1
259 END IF
260*
261 IF( bwl.GT.n-1 ) THEN
262 IF( iam.EQ.0 ) THEN
263 ierr( 1 ) = 1
264 ENDIF
265 END IF
266*
267 IF( bwu.GT.n-1 ) THEN
268 IF( iam.EQ.0 ) THEN
269 ierr( 1 ) = 1
270 ENDIF
271 END IF
272*
273* Check all processes for an error
274*
275 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
276 $ -1, 0 )
277*
278 IF( ierr( 1 ).GT.0 ) THEN
279 kskip = kskip + 1
280 GO TO 45
281 END IF
282*
283 DO 30 k = 1, nnb
284*
285 ierr( 1 ) = 0
286*
287 nb = nbval( k )
288 IF( nb.LT.0 ) THEN
289 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
290 $ + int_one
291 nb = max( nb, 2*int_one )
292 nb = min( n, nb )
293 END IF
294*
295* Make sure NB is legal
296*
297 ierr( 1 ) = 0
298 IF( nb.LT.min( 2*int_one, n ) ) THEN
299 ierr( 1 ) = 1
300 END IF
301*
302* Check all processes for an error
303*
304 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
305 $ -1, 0 )
306*
307 IF( ierr( 1 ).GT.0 ) THEN
308 kskip = kskip + 1
309 GO TO 30
310 END IF
311*
312* Padding constants
313*
314 np = numroc( (3), (3),
315 $ myrow, 0, nprow )
316 nq = numroc( n, nb, mycol, 0, npcol )
317*
318 IF( check ) THEN
319 iprepad = ((3)+10)
320 imidpad = 10
321 ipostpad = ((3)+10)
322 ELSE
323 iprepad = 0
324 imidpad = 0
325 ipostpad = 0
326 END IF
327*
328* Initialize the array descriptor for the matrix A
329*
330 CALL descinit( desca2d, n, (3),
331 $ nb, 1, 0, 0,
332 $ ictxtb, nb+10, ierr( 1 ) )
333*
334* Convert this to 1D descriptor
335*
336 desca( 1 ) = 501
337 desca( 3 ) = n
338 desca( 4 ) = nb
339 desca( 5 ) = 0
340 desca( 2 ) = ictxt
341 desca( 6 ) = ((3)+10)
342 desca( 7 ) = 0
343*
344 ierr_temp = ierr( 1 )
345 ierr( 1 ) = 0
346 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
347*
348* Check all processes for an error
349*
350 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
351*
352 IF( ierr( 1 ).LT.0 ) THEN
353 IF( iam.EQ.0 )
354 $ WRITE( nout, fmt = 9997 ) 'descriptor'
355 kskip = kskip + 1
356 GO TO 30
357 END IF
358*
359* Assign pointers into MEM for SCALAPACK arrays, A is
360* allocated starting at position MEM( IPREPAD+1 )
361*
362 free_ptr = 1
363 ipb = 0
364*
365* Save room for prepadding
366 free_ptr = free_ptr + iprepad
367*
368 ipa = free_ptr
369 free_ptr = free_ptr + (nb+10)*(3)
370 $ + ipostpad
371*
372* Add memory for fillin
373* Fillin space needs to store:
374* Fillin spike:
375* Contribution to previous proc's diagonal block of
376* reduced system:
377* Off-diagonal block of reduced system:
378* Diagonal block of reduced system:
379*
380 fillin_size =
381 $ (12*npcol+3*nb)
382*
383* Claim memory for fillin
384*
385 free_ptr = free_ptr + iprepad
386 ip_fillin = free_ptr
387 free_ptr = free_ptr + fillin_size
388*
389* Workspace needed by computational routines:
390*
391 ipw_size = 0
392*
393* factorization:
394*
395 ipw_size = 8*npcol
396*
397* Claim memory for IPW
398*
399 ipw = free_ptr
400 free_ptr = free_ptr + ipw_size
401*
402* Check for adequate memory for problem size
403*
404 ierr( 1 ) = 0
405 IF( free_ptr.GT.memsiz ) THEN
406 IF( iam.EQ.0 )
407 $ WRITE( nout, fmt = 9996 )
408 $ 'divide and conquer factorization',
409 $ (free_ptr )*cplxsz
410 ierr( 1 ) = 1
411 END IF
412*
413* Check all processes for an error
414*
415 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
416 $ 1, -1, 0 )
417*
418 IF( ierr( 1 ).GT.0 ) THEN
419 IF( iam.EQ.0 )
420 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
421 kskip = kskip + 1
422 GO TO 30
423 END IF
424*
425* Worksize needed for LAPRNT
426 worksiz = max( ((3)+10), nb )
427*
428 IF( check ) THEN
429*
430* Calculate the amount of workspace required by
431* the checking routines.
432*
433* PCLANGE
434 worksiz = max( worksiz, desca2d( nb_ ) )
435*
436* PCDTLASCHK
437 worksiz = max( worksiz,
438 $ max(5,nb)+2*nb )
439 END IF
440*
441 free_ptr = free_ptr + iprepad
442 ip_driver_w = free_ptr
443 free_ptr = free_ptr + worksiz + ipostpad
444*
445*
446* Check for adequate memory for problem size
447*
448 ierr( 1 ) = 0
449 IF( free_ptr.GT.memsiz ) THEN
450 IF( iam.EQ.0 )
451 $ WRITE( nout, fmt = 9996 ) 'factorization',
452 $ ( free_ptr )*cplxsz
453 ierr( 1 ) = 1
454 END IF
455*
456* Check all processes for an error
457*
458 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
459 $ 1, -1, 0 )
460*
461 IF( ierr( 1 ).GT.0 ) THEN
462 IF( iam.EQ.0 )
463 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
464 kskip = kskip + 1
465 GO TO 30
466 END IF
467*
468 CALL pcbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
469 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
470 $ mycol, nprow, npcol )
471 CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
472 $ nb+10, iprepad, ipostpad,
473 $ padval )
474*
475 CALL pcfillpad( ictxt, worksiz, 1,
476 $ mem( ip_driver_w-iprepad ), worksiz,
477 $ iprepad, ipostpad, padval )
478*
479* Calculate norm of A for residual error-checking
480*
481 IF( check ) THEN
482*
483 anorm = pclange( 'I', n,
484 $ (3), mem( ipa ), 1, 1,
485 $ desca2d, mem( ip_driver_w ) )
486 CALL pcchekpad( ictxt, 'PCLANGE', nq, np,
487 $ mem( ipa-iprepad ), nb+10,
488 $ iprepad, ipostpad, padval )
489 CALL pcchekpad( ictxt, 'PCLANGE',
490 $ worksiz, 1,
491 $ mem( ip_driver_w-iprepad ), worksiz,
492 $ iprepad, ipostpad, padval )
493 END IF
494*
495*
496 CALL slboot()
497 CALL blacs_barrier( ictxt, 'All' )
498*
499* Perform factorization
500*
501 CALL sltimer( 1 )
502*
503 CALL pcdttrf( n, mem( ipa+2*( nb+10 ) ),
504 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
505 $ desca, mem( ip_fillin ), fillin_size,
506 $ mem( ipw ), ipw_size, info )
507*
508 CALL sltimer( 1 )
509*
510 IF( info.NE.0 ) THEN
511 IF( iam.EQ.0 ) THEN
512 WRITE( nout, fmt = * ) 'PCDTTRF INFO=', info
513 ENDIF
514 kfail = kfail + 1
515 GO TO 30
516 END IF
517*
518 IF( check ) THEN
519*
520* Check for memory overwrite in factorization
521*
522 CALL pcchekpad( ictxt, 'PCDTTRF', nq,
523 $ np, mem( ipa-iprepad ), nb+10,
524 $ iprepad, ipostpad, padval )
525 END IF
526*
527*
528* Loop over the different values for NRHS
529*
530 DO 20 hh = 1, nnr
531*
532 ierr( 1 ) = 0
533*
534 nrhs = nrval( hh )
535*
536* Initialize Array Descriptor for rhs
537*
538 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
539 $ ictxtb, nb+10, ierr( 1 ) )
540*
541* Convert this to 1D descriptor
542*
543 descb( 1 ) = 502
544 descb( 3 ) = n
545 descb( 4 ) = nb
546 descb( 5 ) = 0
547 descb( 2 ) = ictxt
548 descb( 6 ) = descb2d( lld_ )
549 descb( 7 ) = 0
550*
551* reset free_ptr to reuse space for right hand sides
552*
553 IF( ipb .GT. 0 ) THEN
554 free_ptr = ipb
555 ENDIF
556*
557 free_ptr = free_ptr + iprepad
558 ipb = free_ptr
559 free_ptr = free_ptr + nrhs*descb2d( lld_ )
560 $ + ipostpad
561*
562* Allocate workspace for workspace in TRS routine:
563*
564 ipw_solve_size = 10*npcol+4*nrhs
565*
566 ipw_solve = free_ptr
567 free_ptr = free_ptr + ipw_solve_size
568*
569 ierr( 1 ) = 0
570 IF( free_ptr.GT.memsiz ) THEN
571 IF( iam.EQ.0 )
572 $ WRITE( nout, fmt = 9996 )'solve',
573 $ ( free_ptr )*cplxsz
574 ierr( 1 ) = 1
575 END IF
576*
577* Check all processes for an error
578*
579 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
580 $ ierr, 1, -1, 0 )
581*
582 IF( ierr( 1 ).GT.0 ) THEN
583 IF( iam.EQ.0 )
584 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
585 kskip = kskip + 1
586 GO TO 15
587 END IF
588*
589 myrhs_size = numroc( n, nb, mycol, 0, npcol )
590*
591* Generate RHS
592*
593 CALL pcmatgen(ictxtb, 'No', 'No',
594 $ descb2d( m_ ), descb2d( n_ ),
595 $ descb2d( mb_ ), descb2d( nb_ ),
596 $ mem( ipb ),
597 $ descb2d( lld_ ), descb2d( rsrc_ ),
598 $ descb2d( csrc_ ),
599 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
600 $ myrow, npcol, nprow )
601*
602 IF( check ) THEN
603 CALL pcfillpad( ictxtb, nb, nrhs,
604 $ mem( ipb-iprepad ),
605 $ descb2d( lld_ ),
606 $ iprepad, ipostpad,
607 $ padval )
608 CALL pcfillpad( ictxt, worksiz, 1,
609 $ mem( ip_driver_w-iprepad ),
610 $ worksiz, iprepad,
611 $ ipostpad, padval )
612 END IF
613*
614*
615 CALL blacs_barrier( ictxt, 'All')
616 CALL sltimer( 2 )
617*
618* Solve linear system via factorization
619*
620 CALL pcdttrs( trans, n, nrhs,
621 $ mem( ipa+2*( nb+10 ) ),
622 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
623 $ 1, desca, mem( ipb ), 1, descb,
624 $ mem( ip_fillin ), fillin_size,
625 $ mem( ipw_solve ), ipw_solve_size,
626 $ info )
627*
628 CALL sltimer( 2 )
629*
630 IF( info.NE.0 ) THEN
631 IF( iam.EQ.0 )
632 $ WRITE( nout, fmt = * ) 'PCDTTRS INFO=', info
633 kfail = kfail + 1
634 passed = 'FAILED'
635 GO TO 20
636 END IF
637*
638 IF( check ) THEN
639*
640* check for memory overwrite
641*
642 CALL pcchekpad( ictxt, 'PCDTTRS-work',
643 $ worksiz, 1,
644 $ mem( ip_driver_w-iprepad ),
645 $ worksiz, iprepad,
646 $ ipostpad, padval )
647*
648* check the solution to rhs
649*
650 sresid = zero
651*
652* Reset descriptor describing A to 1-by-P grid for
653* use in banded utility routines
654*
655 CALL descinit( desca2d, (3), n,
656 $ (3), nb, 0, 0,
657 $ ictxt, (3), ierr( 1 ) )
658 CALL pcdtlaschk( 'N', 'D', trans,
659 $ n, bwl, bwu, nrhs,
660 $ mem( ipb ), 1, 1, descb2d,
661 $ iaseed, mem( ipa ), 1, 1, desca2d,
662 $ ibseed, anorm, sresid,
663 $ mem( ip_driver_w ), worksiz )
664*
665 IF( iam.EQ.0 ) THEN
666 IF( sresid.GT.thresh )
667 $ WRITE( nout, fmt = 9985 ) sresid
668 END IF
669*
670* The second test is a NaN trap
671*
672 IF( ( sresid.LE.thresh ).AND.
673 $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
674 kpass = kpass + 1
675 passed = 'PASSED'
676 ELSE
677 kfail = kfail + 1
678 passed = 'FAILED'
679 END IF
680*
681 END IF
682*
683 15 CONTINUE
684* Skipped tests jump to here to print out "SKIPPED"
685*
686* Gather maximum of all CPU and WALL clock timings
687*
688 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
689 $ wtime )
690 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
691 $ ctime )
692*
693* Print results
694*
695 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
696*
697 nops = 0
698 nops2 = 0
699*
700 n_first = nb
701 nprocs_real = ( n-1 )/nb + 1
702 n_last = mod( n-1, nb ) + 1
703*
704* 2 N bwl INT_ONE + N (bwl) flops
705* for LU factorization
706*
707 nops = 2*(dble(n)*dble(bwl)*
708 $ dble(int_one)) +
709 $ (dble(n)*dble(bwl))
710*
711* nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
712*
713 nops = nops +
714 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
715 $ *dble(nrhs))
716*
717* Multiply by 4 to get complex count
718*
719 nops = nops * dble(4)
720*
721* Second calc to represent actual hardware speed
722*
723* 2*N_FIRST bwl*bwu Flops for LU
724* factorization in proc 1
725*
726 nops2 = 2*( (dble(n_first)*
727 $ dble(bwl)*dble(bwu)))
728*
729 IF ( nprocs_real .GT. 1) THEN
730* 8 N_LAST bwl*INT_ONE
731* flops for LU and spike
732* calc in last processor
733*
734 nops2 = nops2 +
735 $ 8*( (dble(n_last)*dble(bwl)
736 $ *dble(int_one)) )
737 ENDIF
738*
739 IF ( nprocs_real .GT. 2) THEN
740* 8 NB bwl*INT_ONE flops for LU and spike
741* calc in other processors
742*
743 nops2 = nops2 + (nprocs_real-2)*
744 $ 8*( (dble(nb)*dble(bwl)
745 $ *dble(int_one)) )
746 ENDIF
747*
748* Reduced system
749*
750 nops2 = nops2 +
751 $ 2*( nprocs_real-1 ) *
752 $ ( bwl*int_one*bwl/3 )
753 IF( nprocs_real .GT. 1 ) THEN
754 nops2 = nops2 +
755 $ 2*( nprocs_real-2 ) *
756 $ (2*bwl*int_one*bwl)
757 ENDIF
758*
759* Solve stage
760*
761* nrhs*2 n_first*
762* (bwl+INT_ONE)
763* flops for L,U solve in proc 1.
764*
765 nops2 = nops2 +
766 $ 2*
767 $ dble(n_first)*
768 $ dble(nrhs) *
769 $ ( dble(bwl)+dble(int_one))
770*
771 IF ( nprocs_real .GT. 1 ) THEN
772*
773* 2*nrhs*2 n_last
774* (bwl+INT_ONE)
775* flops for LU solve in other procs
776*
777 nops2 = nops2 +
778 $ 4*
779 $ (dble(n_last)*(dble(bwl)+
780 $ dble(int_one)))*dble(nrhs)
781 ENDIF
782*
783 IF ( nprocs_real .GT. 2 ) THEN
784*
785* 2*nrhs*2 NB
786* (bwl+INT_ONE)
787* flops for LU solve in other procs
788*
789 nops2 = nops2 +
790 $ ( nprocs_real-2)*2*
791 $ ( (dble(nb)*(dble(bwl)+
792 $ dble(int_one)))*dble(nrhs) )
793 ENDIF
794*
795* Reduced system
796*
797 nops2 = nops2 +
798 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
799 IF( nprocs_real .GT. 1 ) THEN
800 nops2 = nops2 +
801 $ nrhs*( nprocs_real-2 ) *
802 $ ( 6 * bwl*int_one )
803 ENDIF
804*
805*
806* Multiply by 4 to get complex count
807*
808 nops2 = nops2 * dble(4)
809*
810* Calculate total megaflops - factorization and/or
811* solve -- for WALL and CPU time, and print output
812*
813* Print WALL time if machine supports it
814*
815 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
816 tmflops = nops /
817 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
818 ELSE
819 tmflops = 0.0d+0
820 END IF
821*
822 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
823 tmflops2 = nops2 /
824 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
825 ELSE
826 tmflops2 = 0.0d+0
827 END IF
828*
829 IF( wtime( 2 ).GE.0.0d+0 )
830 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
831 $ n,
832 $ bwl, bwu,
833 $ nb, nrhs, nprow, npcol,
834 $ wtime( 1 ), wtime( 2 ), tmflops,
835 $ tmflops2, passed
836*
837* Print CPU time if machine supports it
838*
839 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
840 tmflops = nops /
841 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
842 ELSE
843 tmflops = 0.0d+0
844 END IF
845*
846 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
847 tmflops2 = nops2 /
848 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
849 ELSE
850 tmflops2 = 0.0d+0
851 END IF
852*
853 IF( ctime( 2 ).GE.0.0d+0 )
854 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
855 $ n,
856 $ bwl, bwu,
857 $ nb, nrhs, nprow, npcol,
858 $ ctime( 1 ), ctime( 2 ), tmflops,
859 $ tmflops2, passed
860*
861 END IF
862 20 CONTINUE
863*
864*
865 30 CONTINUE
866* NNB loop
867*
868 45 CONTINUE
869* BW[] loop
870*
871 40 CONTINUE
872* NMAT loop
873*
874 CALL blacs_gridexit( ictxt )
875 CALL blacs_gridexit( ictxtb )
876*
877 50 CONTINUE
878* NGRIDS DROPOUT
879 60 CONTINUE
880* NGRIDS loop
881*
882* Print ending messages and close output file
883*
884 IF( iam.EQ.0 ) THEN
885 ktests = kpass + kfail + kskip
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9992 ) ktests
888 IF( check ) THEN
889 WRITE( nout, fmt = 9991 ) kpass
890 WRITE( nout, fmt = 9989 ) kfail
891 ELSE
892 WRITE( nout, fmt = 9990 ) kpass
893 END IF
894 WRITE( nout, fmt = 9988 ) kskip
895 WRITE( nout, fmt = * )
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = 9987 )
898 IF( nout.NE.6 .AND. nout.NE.0 )
899 $ CLOSE ( nout )
900 END IF
901*
902 CALL blacs_exit( 0 )
903*
904 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
905 $ '; It should be at least 1' )
906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
907 $ i4 )
908 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
909 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
910 $ i11 )
911 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
913 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914 $ '-------- -------- -------- ------' )
915 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916 $ 1x,i4,1x,i4,1x,f9.3,
917 $ f9.4, f9.2, f9.2, 1x, a6 )
918 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
919 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
920 9990 FORMAT( i5, ' tests completed without checking.' )
921 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
922 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
923 9987 FORMAT( 'END OF TESTS.' )
924 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
926*
927 stop
928*
929* End of PCDTTRS_DRIVER
930*
931 END
932*
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
program pcdtdriver
Definition pcdtdriver.f:1
subroutine pcdtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcdtinfo.f:6
subroutine pcdtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcdtlaschk.f:4
subroutine pcdttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
Definition pcdttrf.f:3
subroutine pcdttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pcdttrs.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724