SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzdtdriver.f
Go to the documentation of this file.
1 PROGRAM pzdtdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PZDTDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by ZDT. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* ZPLXSZ INTEGER, default = 16 bytes.
56* INTGSZ and ZPLXSZ indicate the length in bytes on the
57* given platform for an integer and a double precision
58* complex.
59* MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 DOUBLE PRECISION zero
83 INTEGER memsiz, ntests, zplxsz
84 COMPLEX*16 padval
85 parameter( zplxsz = 16,
86 $ memsiz = totmem / zplxsz, ntests = 20,
87 $ padval = ( -9923.0d+0, -9923.0d+0 ),
88 $ zero = 0.0d+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER trans
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL thresh
107 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
108 $ tmflops2
109* ..
110* .. Local Arrays ..
111 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114 $ nrval( ntests ), nval( ntests ),
115 $ pval( ntests ), qval( ntests )
116 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 COMPLEX*16 mem( memsiz )
118* ..
119* .. External Subroutines ..
120 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122 $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
126* ..
127* .. External Functions ..
128 INTEGER numroc
129 LOGICAL lsame
130 DOUBLE PRECISION pzlange
131 EXTERNAL lsame, numroc, pzlange
132* ..
133* .. Intrinsic Functions ..
134 INTRINSIC dble, max, min, mod
135* ..
136* .. Data Statements ..
137 DATA kfail, kpass, kskip, ktests / 4*0 /
138* ..
139*
140*
141*
142* .. Executable Statements ..
143*
144* Get starting information
145*
146 CALL blacs_pinfo( iam, nprocs )
147 iaseed = 100
148 ibseed = 200
149*
150 CALL pzdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154*
155 check = ( thresh.GE.0.0d+0 )
156*
157* Print headings
158*
159 IF( iam.EQ.0 ) THEN
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
164 END IF
165*
166* Loop over different process grids
167*
168 DO 60 i = 1, ngrids
169*
170 nprow = pval( i )
171 npcol = qval( i )
172*
173* Make sure grid information is correct
174*
175 ierr( 1 ) = 0
176 IF( nprow.LT.1 ) THEN
177 IF( iam.EQ.0 )
178 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179 ierr( 1 ) = 1
180 ELSE IF( npcol.LT.1 ) THEN
181 IF( iam.EQ.0 )
182 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183 ierr( 1 ) = 1
184 ELSE IF( nprow*npcol.GT.nprocs ) THEN
185 IF( iam.EQ.0 )
186 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 ierr( 1 ) = 1
188 END IF
189*
190 IF( ierr( 1 ).GT.0 ) THEN
191 IF( iam.EQ.0 )
192 $ WRITE( nout, fmt = 9997 ) 'grid'
193 kskip = kskip + 1
194 GO TO 50
195 END IF
196*
197* Define process grid
198*
199 CALL blacs_get( -1, 0, ictxt )
200 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201*
202*
203* Define transpose process grid
204*
205 CALL blacs_get( -1, 0, ictxtb )
206 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
207*
208* Go to bottom of process grid loop if this case doesn't use my
209* process
210*
211 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212*
213 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
214 GO TO 50
215 ENDIF
216*
217 DO 40 j = 1, nmat
218*
219 ierr( 1 ) = 0
220*
221 n = nval( j )
222*
223* Make sure matrix information is correct
224*
225 IF( n.LT.1 ) THEN
226 IF( iam.EQ.0 )
227 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
228 ierr( 1 ) = 1
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
234 $ -1, 0 )
235*
236 IF( ierr( 1 ).GT.0 ) THEN
237 IF( iam.EQ.0 )
238 $ WRITE( nout, fmt = 9997 ) 'size'
239 kskip = kskip + 1
240 GO TO 40
241 END IF
242*
243*
244 DO 45 bw_num = 1, nbw
245*
246 ierr( 1 ) = 0
247*
248 bwl = 1
249 IF( bwl.LT.1 ) THEN
250 IF( iam.EQ.0 )
251 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
252 ierr( 1 ) = 1
253 END IF
254*
255 bwu = 1
256 IF( bwu.LT.1 ) THEN
257 IF( iam.EQ.0 )
258 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
259 ierr( 1 ) = 1
260 END IF
261*
262 IF( bwl.GT.n-1 ) THEN
263 IF( iam.EQ.0 ) THEN
264 ierr( 1 ) = 1
265 ENDIF
266 END IF
267*
268 IF( bwu.GT.n-1 ) THEN
269 IF( iam.EQ.0 ) THEN
270 ierr( 1 ) = 1
271 ENDIF
272 END IF
273*
274* Check all processes for an error
275*
276 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
277 $ -1, 0 )
278*
279 IF( ierr( 1 ).GT.0 ) THEN
280 kskip = kskip + 1
281 GO TO 45
282 END IF
283*
284 DO 30 k = 1, nnb
285*
286 ierr( 1 ) = 0
287*
288 nb = nbval( k )
289 IF( nb.LT.0 ) THEN
290 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
291 $ + int_one
292 nb = max( nb, 2*int_one )
293 nb = min( n, nb )
294 END IF
295*
296* Make sure NB is legal
297*
298 ierr( 1 ) = 0
299 IF( nb.LT.min( 2*int_one, n ) ) THEN
300 ierr( 1 ) = 1
301 END IF
302*
303* Check all processes for an error
304*
305 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
306 $ -1, 0 )
307*
308 IF( ierr( 1 ).GT.0 ) THEN
309 kskip = kskip + 1
310 GO TO 30
311 END IF
312*
313* Padding constants
314*
315 np = numroc( (3), (3),
316 $ myrow, 0, nprow )
317 nq = numroc( n, nb, mycol, 0, npcol )
318*
319 IF( check ) THEN
320 iprepad = ((3)+10)
321 imidpad = 10
322 ipostpad = ((3)+10)
323 ELSE
324 iprepad = 0
325 imidpad = 0
326 ipostpad = 0
327 END IF
328*
329* Initialize the array descriptor for the matrix A
330*
331 CALL descinit( desca2d, n, (3),
332 $ nb, 1, 0, 0,
333 $ ictxtb, nb+10, ierr( 1 ) )
334*
335* Convert this to 1D descriptor
336*
337 desca( 1 ) = 501
338 desca( 3 ) = n
339 desca( 4 ) = nb
340 desca( 5 ) = 0
341 desca( 2 ) = ictxt
342 desca( 6 ) = ((3)+10)
343 desca( 7 ) = 0
344*
345 ierr_temp = ierr( 1 )
346 ierr( 1 ) = 0
347 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
348*
349* Check all processes for an error
350*
351 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
352*
353 IF( ierr( 1 ).LT.0 ) THEN
354 IF( iam.EQ.0 )
355 $ WRITE( nout, fmt = 9997 ) 'descriptor'
356 kskip = kskip + 1
357 GO TO 30
358 END IF
359*
360* Assign pointers into MEM for SCALAPACK arrays, A is
361* allocated starting at position MEM( IPREPAD+1 )
362*
363 free_ptr = 1
364 ipb = 0
365*
366* Save room for prepadding
367 free_ptr = free_ptr + iprepad
368*
369 ipa = free_ptr
370 free_ptr = free_ptr + (nb+10)*(3)
371 $ + ipostpad
372*
373* Add memory for fillin
374* Fillin space needs to store:
375* Fillin spike:
376* Contribution to previous proc's diagonal block of
377* reduced system:
378* Off-diagonal block of reduced system:
379* Diagonal block of reduced system:
380*
381 fillin_size =
382 $ (12*npcol+3*nb)
383*
384* Claim memory for fillin
385*
386 free_ptr = free_ptr + iprepad
387 ip_fillin = free_ptr
388 free_ptr = free_ptr + fillin_size
389*
390* Workspace needed by computational routines:
391*
392 ipw_size = 0
393*
394* factorization:
395*
396 ipw_size = 8*npcol
397*
398* Claim memory for IPW
399*
400 ipw = free_ptr
401 free_ptr = free_ptr + ipw_size
402*
403* Check for adequate memory for problem size
404*
405 ierr( 1 ) = 0
406 IF( free_ptr.GT.memsiz ) THEN
407 IF( iam.EQ.0 )
408 $ WRITE( nout, fmt = 9996 )
409 $ 'divide and conquer factorization',
410 $ (free_ptr )*zplxsz
411 ierr( 1 ) = 1
412 END IF
413*
414* Check all processes for an error
415*
416 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417 $ 1, -1, 0 )
418*
419 IF( ierr( 1 ).GT.0 ) THEN
420 IF( iam.EQ.0 )
421 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422 kskip = kskip + 1
423 GO TO 30
424 END IF
425*
426* Worksize needed for LAPRNT
427 worksiz = max( ((3)+10), nb )
428*
429 IF( check ) THEN
430*
431* Calculate the amount of workspace required by
432* the checking routines.
433*
434* PZLANGE
435 worksiz = max( worksiz, desca2d( nb_ ) )
436*
437* PZDTLASCHK
438 worksiz = max( worksiz,
439 $ max(5,nb)+2*nb )
440 END IF
441*
442 free_ptr = free_ptr + iprepad
443 ip_driver_w = free_ptr
444 free_ptr = free_ptr + worksiz + ipostpad
445*
446*
447* Check for adequate memory for problem size
448*
449 ierr( 1 ) = 0
450 IF( free_ptr.GT.memsiz ) THEN
451 IF( iam.EQ.0 )
452 $ WRITE( nout, fmt = 9996 ) 'factorization',
453 $ ( free_ptr )*zplxsz
454 ierr( 1 ) = 1
455 END IF
456*
457* Check all processes for an error
458*
459 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
460 $ 1, -1, 0 )
461*
462 IF( ierr( 1 ).GT.0 ) THEN
463 IF( iam.EQ.0 )
464 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
465 kskip = kskip + 1
466 GO TO 30
467 END IF
468*
469 CALL pzbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
470 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
471 $ mycol, nprow, npcol )
472 CALL pzfillpad( ictxt, nq, np, mem( ipa-iprepad ),
473 $ nb+10, iprepad, ipostpad,
474 $ padval )
475*
476 CALL pzfillpad( ictxt, worksiz, 1,
477 $ mem( ip_driver_w-iprepad ), worksiz,
478 $ iprepad, ipostpad, padval )
479*
480* Calculate norm of A for residual error-checking
481*
482 IF( check ) THEN
483*
484 anorm = pzlange( 'I', n,
485 $ (3), mem( ipa ), 1, 1,
486 $ desca2d, mem( ip_driver_w ) )
487 CALL pzchekpad( ictxt, 'PZLANGE', nq, np,
488 $ mem( ipa-iprepad ), nb+10,
489 $ iprepad, ipostpad, padval )
490 CALL pzchekpad( ictxt, 'PZLANGE',
491 $ worksiz, 1,
492 $ mem( ip_driver_w-iprepad ), worksiz,
493 $ iprepad, ipostpad, padval )
494 END IF
495*
496*
497 CALL slboot()
498 CALL blacs_barrier( ictxt, 'All' )
499*
500* Perform factorization
501*
502 CALL sltimer( 1 )
503*
504 CALL pzdttrf( n, mem( ipa+2*( nb+10 ) ),
505 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
506 $ desca, mem( ip_fillin ), fillin_size,
507 $ mem( ipw ), ipw_size, info )
508*
509 CALL sltimer( 1 )
510*
511 IF( info.NE.0 ) THEN
512 IF( iam.EQ.0 ) THEN
513 WRITE( nout, fmt = * ) 'PZDTTRF INFO=', info
514 ENDIF
515 kfail = kfail + 1
516 GO TO 30
517 END IF
518*
519 IF( check ) THEN
520*
521* Check for memory overwrite in factorization
522*
523 CALL pzchekpad( ictxt, 'PZDTTRF', nq,
524 $ np, mem( ipa-iprepad ), nb+10,
525 $ iprepad, ipostpad, padval )
526 END IF
527*
528*
529* Loop over the different values for NRHS
530*
531 DO 20 hh = 1, nnr
532*
533 ierr( 1 ) = 0
534*
535 nrhs = nrval( hh )
536*
537* Initialize Array Descriptor for rhs
538*
539 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
540 $ ictxtb, nb+10, ierr( 1 ) )
541*
542* Convert this to 1D descriptor
543*
544 descb( 1 ) = 502
545 descb( 3 ) = n
546 descb( 4 ) = nb
547 descb( 5 ) = 0
548 descb( 2 ) = ictxt
549 descb( 6 ) = descb2d( lld_ )
550 descb( 7 ) = 0
551*
552* reset free_ptr to reuse space for right hand sides
553*
554 IF( ipb .GT. 0 ) THEN
555 free_ptr = ipb
556 ENDIF
557*
558 free_ptr = free_ptr + iprepad
559 ipb = free_ptr
560 free_ptr = free_ptr + nrhs*descb2d( lld_ )
561 $ + ipostpad
562*
563* Allocate workspace for workspace in TRS routine:
564*
565 ipw_solve_size = 10*npcol+4*nrhs
566*
567 ipw_solve = free_ptr
568 free_ptr = free_ptr + ipw_solve_size
569*
570 ierr( 1 ) = 0
571 IF( free_ptr.GT.memsiz ) THEN
572 IF( iam.EQ.0 )
573 $ WRITE( nout, fmt = 9996 )'solve',
574 $ ( free_ptr )*zplxsz
575 ierr( 1 ) = 1
576 END IF
577*
578* Check all processes for an error
579*
580 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
581 $ ierr, 1, -1, 0 )
582*
583 IF( ierr( 1 ).GT.0 ) THEN
584 IF( iam.EQ.0 )
585 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
586 kskip = kskip + 1
587 GO TO 15
588 END IF
589*
590 myrhs_size = numroc( n, nb, mycol, 0, npcol )
591*
592* Generate RHS
593*
594 CALL pzmatgen(ictxtb, 'No', 'No',
595 $ descb2d( m_ ), descb2d( n_ ),
596 $ descb2d( mb_ ), descb2d( nb_ ),
597 $ mem( ipb ),
598 $ descb2d( lld_ ), descb2d( rsrc_ ),
599 $ descb2d( csrc_ ),
600 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
601 $ myrow, npcol, nprow )
602*
603 IF( check ) THEN
604 CALL pzfillpad( ictxtb, nb, nrhs,
605 $ mem( ipb-iprepad ),
606 $ descb2d( lld_ ),
607 $ iprepad, ipostpad,
608 $ padval )
609 CALL pzfillpad( ictxt, worksiz, 1,
610 $ mem( ip_driver_w-iprepad ),
611 $ worksiz, iprepad,
612 $ ipostpad, padval )
613 END IF
614*
615*
616 CALL blacs_barrier( ictxt, 'All')
617 CALL sltimer( 2 )
618*
619* Solve linear system via factorization
620*
621 CALL pzdttrs( trans, n, nrhs,
622 $ mem( ipa+2*( nb+10 ) ),
623 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
624 $ 1, desca, mem( ipb ), 1, descb,
625 $ mem( ip_fillin ), fillin_size,
626 $ mem( ipw_solve ), ipw_solve_size,
627 $ info )
628*
629 CALL sltimer( 2 )
630*
631 IF( info.NE.0 ) THEN
632 IF( iam.EQ.0 )
633 $ WRITE( nout, fmt = * ) 'PZDTTRS INFO=', info
634 kfail = kfail + 1
635 passed = 'FAILED'
636 GO TO 20
637 END IF
638*
639 IF( check ) THEN
640*
641* check for memory overwrite
642*
643 CALL pzchekpad( ictxt, 'PZDTTRS-work',
644 $ worksiz, 1,
645 $ mem( ip_driver_w-iprepad ),
646 $ worksiz, iprepad,
647 $ ipostpad, padval )
648*
649* check the solution to rhs
650*
651 sresid = zero
652*
653* Reset descriptor describing A to 1-by-P grid for
654* use in banded utility routines
655*
656 CALL descinit( desca2d, (3), n,
657 $ (3), nb, 0, 0,
658 $ ictxt, (3), ierr( 1 ) )
659 CALL pzdtlaschk( 'N', 'D', trans,
660 $ n, bwl, bwu, nrhs,
661 $ mem( ipb ), 1, 1, descb2d,
662 $ iaseed, mem( ipa ), 1, 1, desca2d,
663 $ ibseed, anorm, sresid,
664 $ mem( ip_driver_w ), worksiz )
665*
666 IF( iam.EQ.0 ) THEN
667 IF( sresid.GT.thresh )
668 $ WRITE( nout, fmt = 9985 ) sresid
669 END IF
670*
671* The second test is a NaN trap
672*
673 IF( ( sresid.LE.thresh ).AND.
674 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
675 kpass = kpass + 1
676 passed = 'PASSED'
677 ELSE
678 kfail = kfail + 1
679 passed = 'FAILED'
680 END IF
681*
682 END IF
683*
684 15 CONTINUE
685* Skipped tests jump to here to print out "SKIPPED"
686*
687* Gather maximum of all CPU and WALL clock timings
688*
689 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
690 $ wtime )
691 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
692 $ ctime )
693*
694* Print results
695*
696 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
697*
698 nops = 0
699 nops2 = 0
700*
701 n_first = nb
702 nprocs_real = ( n-1 )/nb + 1
703 n_last = mod( n-1, nb ) + 1
704*
705* 2 N bwl INT_ONE + N (bwl) flops
706* for LU factorization
707*
708 nops = 2*(dble(n)*dble(bwl)*
709 $ dble(int_one)) +
710 $ (dble(n)*dble(bwl))
711*
712* nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
713*
714 nops = nops +
715 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
716 $ *dble(nrhs))
717*
718* Multiply by 4 to get complex count
719*
720 nops = nops * dble(4)
721*
722* Second calc to represent actual hardware speed
723*
724* 2*N_FIRST bwl*bwu Flops for LU
725* factorization in proc 1
726*
727 nops2 = 2*( (dble(n_first)*
728 $ dble(bwl)*dble(bwu)))
729*
730 IF ( nprocs_real .GT. 1) THEN
731* 8 N_LAST bwl*INT_ONE
732* flops for LU and spike
733* calc in last processor
734*
735 nops2 = nops2 +
736 $ 8*( (dble(n_last)*dble(bwl)
737 $ *dble(int_one)) )
738 ENDIF
739*
740 IF ( nprocs_real .GT. 2) THEN
741* 8 NB bwl*INT_ONE flops for LU and spike
742* calc in other processors
743*
744 nops2 = nops2 + (nprocs_real-2)*
745 $ 8*( (dble(nb)*dble(bwl)
746 $ *dble(int_one)) )
747 ENDIF
748*
749* Reduced system
750*
751 nops2 = nops2 +
752 $ 2*( nprocs_real-1 ) *
753 $ ( bwl*int_one*bwl/3 )
754 IF( nprocs_real .GT. 1 ) THEN
755 nops2 = nops2 +
756 $ 2*( nprocs_real-2 ) *
757 $ (2*bwl*int_one*bwl)
758 ENDIF
759*
760* Solve stage
761*
762* nrhs*2 n_first*
763* (bwl+INT_ONE)
764* flops for L,U solve in proc 1.
765*
766 nops2 = nops2 +
767 $ 2*
768 $ dble(n_first)*
769 $ dble(nrhs) *
770 $ ( dble(bwl)+dble(int_one))
771*
772 IF ( nprocs_real .GT. 1 ) THEN
773*
774* 2*nrhs*2 n_last
775* (bwl+INT_ONE)
776* flops for LU solve in other procs
777*
778 nops2 = nops2 +
779 $ 4*
780 $ (dble(n_last)*(dble(bwl)+
781 $ dble(int_one)))*dble(nrhs)
782 ENDIF
783*
784 IF ( nprocs_real .GT. 2 ) THEN
785*
786* 2*nrhs*2 NB
787* (bwl+INT_ONE)
788* flops for LU solve in other procs
789*
790 nops2 = nops2 +
791 $ ( nprocs_real-2)*2*
792 $ ( (dble(nb)*(dble(bwl)+
793 $ dble(int_one)))*dble(nrhs) )
794 ENDIF
795*
796* Reduced system
797*
798 nops2 = nops2 +
799 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
800 IF( nprocs_real .GT. 1 ) THEN
801 nops2 = nops2 +
802 $ nrhs*( nprocs_real-2 ) *
803 $ ( 6 * bwl*int_one )
804 ENDIF
805*
806*
807* Multiply by 4 to get complex count
808*
809 nops2 = nops2 * dble(4)
810*
811* Calculate total megaflops - factorization and/or
812* solve -- for WALL and CPU time, and print output
813*
814* Print WALL time if machine supports it
815*
816 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
817 tmflops = nops /
818 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
819 ELSE
820 tmflops = 0.0d+0
821 END IF
822*
823 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
824 tmflops2 = nops2 /
825 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
826 ELSE
827 tmflops2 = 0.0d+0
828 END IF
829*
830 IF( wtime( 2 ).GE.0.0d+0 )
831 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
832 $ n,
833 $ bwl, bwu,
834 $ nb, nrhs, nprow, npcol,
835 $ wtime( 1 ), wtime( 2 ), tmflops,
836 $ tmflops2, passed
837*
838* Print CPU time if machine supports it
839*
840 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
841 tmflops = nops /
842 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
843 ELSE
844 tmflops = 0.0d+0
845 END IF
846*
847 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
848 tmflops2 = nops2 /
849 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
850 ELSE
851 tmflops2 = 0.0d+0
852 END IF
853*
854 IF( ctime( 2 ).GE.0.0d+0 )
855 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
856 $ n,
857 $ bwl, bwu,
858 $ nb, nrhs, nprow, npcol,
859 $ ctime( 1 ), ctime( 2 ), tmflops,
860 $ tmflops2, passed
861*
862 END IF
863 20 CONTINUE
864*
865*
866 30 CONTINUE
867* NNB loop
868*
869 45 CONTINUE
870* BW[] loop
871*
872 40 CONTINUE
873* NMAT loop
874*
875 CALL blacs_gridexit( ictxt )
876 CALL blacs_gridexit( ictxtb )
877*
878 50 CONTINUE
879* NGRIDS DROPOUT
880 60 CONTINUE
881* NGRIDS loop
882*
883* Print ending messages and close output file
884*
885 IF( iam.EQ.0 ) THEN
886 ktests = kpass + kfail + kskip
887 WRITE( nout, fmt = * )
888 WRITE( nout, fmt = 9992 ) ktests
889 IF( check ) THEN
890 WRITE( nout, fmt = 9991 ) kpass
891 WRITE( nout, fmt = 9989 ) kfail
892 ELSE
893 WRITE( nout, fmt = 9990 ) kpass
894 END IF
895 WRITE( nout, fmt = 9988 ) kskip
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = * )
898 WRITE( nout, fmt = 9987 )
899 IF( nout.NE.6 .AND. nout.NE.0 )
900 $ CLOSE ( nout )
901 END IF
902*
903 CALL blacs_exit( 0 )
904*
905 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
906 $ '; It should be at least 1' )
907 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
908 $ i4 )
909 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
910 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
911 $ i11 )
912 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
913 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
914 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
915 $ '-------- -------- -------- ------' )
916 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
917 $ 1x,i4,1x,i4,1x,f9.3,
918 $ f9.4, f9.2, f9.2, 1x, a6 )
919 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
920 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
921 9990 FORMAT( i5, ' tests completed without checking.' )
922 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
923 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
924 9987 FORMAT( 'END OF TESTS.' )
925 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
926 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
927*
928 stop
929*
930* End of PZDTTRS_DRIVER
931*
932 END
933*
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pzbmatgen.f:5
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
program pzdtdriver
Definition pzdtdriver.f:1
subroutine pzdtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pzdtinfo.f:6
subroutine pzdtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pzdtlaschk.f:4
subroutine pzdttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
Definition pzdttrf.f:3
subroutine pzdttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pzdttrs.f:3
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition pzlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724