ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzdtdriver.f
Go to the documentation of this file.
1  PROGRAM pzdtdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PZDTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by ZDT. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * ZPLXSZ INTEGER, default = 16 bytes.
56 * INTGSZ and ZPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a double precision
58 * complex.
59 * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  DOUBLE PRECISION zero
83  INTEGER memsiz, ntests, zplxsz
84  COMPLEX*16 padval
85  parameter( zplxsz = 16,
86  $ memsiz = totmem / zplxsz, ntests = 20,
87  $ padval = ( -9923.0d+0, -9923.0d+0 ),
88  $ zero = 0.0d+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER trans
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL thresh
107  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
108  $ tmflops2
109 * ..
110 * .. Local Arrays ..
111  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114  $ nrval( ntests ), nval( ntests ),
115  $ pval( ntests ), qval( ntests )
116  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117  COMPLEX*16 mem( memsiz )
118 * ..
119 * .. External Subroutines ..
120  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122  $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
125  $ slcombine, sltimer
126 * ..
127 * .. External Functions ..
128  INTEGER numroc
129  LOGICAL lsame
130  DOUBLE PRECISION pzlange
131  EXTERNAL lsame, numroc, pzlange
132 * ..
133 * .. Intrinsic Functions ..
134  INTRINSIC dble, max, min, mod
135 * ..
136 * .. Data Statements ..
137  DATA kfail, kpass, kskip, ktests / 4*0 /
138 * ..
139 *
140 *
141 *
142 * .. Executable Statements ..
143 *
144 * Get starting information
145 *
146  CALL blacs_pinfo( iam, nprocs )
147  iaseed = 100
148  ibseed = 200
149 *
150  CALL pzdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154 *
155  check = ( thresh.GE.0.0d+0 )
156 *
157 * Print headings
158 *
159  IF( iam.EQ.0 ) THEN
160  WRITE( nout, fmt = * )
161  WRITE( nout, fmt = 9995 )
162  WRITE( nout, fmt = 9994 )
163  WRITE( nout, fmt = * )
164  END IF
165 *
166 * Loop over different process grids
167 *
168  DO 60 i = 1, ngrids
169 *
170  nprow = pval( i )
171  npcol = qval( i )
172 *
173 * Make sure grid information is correct
174 *
175  ierr( 1 ) = 0
176  IF( nprow.LT.1 ) THEN
177  IF( iam.EQ.0 )
178  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179  ierr( 1 ) = 1
180  ELSE IF( npcol.LT.1 ) THEN
181  IF( iam.EQ.0 )
182  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183  ierr( 1 ) = 1
184  ELSE IF( nprow*npcol.GT.nprocs ) THEN
185  IF( iam.EQ.0 )
186  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187  ierr( 1 ) = 1
188  END IF
189 *
190  IF( ierr( 1 ).GT.0 ) THEN
191  IF( iam.EQ.0 )
192  $ WRITE( nout, fmt = 9997 ) 'grid'
193  kskip = kskip + 1
194  GO TO 50
195  END IF
196 *
197 * Define process grid
198 *
199  CALL blacs_get( -1, 0, ictxt )
200  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201 *
202 *
203 * Define transpose process grid
204 *
205  CALL blacs_get( -1, 0, ictxtb )
206  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
207 *
208 * Go to bottom of process grid loop if this case doesn't use my
209 * process
210 *
211  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 *
213  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
214  GO TO 50
215  ENDIF
216 *
217  DO 40 j = 1, nmat
218 *
219  ierr( 1 ) = 0
220 *
221  n = nval( j )
222 *
223 * Make sure matrix information is correct
224 *
225  IF( n.LT.1 ) THEN
226  IF( iam.EQ.0 )
227  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
228  ierr( 1 ) = 1
229  END IF
230 *
231 * Check all processes for an error
232 *
233  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
234  $ -1, 0 )
235 *
236  IF( ierr( 1 ).GT.0 ) THEN
237  IF( iam.EQ.0 )
238  $ WRITE( nout, fmt = 9997 ) 'size'
239  kskip = kskip + 1
240  GO TO 40
241  END IF
242 *
243 *
244  DO 45 bw_num = 1, nbw
245 *
246  ierr( 1 ) = 0
247 *
248  bwl = 1
249  IF( bwl.LT.1 ) THEN
250  IF( iam.EQ.0 )
251  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
252  ierr( 1 ) = 1
253  END IF
254 *
255  bwu = 1
256  IF( bwu.LT.1 ) THEN
257  IF( iam.EQ.0 )
258  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
259  ierr( 1 ) = 1
260  END IF
261 *
262  IF( bwl.GT.n-1 ) THEN
263  IF( iam.EQ.0 ) THEN
264  ierr( 1 ) = 1
265  ENDIF
266  END IF
267 *
268  IF( bwu.GT.n-1 ) THEN
269  IF( iam.EQ.0 ) THEN
270  ierr( 1 ) = 1
271  ENDIF
272  END IF
273 *
274 * Check all processes for an error
275 *
276  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
277  $ -1, 0 )
278 *
279  IF( ierr( 1 ).GT.0 ) THEN
280  kskip = kskip + 1
281  GO TO 45
282  END IF
283 *
284  DO 30 k = 1, nnb
285 *
286  ierr( 1 ) = 0
287 *
288  nb = nbval( k )
289  IF( nb.LT.0 ) THEN
290  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
291  $ + int_one
292  nb = max( nb, 2*int_one )
293  nb = min( n, nb )
294  END IF
295 *
296 * Make sure NB is legal
297 *
298  ierr( 1 ) = 0
299  IF( nb.LT.min( 2*int_one, n ) ) THEN
300  ierr( 1 ) = 1
301  END IF
302 *
303 * Check all processes for an error
304 *
305  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
306  $ -1, 0 )
307 *
308  IF( ierr( 1 ).GT.0 ) THEN
309  kskip = kskip + 1
310  GO TO 30
311  END IF
312 *
313 * Padding constants
314 *
315  np = numroc( (3), (3),
316  $ myrow, 0, nprow )
317  nq = numroc( n, nb, mycol, 0, npcol )
318 *
319  IF( check ) THEN
320  iprepad = ((3)+10)
321  imidpad = 10
322  ipostpad = ((3)+10)
323  ELSE
324  iprepad = 0
325  imidpad = 0
326  ipostpad = 0
327  END IF
328 *
329 * Initialize the array descriptor for the matrix A
330 *
331  CALL descinit( desca2d, n, (3),
332  $ nb, 1, 0, 0,
333  $ ictxtb, nb+10, ierr( 1 ) )
334 *
335 * Convert this to 1D descriptor
336 *
337  desca( 1 ) = 501
338  desca( 3 ) = n
339  desca( 4 ) = nb
340  desca( 5 ) = 0
341  desca( 2 ) = ictxt
342  desca( 6 ) = ((3)+10)
343  desca( 7 ) = 0
344 *
345  ierr_temp = ierr( 1 )
346  ierr( 1 ) = 0
347  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
348 *
349 * Check all processes for an error
350 *
351  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
352 *
353  IF( ierr( 1 ).LT.0 ) THEN
354  IF( iam.EQ.0 )
355  $ WRITE( nout, fmt = 9997 ) 'descriptor'
356  kskip = kskip + 1
357  GO TO 30
358  END IF
359 *
360 * Assign pointers into MEM for SCALAPACK arrays, A is
361 * allocated starting at position MEM( IPREPAD+1 )
362 *
363  free_ptr = 1
364  ipb = 0
365 *
366 * Save room for prepadding
367  free_ptr = free_ptr + iprepad
368 *
369  ipa = free_ptr
370  free_ptr = free_ptr + (nb+10)*(3)
371  $ + ipostpad
372 *
373 * Add memory for fillin
374 * Fillin space needs to store:
375 * Fillin spike:
376 * Contribution to previous proc's diagonal block of
377 * reduced system:
378 * Off-diagonal block of reduced system:
379 * Diagonal block of reduced system:
380 *
381  fillin_size =
382  $ (12*npcol+3*nb)
383 *
384 * Claim memory for fillin
385 *
386  free_ptr = free_ptr + iprepad
387  ip_fillin = free_ptr
388  free_ptr = free_ptr + fillin_size
389 *
390 * Workspace needed by computational routines:
391 *
392  ipw_size = 0
393 *
394 * factorization:
395 *
396  ipw_size = 8*npcol
397 *
398 * Claim memory for IPW
399 *
400  ipw = free_ptr
401  free_ptr = free_ptr + ipw_size
402 *
403 * Check for adequate memory for problem size
404 *
405  ierr( 1 ) = 0
406  IF( free_ptr.GT.memsiz ) THEN
407  IF( iam.EQ.0 )
408  $ WRITE( nout, fmt = 9996 )
409  $ 'divide and conquer factorization',
410  $ (free_ptr )*zplxsz
411  ierr( 1 ) = 1
412  END IF
413 *
414 * Check all processes for an error
415 *
416  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417  $ 1, -1, 0 )
418 *
419  IF( ierr( 1 ).GT.0 ) THEN
420  IF( iam.EQ.0 )
421  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422  kskip = kskip + 1
423  GO TO 30
424  END IF
425 *
426 * Worksize needed for LAPRNT
427  worksiz = max( ((3)+10), nb )
428 *
429  IF( check ) THEN
430 *
431 * Calculate the amount of workspace required by
432 * the checking routines.
433 *
434 * PZLANGE
435  worksiz = max( worksiz, desca2d( nb_ ) )
436 *
437 * PZDTLASCHK
438  worksiz = max( worksiz,
439  $ max(5,nb)+2*nb )
440  END IF
441 *
442  free_ptr = free_ptr + iprepad
443  ip_driver_w = free_ptr
444  free_ptr = free_ptr + worksiz + ipostpad
445 *
446 *
447 * Check for adequate memory for problem size
448 *
449  ierr( 1 ) = 0
450  IF( free_ptr.GT.memsiz ) THEN
451  IF( iam.EQ.0 )
452  $ WRITE( nout, fmt = 9996 ) 'factorization',
453  $ ( free_ptr )*zplxsz
454  ierr( 1 ) = 1
455  END IF
456 *
457 * Check all processes for an error
458 *
459  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
460  $ 1, -1, 0 )
461 *
462  IF( ierr( 1 ).GT.0 ) THEN
463  IF( iam.EQ.0 )
464  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
465  kskip = kskip + 1
466  GO TO 30
467  END IF
468 *
469  CALL pzbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
470  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
471  $ mycol, nprow, npcol )
472  CALL pzfillpad( ictxt, nq, np, mem( ipa-iprepad ),
473  $ nb+10, iprepad, ipostpad,
474  $ padval )
475 *
476  CALL pzfillpad( ictxt, worksiz, 1,
477  $ mem( ip_driver_w-iprepad ), worksiz,
478  $ iprepad, ipostpad, padval )
479 *
480 * Calculate norm of A for residual error-checking
481 *
482  IF( check ) THEN
483 *
484  anorm = pzlange( 'I', n,
485  $ (3), mem( ipa ), 1, 1,
486  $ desca2d, mem( ip_driver_w ) )
487  CALL pzchekpad( ictxt, 'PZLANGE', nq, np,
488  $ mem( ipa-iprepad ), nb+10,
489  $ iprepad, ipostpad, padval )
490  CALL pzchekpad( ictxt, 'PZLANGE',
491  $ worksiz, 1,
492  $ mem( ip_driver_w-iprepad ), worksiz,
493  $ iprepad, ipostpad, padval )
494  END IF
495 *
496 *
497  CALL slboot()
498  CALL blacs_barrier( ictxt, 'All' )
499 *
500 * Perform factorization
501 *
502  CALL sltimer( 1 )
503 *
504  CALL pzdttrf( n, mem( ipa+2*( nb+10 ) ),
505  $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
506  $ desca, mem( ip_fillin ), fillin_size,
507  $ mem( ipw ), ipw_size, info )
508 *
509  CALL sltimer( 1 )
510 *
511  IF( info.NE.0 ) THEN
512  IF( iam.EQ.0 ) THEN
513  WRITE( nout, fmt = * ) 'PZDTTRF INFO=', info
514  ENDIF
515  kfail = kfail + 1
516  GO TO 30
517  END IF
518 *
519  IF( check ) THEN
520 *
521 * Check for memory overwrite in factorization
522 *
523  CALL pzchekpad( ictxt, 'PZDTTRF', nq,
524  $ np, mem( ipa-iprepad ), nb+10,
525  $ iprepad, ipostpad, padval )
526  END IF
527 *
528 *
529 * Loop over the different values for NRHS
530 *
531  DO 20 hh = 1, nnr
532 *
533  ierr( 1 ) = 0
534 *
535  nrhs = nrval( hh )
536 *
537 * Initialize Array Descriptor for rhs
538 *
539  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
540  $ ictxtb, nb+10, ierr( 1 ) )
541 *
542 * Convert this to 1D descriptor
543 *
544  descb( 1 ) = 502
545  descb( 3 ) = n
546  descb( 4 ) = nb
547  descb( 5 ) = 0
548  descb( 2 ) = ictxt
549  descb( 6 ) = descb2d( lld_ )
550  descb( 7 ) = 0
551 *
552 * reset free_ptr to reuse space for right hand sides
553 *
554  IF( ipb .GT. 0 ) THEN
555  free_ptr = ipb
556  ENDIF
557 *
558  free_ptr = free_ptr + iprepad
559  ipb = free_ptr
560  free_ptr = free_ptr + nrhs*descb2d( lld_ )
561  $ + ipostpad
562 *
563 * Allocate workspace for workspace in TRS routine:
564 *
565  ipw_solve_size = 10*npcol+4*nrhs
566 *
567  ipw_solve = free_ptr
568  free_ptr = free_ptr + ipw_solve_size
569 *
570  ierr( 1 ) = 0
571  IF( free_ptr.GT.memsiz ) THEN
572  IF( iam.EQ.0 )
573  $ WRITE( nout, fmt = 9996 )'solve',
574  $ ( free_ptr )*zplxsz
575  ierr( 1 ) = 1
576  END IF
577 *
578 * Check all processes for an error
579 *
580  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
581  $ ierr, 1, -1, 0 )
582 *
583  IF( ierr( 1 ).GT.0 ) THEN
584  IF( iam.EQ.0 )
585  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
586  kskip = kskip + 1
587  GO TO 15
588  END IF
589 *
590  myrhs_size = numroc( n, nb, mycol, 0, npcol )
591 *
592 * Generate RHS
593 *
594  CALL pzmatgen(ictxtb, 'No', 'No',
595  $ descb2d( m_ ), descb2d( n_ ),
596  $ descb2d( mb_ ), descb2d( nb_ ),
597  $ mem( ipb ),
598  $ descb2d( lld_ ), descb2d( rsrc_ ),
599  $ descb2d( csrc_ ),
600  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
601  $ myrow, npcol, nprow )
602 *
603  IF( check ) THEN
604  CALL pzfillpad( ictxtb, nb, nrhs,
605  $ mem( ipb-iprepad ),
606  $ descb2d( lld_ ),
607  $ iprepad, ipostpad,
608  $ padval )
609  CALL pzfillpad( ictxt, worksiz, 1,
610  $ mem( ip_driver_w-iprepad ),
611  $ worksiz, iprepad,
612  $ ipostpad, padval )
613  END IF
614 *
615 *
616  CALL blacs_barrier( ictxt, 'All')
617  CALL sltimer( 2 )
618 *
619 * Solve linear system via factorization
620 *
621  CALL pzdttrs( trans, n, nrhs,
622  $ mem( ipa+2*( nb+10 ) ),
623  $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
624  $ 1, desca, mem( ipb ), 1, descb,
625  $ mem( ip_fillin ), fillin_size,
626  $ mem( ipw_solve ), ipw_solve_size,
627  $ info )
628 *
629  CALL sltimer( 2 )
630 *
631  IF( info.NE.0 ) THEN
632  IF( iam.EQ.0 )
633  $ WRITE( nout, fmt = * ) 'PZDTTRS INFO=', info
634  kfail = kfail + 1
635  passed = 'FAILED'
636  GO TO 20
637  END IF
638 *
639  IF( check ) THEN
640 *
641 * check for memory overwrite
642 *
643  CALL pzchekpad( ictxt, 'PZDTTRS-work',
644  $ worksiz, 1,
645  $ mem( ip_driver_w-iprepad ),
646  $ worksiz, iprepad,
647  $ ipostpad, padval )
648 *
649 * check the solution to rhs
650 *
651  sresid = zero
652 *
653 * Reset descriptor describing A to 1-by-P grid for
654 * use in banded utility routines
655 *
656  CALL descinit( desca2d, (3), n,
657  $ (3), nb, 0, 0,
658  $ ictxt, (3), ierr( 1 ) )
659  CALL pzdtlaschk( 'N', 'D', trans,
660  $ n, bwl, bwu, nrhs,
661  $ mem( ipb ), 1, 1, descb2d,
662  $ iaseed, mem( ipa ), 1, 1, desca2d,
663  $ ibseed, anorm, sresid,
664  $ mem( ip_driver_w ), worksiz )
665 *
666  IF( iam.EQ.0 ) THEN
667  IF( sresid.GT.thresh )
668  $ WRITE( nout, fmt = 9985 ) sresid
669  END IF
670 *
671 * The second test is a NaN trap
672 *
673  IF( ( sresid.LE.thresh ).AND.
674  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
675  kpass = kpass + 1
676  passed = 'PASSED'
677  ELSE
678  kfail = kfail + 1
679  passed = 'FAILED'
680  END IF
681 *
682  END IF
683 *
684  15 CONTINUE
685 * Skipped tests jump to here to print out "SKIPPED"
686 *
687 * Gather maximum of all CPU and WALL clock timings
688 *
689  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
690  $ wtime )
691  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
692  $ ctime )
693 *
694 * Print results
695 *
696  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
697 *
698  nops = 0
699  nops2 = 0
700 *
701  n_first = nb
702  nprocs_real = ( n-1 )/nb + 1
703  n_last = mod( n-1, nb ) + 1
704 *
705 * 2 N bwl INT_ONE + N (bwl) flops
706 * for LU factorization
707 *
708  nops = 2*(dble(n)*dble(bwl)*
709  $ dble(int_one)) +
710  $ (dble(n)*dble(bwl))
711 *
712 * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
713 *
714  nops = nops +
715  $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
716  $ *dble(nrhs))
717 *
718 * Multiply by 4 to get complex count
719 *
720  nops = nops * dble(4)
721 *
722 * Second calc to represent actual hardware speed
723 *
724 * 2*N_FIRST bwl*bwu Flops for LU
725 * factorization in proc 1
726 *
727  nops2 = 2*( (dble(n_first)*
728  $ dble(bwl)*dble(bwu)))
729 *
730  IF ( nprocs_real .GT. 1) THEN
731 * 8 N_LAST bwl*INT_ONE
732 * flops for LU and spike
733 * calc in last processor
734 *
735  nops2 = nops2 +
736  $ 8*( (dble(n_last)*dble(bwl)
737  $ *dble(int_one)) )
738  ENDIF
739 *
740  IF ( nprocs_real .GT. 2) THEN
741 * 8 NB bwl*INT_ONE flops for LU and spike
742 * calc in other processors
743 *
744  nops2 = nops2 + (nprocs_real-2)*
745  $ 8*( (dble(nb)*dble(bwl)
746  $ *dble(int_one)) )
747  ENDIF
748 *
749 * Reduced system
750 *
751  nops2 = nops2 +
752  $ 2*( nprocs_real-1 ) *
753  $ ( bwl*int_one*bwl/3 )
754  IF( nprocs_real .GT. 1 ) THEN
755  nops2 = nops2 +
756  $ 2*( nprocs_real-2 ) *
757  $ (2*bwl*int_one*bwl)
758  ENDIF
759 *
760 * Solve stage
761 *
762 * nrhs*2 n_first*
763 * (bwl+INT_ONE)
764 * flops for L,U solve in proc 1.
765 *
766  nops2 = nops2 +
767  $ 2*
768  $ dble(n_first)*
769  $ dble(nrhs) *
770  $ ( dble(bwl)+dble(int_one))
771 *
772  IF ( nprocs_real .GT. 1 ) THEN
773 *
774 * 2*nrhs*2 n_last
775 * (bwl+INT_ONE)
776 * flops for LU solve in other procs
777 *
778  nops2 = nops2 +
779  $ 4*
780  $ (dble(n_last)*(dble(bwl)+
781  $ dble(int_one)))*dble(nrhs)
782  ENDIF
783 *
784  IF ( nprocs_real .GT. 2 ) THEN
785 *
786 * 2*nrhs*2 NB
787 * (bwl+INT_ONE)
788 * flops for LU solve in other procs
789 *
790  nops2 = nops2 +
791  $ ( nprocs_real-2)*2*
792  $ ( (dble(nb)*(dble(bwl)+
793  $ dble(int_one)))*dble(nrhs) )
794  ENDIF
795 *
796 * Reduced system
797 *
798  nops2 = nops2 +
799  $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
800  IF( nprocs_real .GT. 1 ) THEN
801  nops2 = nops2 +
802  $ nrhs*( nprocs_real-2 ) *
803  $ ( 6 * bwl*int_one )
804  ENDIF
805 *
806 *
807 * Multiply by 4 to get complex count
808 *
809  nops2 = nops2 * dble(4)
810 *
811 * Calculate total megaflops - factorization and/or
812 * solve -- for WALL and CPU time, and print output
813 *
814 * Print WALL time if machine supports it
815 *
816  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
817  tmflops = nops /
818  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
819  ELSE
820  tmflops = 0.0d+0
821  END IF
822 *
823  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
824  tmflops2 = nops2 /
825  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
826  ELSE
827  tmflops2 = 0.0d+0
828  END IF
829 *
830  IF( wtime( 2 ).GE.0.0d+0 )
831  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
832  $ n,
833  $ bwl, bwu,
834  $ nb, nrhs, nprow, npcol,
835  $ wtime( 1 ), wtime( 2 ), tmflops,
836  $ tmflops2, passed
837 *
838 * Print CPU time if machine supports it
839 *
840  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
841  tmflops = nops /
842  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
843  ELSE
844  tmflops = 0.0d+0
845  END IF
846 *
847  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
848  tmflops2 = nops2 /
849  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
850  ELSE
851  tmflops2 = 0.0d+0
852  END IF
853 *
854  IF( ctime( 2 ).GE.0.0d+0 )
855  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
856  $ n,
857  $ bwl, bwu,
858  $ nb, nrhs, nprow, npcol,
859  $ ctime( 1 ), ctime( 2 ), tmflops,
860  $ tmflops2, passed
861 *
862  END IF
863  20 CONTINUE
864 *
865 *
866  30 CONTINUE
867 * NNB loop
868 *
869  45 CONTINUE
870 * BW[] loop
871 *
872  40 CONTINUE
873 * NMAT loop
874 *
875  CALL blacs_gridexit( ictxt )
876  CALL blacs_gridexit( ictxtb )
877 *
878  50 CONTINUE
879 * NGRIDS DROPOUT
880  60 CONTINUE
881 * NGRIDS loop
882 *
883 * Print ending messages and close output file
884 *
885  IF( iam.EQ.0 ) THEN
886  ktests = kpass + kfail + kskip
887  WRITE( nout, fmt = * )
888  WRITE( nout, fmt = 9992 ) ktests
889  IF( check ) THEN
890  WRITE( nout, fmt = 9991 ) kpass
891  WRITE( nout, fmt = 9989 ) kfail
892  ELSE
893  WRITE( nout, fmt = 9990 ) kpass
894  END IF
895  WRITE( nout, fmt = 9988 ) kskip
896  WRITE( nout, fmt = * )
897  WRITE( nout, fmt = * )
898  WRITE( nout, fmt = 9987 )
899  IF( nout.NE.6 .AND. nout.NE.0 )
900  $ CLOSE ( nout )
901  END IF
902 *
903  CALL blacs_exit( 0 )
904 *
905  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
906  $ '; It should be at least 1' )
907  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
908  $ i4 )
909  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
910  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
911  $ i11 )
912  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
913  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
914  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
915  $ '-------- -------- -------- ------' )
916  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
917  $ 1x,i4,1x,i4,1x,f9.3,
918  $ f9.4, f9.2, f9.2, 1x, a6 )
919  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
920  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
921  9990 FORMAT( i5, ' tests completed without checking.' )
922  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
923  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
924  9987 FORMAT( 'END OF TESTS.' )
925  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
926  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
927 *
928  stop
929 *
930 * End of PZDTTRS_DRIVER
931 *
932  END
933 *
pzdttrf
subroutine pzdttrf(N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pzdttrf.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pzdtlaschk
subroutine pzdtlaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pzdtlaschk.f:4
pzdtinfo
subroutine pzdtinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pzdtinfo.f:6
pzdtdriver
program pzdtdriver
Definition: pzdtdriver.f:1
pzbmatgen
subroutine pzbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzbmatgen.f:5
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pzdttrs
subroutine pzdttrs(TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pzdttrs.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzchekpad
subroutine pzchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzchekpad.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pzfillpad
subroutine pzfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzfillpad.f:2