ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pddbdriver.f
Go to the documentation of this file.
1  PROGRAM pddbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PDDBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by DDB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * DBLESZ INTEGER, default = 8 bytes.
56 * INTGSZ and DBLESZ indicate the length in bytes on the
57 * given platform for an integer and a double precision real.
58 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
59 * All arrays used by ScaLAPACK routines are allocated from
60 * this array and referenced by pointers. The integer IPB,
61 * for example, is a pointer to the starting element of MEM for
62 * the solution vector(s) B.
63 *
64 * =====================================================================
65 *
66 * Code Developer: Andrew J. Cleary, University of Tennessee.
67 * Current address: Lawrence Livermore National Labs.
68 * This version released: August, 2001.
69 *
70 * =====================================================================
71 *
72 * .. Parameters ..
73  INTEGER totmem
74  parameter( totmem = 3000000 )
75  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76  $ lld_, mb_, m_, nb_, n_, rsrc_
77  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
80 *
81  DOUBLE PRECISION zero
82  INTEGER dblesz, memsiz, ntests
83  DOUBLE PRECISION padval
84  parameter( dblesz = 8,
85  $ memsiz = totmem / dblesz, ntests = 20,
86  $ padval = -9923.0d+0, zero = 0.0d+0 )
87  INTEGER int_one
88  parameter( int_one = 1 )
89 * ..
90 * .. Local Scalars ..
91  LOGICAL check
92  CHARACTER trans
93  CHARACTER*6 passed
94  CHARACTER*80 outfile
95  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98  $ iprepad, ipw, ipw_size, ipw_solve,
99  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102  $ nnr, nout, np, npcol, nprocs, nprocs_real,
103  $ nprow, nq, nrhs, n_first, n_last, worksiz
104  REAL thresh
105  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
106  $ tmflops2
107 * ..
108 * .. Local Arrays ..
109  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112  $ nrval( ntests ), nval( ntests ),
113  $ pval( ntests ), qval( ntests )
114  DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
115 * ..
116 * .. External Subroutines ..
117  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
119  $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
122  $ slcombine, sltimer
123 * ..
124 * .. External Functions ..
125  INTEGER numroc
126  LOGICAL lsame
127  DOUBLE PRECISION pdlange
128  EXTERNAL lsame, numroc, pdlange
129 * ..
130 * .. Intrinsic Functions ..
131  INTRINSIC dble, max, min, mod
132 * ..
133 * .. Data Statements ..
134  DATA kfail, kpass, kskip, ktests / 4*0 /
135 * ..
136 *
137 *
138 *
139 * .. Executable Statements ..
140 *
141 * Get starting information
142 *
143  CALL blacs_pinfo( iam, nprocs )
144  iaseed = 100
145  ibseed = 200
146 *
147  CALL pddbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
151 *
152  check = ( thresh.GE.0.0d+0 )
153 *
154 * Print headings
155 *
156  IF( iam.EQ.0 ) THEN
157  WRITE( nout, fmt = * )
158  WRITE( nout, fmt = 9995 )
159  WRITE( nout, fmt = 9994 )
160  WRITE( nout, fmt = * )
161  END IF
162 *
163 * Loop over different process grids
164 *
165  DO 60 i = 1, ngrids
166 *
167  nprow = pval( i )
168  npcol = qval( i )
169 *
170 * Make sure grid information is correct
171 *
172  ierr( 1 ) = 0
173  IF( nprow.LT.1 ) THEN
174  IF( iam.EQ.0 )
175  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
176  ierr( 1 ) = 1
177  ELSE IF( npcol.LT.1 ) THEN
178  IF( iam.EQ.0 )
179  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
180  ierr( 1 ) = 1
181  ELSE IF( nprow*npcol.GT.nprocs ) THEN
182  IF( iam.EQ.0 )
183  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
184  ierr( 1 ) = 1
185  END IF
186 *
187  IF( ierr( 1 ).GT.0 ) THEN
188  IF( iam.EQ.0 )
189  $ WRITE( nout, fmt = 9997 ) 'grid'
190  kskip = kskip + 1
191  GO TO 50
192  END IF
193 *
194 * Define process grid
195 *
196  CALL blacs_get( -1, 0, ictxt )
197  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
198 *
199 *
200 * Define transpose process grid
201 *
202  CALL blacs_get( -1, 0, ictxtb )
203  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
204 *
205 * Go to bottom of process grid loop if this case doesn't use my
206 * process
207 *
208  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209 *
210  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
211  GO TO 50
212  ENDIF
213 *
214  DO 40 j = 1, nmat
215 *
216  ierr( 1 ) = 0
217 *
218  n = nval( j )
219 *
220 * Make sure matrix information is correct
221 *
222  IF( n.LT.1 ) THEN
223  IF( iam.EQ.0 )
224  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
225  ierr( 1 ) = 1
226  END IF
227 *
228 * Check all processes for an error
229 *
230  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
231  $ -1, 0 )
232 *
233  IF( ierr( 1 ).GT.0 ) THEN
234  IF( iam.EQ.0 )
235  $ WRITE( nout, fmt = 9997 ) 'size'
236  kskip = kskip + 1
237  GO TO 40
238  END IF
239 *
240 *
241  DO 45 bw_num = 1, nbw
242 *
243  ierr( 1 ) = 0
244 *
245  bwl = bwlval( bw_num )
246  IF( bwl.LT.1 ) THEN
247  IF( iam.EQ.0 )
248  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
249  ierr( 1 ) = 1
250  END IF
251 *
252  bwu = bwuval( bw_num )
253  IF( bwu.LT.1 ) THEN
254  IF( iam.EQ.0 )
255  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
256  ierr( 1 ) = 1
257  END IF
258 *
259  IF( bwl.GT.n-1 ) THEN
260  IF( iam.EQ.0 ) THEN
261  ierr( 1 ) = 1
262  ENDIF
263  END IF
264 *
265  IF( bwu.GT.n-1 ) THEN
266  IF( iam.EQ.0 ) THEN
267  ierr( 1 ) = 1
268  ENDIF
269  END IF
270 *
271 * Check all processes for an error
272 *
273  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
274  $ -1, 0 )
275 *
276  IF( ierr( 1 ).GT.0 ) THEN
277  kskip = kskip + 1
278  GO TO 45
279  END IF
280 *
281  DO 30 k = 1, nnb
282 *
283  ierr( 1 ) = 0
284 *
285  nb = nbval( k )
286  IF( nb.LT.0 ) THEN
287  nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
288  $ + max(bwl,bwu)
289  nb = max( nb, 2*max(bwl,bwu) )
290  nb = min( n, nb )
291  END IF
292 *
293 * Make sure NB is legal
294 *
295  ierr( 1 ) = 0
296  IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
297  ierr( 1 ) = 1
298  END IF
299 *
300 * Check all processes for an error
301 *
302  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
303  $ -1, 0 )
304 *
305  IF( ierr( 1 ).GT.0 ) THEN
306  kskip = kskip + 1
307  GO TO 30
308  END IF
309 *
310 * Padding constants
311 *
312  np = numroc( (bwl+bwu+1), (bwl+bwu+1),
313  $ myrow, 0, nprow )
314  nq = numroc( n, nb, mycol, 0, npcol )
315 *
316  IF( check ) THEN
317  iprepad = ((bwl+bwu+1)+10)
318  imidpad = 10
319  ipostpad = ((bwl+bwu+1)+10)
320  ELSE
321  iprepad = 0
322  imidpad = 0
323  ipostpad = 0
324  END IF
325 *
326 * Initialize the array descriptor for the matrix A
327 *
328  CALL descinit( desca2d, (bwl+bwu+1), n,
329  $ (bwl+bwu+1), nb, 0, 0,
330  $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
331 *
332 * Convert this to 1D descriptor
333 *
334  desca( 1 ) = 501
335  desca( 3 ) = n
336  desca( 4 ) = nb
337  desca( 5 ) = 0
338  desca( 2 ) = ictxt
339  desca( 6 ) = ((bwl+bwu+1)+10)
340  desca( 7 ) = 0
341 *
342  ierr_temp = ierr( 1 )
343  ierr( 1 ) = 0
344  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
345 *
346 * Check all processes for an error
347 *
348  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
349 *
350  IF( ierr( 1 ).LT.0 ) THEN
351  IF( iam.EQ.0 )
352  $ WRITE( nout, fmt = 9997 ) 'descriptor'
353  kskip = kskip + 1
354  GO TO 30
355  END IF
356 *
357 * Assign pointers into MEM for SCALAPACK arrays, A is
358 * allocated starting at position MEM( IPREPAD+1 )
359 *
360  free_ptr = 1
361  ipb = 0
362 *
363 * Save room for prepadding
364  free_ptr = free_ptr + iprepad
365 *
366  ipa = free_ptr
367  free_ptr = free_ptr + desca2d( lld_ )*
368  $ desca2d( nb_ )
369  $ + ipostpad
370 *
371 * Add memory for fillin
372 * Fillin space needs to store:
373 * Fillin spike:
374 * Contribution to previous proc's diagonal block of
375 * reduced system:
376 * Off-diagonal block of reduced system:
377 * Diagonal block of reduced system:
378 *
379  fillin_size =
380  $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
381 *
382 * Claim memory for fillin
383 *
384  free_ptr = free_ptr + iprepad
385  ip_fillin = free_ptr
386  free_ptr = free_ptr + fillin_size
387 *
388 * Workspace needed by computational routines:
389 *
390  ipw_size = 0
391 *
392 * factorization:
393 *
394  ipw_size = max(bwl,bwu)*max(bwl,bwu)
395 *
396 * Claim memory for IPW
397 *
398  ipw = free_ptr
399  free_ptr = free_ptr + ipw_size
400 *
401 * Check for adequate memory for problem size
402 *
403  ierr( 1 ) = 0
404  IF( free_ptr.GT.memsiz ) THEN
405  IF( iam.EQ.0 )
406  $ WRITE( nout, fmt = 9996 )
407  $ 'divide and conquer factorization',
408  $ (free_ptr )*dblesz
409  ierr( 1 ) = 1
410  END IF
411 *
412 * Check all processes for an error
413 *
414  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
415  $ 1, -1, 0 )
416 *
417  IF( ierr( 1 ).GT.0 ) THEN
418  IF( iam.EQ.0 )
419  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
420  kskip = kskip + 1
421  GO TO 30
422  END IF
423 *
424 * Worksize needed for LAPRNT
425  worksiz = max( ((bwl+bwu+1)+10), nb )
426 *
427  IF( check ) THEN
428 *
429 * Calculate the amount of workspace required by
430 * the checking routines.
431 *
432 * PDLANGE
433  worksiz = max( worksiz, desca2d( nb_ ) )
434 *
435 * PDDBLASCHK
436  worksiz = max( worksiz,
437  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
438  END IF
439 *
440  free_ptr = free_ptr + iprepad
441  ip_driver_w = free_ptr
442  free_ptr = free_ptr + worksiz + ipostpad
443 *
444 *
445 * Check for adequate memory for problem size
446 *
447  ierr( 1 ) = 0
448  IF( free_ptr.GT.memsiz ) THEN
449  IF( iam.EQ.0 )
450  $ WRITE( nout, fmt = 9996 ) 'factorization',
451  $ ( free_ptr )*dblesz
452  ierr( 1 ) = 1
453  END IF
454 *
455 * Check all processes for an error
456 *
457  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
458  $ 1, -1, 0 )
459 *
460  IF( ierr( 1 ).GT.0 ) THEN
461  IF( iam.EQ.0 )
462  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
463  kskip = kskip + 1
464  GO TO 30
465  END IF
466 *
467  CALL pdbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
468  $ (bwl+bwu+1), nb, mem( ipa ),
469  $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
470  $ mycol, nprow, npcol )
471 *
472  CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
473  $ ((bwl+bwu+1)+10), iprepad, ipostpad,
474  $ padval )
475 *
476  CALL pdfillpad( ictxt, worksiz, 1,
477  $ mem( ip_driver_w-iprepad ), worksiz,
478  $ iprepad, ipostpad, padval )
479 *
480 * Calculate norm of A for residual error-checking
481 *
482  IF( check ) THEN
483 *
484  anorm = pdlange( '1', (bwl+bwu+1),
485  $ n, mem( ipa ), 1, 1,
486  $ desca2d, mem( ip_driver_w ) )
487  CALL pdchekpad( ictxt, 'PDLANGE', np, nq,
488  $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
489  $ iprepad, ipostpad, padval )
490  CALL pdchekpad( ictxt, 'PDLANGE',
491  $ worksiz, 1,
492  $ mem( ip_driver_w-iprepad ), worksiz,
493  $ iprepad, ipostpad, padval )
494  END IF
495 *
496 *
497  CALL slboot()
498  CALL blacs_barrier( ictxt, 'All' )
499 *
500 * Perform factorization
501 *
502  CALL sltimer( 1 )
503 *
504  CALL pddbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
505  $ mem( ip_fillin ), fillin_size, mem( ipw ),
506  $ ipw_size, info )
507 *
508  CALL sltimer( 1 )
509 *
510  IF( info.NE.0 ) THEN
511  IF( iam.EQ.0 ) THEN
512  WRITE( nout, fmt = * ) 'PDDBTRF INFO=', info
513  ENDIF
514  kfail = kfail + 1
515  GO TO 30
516  END IF
517 *
518  IF( check ) THEN
519 *
520 * Check for memory overwrite in factorization
521 *
522  CALL pdchekpad( ictxt, 'PDDBTRF', np,
523  $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
524  $ iprepad, ipostpad, padval )
525  END IF
526 *
527 *
528 * Loop over the different values for NRHS
529 *
530  DO 20 hh = 1, nnr
531 *
532  ierr( 1 ) = 0
533 *
534  nrhs = nrval( hh )
535 *
536 * Initialize Array Descriptor for rhs
537 *
538  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
539  $ ictxtb, nb+10, ierr( 1 ) )
540 *
541 * Convert this to 1D descriptor
542 *
543  descb( 1 ) = 502
544  descb( 3 ) = n
545  descb( 4 ) = nb
546  descb( 5 ) = 0
547  descb( 2 ) = ictxt
548  descb( 6 ) = descb2d( lld_ )
549  descb( 7 ) = 0
550 *
551 * reset free_ptr to reuse space for right hand sides
552 *
553  IF( ipb .GT. 0 ) THEN
554  free_ptr = ipb
555  ENDIF
556 *
557  free_ptr = free_ptr + iprepad
558  ipb = free_ptr
559  free_ptr = free_ptr + nrhs*descb2d( lld_ )
560  $ + ipostpad
561 *
562 * Allocate workspace for workspace in TRS routine:
563 *
564  ipw_solve_size = (max(bwl,bwu)*nrhs)
565 *
566  ipw_solve = free_ptr
567  free_ptr = free_ptr + ipw_solve_size
568 *
569  ierr( 1 ) = 0
570  IF( free_ptr.GT.memsiz ) THEN
571  IF( iam.EQ.0 )
572  $ WRITE( nout, fmt = 9996 )'solve',
573  $ ( free_ptr )*dblesz
574  ierr( 1 ) = 1
575  END IF
576 *
577 * Check all processes for an error
578 *
579  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
580  $ ierr, 1, -1, 0 )
581 *
582  IF( ierr( 1 ).GT.0 ) THEN
583  IF( iam.EQ.0 )
584  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
585  kskip = kskip + 1
586  GO TO 15
587  END IF
588 *
589  myrhs_size = numroc( n, nb, mycol, 0, npcol )
590 *
591 * Generate RHS
592 *
593  CALL pdmatgen(ictxtb, 'No', 'No',
594  $ descb2d( m_ ), descb2d( n_ ),
595  $ descb2d( mb_ ), descb2d( nb_ ),
596  $ mem( ipb ),
597  $ descb2d( lld_ ), descb2d( rsrc_ ),
598  $ descb2d( csrc_ ),
599  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
600  $ myrow, npcol, nprow )
601 *
602  IF( check ) THEN
603  CALL pdfillpad( ictxtb, nb, nrhs,
604  $ mem( ipb-iprepad ),
605  $ descb2d( lld_ ),
606  $ iprepad, ipostpad,
607  $ padval )
608  CALL pdfillpad( ictxt, worksiz, 1,
609  $ mem( ip_driver_w-iprepad ),
610  $ worksiz, iprepad,
611  $ ipostpad, padval )
612  END IF
613 *
614 *
615  CALL blacs_barrier( ictxt, 'All')
616  CALL sltimer( 2 )
617 *
618 * Solve linear system via factorization
619 *
620  CALL pddbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
621  $ 1, desca, mem( ipb ), 1, descb,
622  $ mem( ip_fillin ), fillin_size,
623  $ mem( ipw_solve ), ipw_solve_size,
624  $ info )
625 *
626  CALL sltimer( 2 )
627 *
628  IF( info.NE.0 ) THEN
629  IF( iam.EQ.0 )
630  $ WRITE( nout, fmt = * ) 'PDDBTRS INFO=', info
631  kfail = kfail + 1
632  passed = 'FAILED'
633  GO TO 20
634  END IF
635 *
636  IF( check ) THEN
637 *
638 * check for memory overwrite
639 *
640  CALL pdchekpad( ictxt, 'PDDBTRS-work',
641  $ worksiz, 1,
642  $ mem( ip_driver_w-iprepad ),
643  $ worksiz, iprepad,
644  $ ipostpad, padval )
645 *
646 * check the solution to rhs
647 *
648  sresid = zero
649 *
650  CALL pddblaschk( 'N', 'D', trans,
651  $ n, bwl, bwu, nrhs,
652  $ mem( ipb ), 1, 1, descb2d,
653  $ iaseed, mem( ipa ), 1, 1, desca2d,
654  $ ibseed, anorm, sresid,
655  $ mem( ip_driver_w ), worksiz )
656 *
657  IF( iam.EQ.0 ) THEN
658  IF( sresid.GT.thresh )
659  $ WRITE( nout, fmt = 9985 ) sresid
660  END IF
661 *
662 * The second test is a NaN trap
663 *
664  IF( ( sresid.LE.thresh ).AND.
665  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
666  kpass = kpass + 1
667  passed = 'PASSED'
668  ELSE
669  kfail = kfail + 1
670  passed = 'FAILED'
671  END IF
672 *
673  END IF
674 *
675  15 CONTINUE
676 * Skipped tests jump to here to print out "SKIPPED"
677 *
678 * Gather maximum of all CPU and WALL clock timings
679 *
680  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
681  $ wtime )
682  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
683  $ ctime )
684 *
685 * Print results
686 *
687  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
688 *
689  nops = 0
690  nops2 = 0
691 *
692  n_first = nb
693  nprocs_real = ( n-1 )/nb + 1
694  n_last = mod( n-1, nb ) + 1
695 *
696 * 2 N bwl bwu + N (bwl) flops
697 * for LU factorization
698 *
699  nops = 2*(dble(n)*dble(bwl)*
700  $ dble(bwu)) +
701  $ (dble(n)*dble(bwl))
702 *
703 * nrhs * 2 N*(bwl+bwu) flops for LU solve.
704 *
705  nops = nops +
706  $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
707  $ *dble(nrhs))
708 *
709 * Second calc to represent actual hardware speed
710 *
711 * 2*N_FIRST bwl*bwu Flops for LU
712 * factorization in proc 1
713 *
714  nops2 = 2*( (dble(n_first)*
715  $ dble(bwl)*dble(bwu)))
716 *
717  IF ( nprocs_real .GT. 1) THEN
718 * 8 N_LAST bwl*bwu
719 * flops for LU and spike
720 * calc in last processor
721 *
722  nops2 = nops2 +
723  $ 8*( (dble(n_last)*dble(bwl)
724  $ *dble(bwu)) )
725  ENDIF
726 *
727  IF ( nprocs_real .GT. 2) THEN
728 * 8 NB bwl*bwu flops for LU and spike
729 * calc in other processors
730 *
731  nops2 = nops2 + (nprocs_real-2)*
732  $ 8*( (dble(nb)*dble(bwl)
733  $ *dble(bwu)) )
734  ENDIF
735 *
736 * Reduced system
737 *
738  nops2 = nops2 +
739  $ 2*( nprocs_real-1 ) *
740  $ ( bwl*bwu*bwl/3 )
741  IF( nprocs_real .GT. 1 ) THEN
742  nops2 = nops2 +
743  $ 2*( nprocs_real-2 ) *
744  $ (2*bwl*bwu*bwl)
745  ENDIF
746 *
747 * Solve stage
748 *
749 * nrhs*2 n_first*
750 * (bwl+bwu)
751 * flops for L,U solve in proc 1.
752 *
753  nops2 = nops2 +
754  $ 2*
755  $ dble(n_first)*
756  $ dble(nrhs) *
757  $ ( dble(bwl)+dble(bwu))
758 *
759  IF ( nprocs_real .GT. 1 ) THEN
760 *
761 * 2*nrhs*2 n_last
762 * (bwl+bwu)
763 * flops for LU solve in other procs
764 *
765  nops2 = nops2 +
766  $ 4*
767  $ (dble(n_last)*(dble(bwl)+
768  $ dble(bwu)))*dble(nrhs)
769  ENDIF
770 *
771  IF ( nprocs_real .GT. 2 ) THEN
772 *
773 * 2*nrhs*2 NB
774 * (bwl+bwu)
775 * flops for LU solve in other procs
776 *
777  nops2 = nops2 +
778  $ ( nprocs_real-2)*2*
779  $ ( (dble(nb)*(dble(bwl)+
780  $ dble(bwu)))*dble(nrhs) )
781  ENDIF
782 *
783 * Reduced system
784 *
785  nops2 = nops2 +
786  $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
787  IF( nprocs_real .GT. 1 ) THEN
788  nops2 = nops2 +
789  $ nrhs*( nprocs_real-2 ) *
790  $ ( 6 * bwl*bwu )
791  ENDIF
792 *
793 *
794 * Calculate total megaflops - factorization and/or
795 * solve -- for WALL and CPU time, and print output
796 *
797 * Print WALL time if machine supports it
798 *
799  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
800  tmflops = nops /
801  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
802  ELSE
803  tmflops = 0.0d+0
804  END IF
805 *
806  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
807  tmflops2 = nops2 /
808  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
809  ELSE
810  tmflops2 = 0.0d+0
811  END IF
812 *
813  IF( wtime( 2 ).GE.0.0d+0 )
814  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
815  $ n,
816  $ bwl, bwu,
817  $ nb, nrhs, nprow, npcol,
818  $ wtime( 1 ), wtime( 2 ), tmflops,
819  $ tmflops2, passed
820 *
821 * Print CPU time if machine supports it
822 *
823  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
824  tmflops = nops /
825  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
826  ELSE
827  tmflops = 0.0d+0
828  END IF
829 *
830  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
831  tmflops2 = nops2 /
832  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
833  ELSE
834  tmflops2 = 0.0d+0
835  END IF
836 *
837  IF( ctime( 2 ).GE.0.0d+0 )
838  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
839  $ n,
840  $ bwl, bwu,
841  $ nb, nrhs, nprow, npcol,
842  $ ctime( 1 ), ctime( 2 ), tmflops,
843  $ tmflops2, passed
844 *
845  END IF
846  20 CONTINUE
847 *
848 *
849  30 CONTINUE
850 * NNB loop
851 *
852  45 CONTINUE
853 * BW[] loop
854 *
855  40 CONTINUE
856 * NMAT loop
857 *
858  CALL blacs_gridexit( ictxt )
859  CALL blacs_gridexit( ictxtb )
860 *
861  50 CONTINUE
862 * NGRIDS DROPOUT
863  60 CONTINUE
864 * NGRIDS loop
865 *
866 * Print ending messages and close output file
867 *
868  IF( iam.EQ.0 ) THEN
869  ktests = kpass + kfail + kskip
870  WRITE( nout, fmt = * )
871  WRITE( nout, fmt = 9992 ) ktests
872  IF( check ) THEN
873  WRITE( nout, fmt = 9991 ) kpass
874  WRITE( nout, fmt = 9989 ) kfail
875  ELSE
876  WRITE( nout, fmt = 9990 ) kpass
877  END IF
878  WRITE( nout, fmt = 9988 ) kskip
879  WRITE( nout, fmt = * )
880  WRITE( nout, fmt = * )
881  WRITE( nout, fmt = 9987 )
882  IF( nout.NE.6 .AND. nout.NE.0 )
883  $ CLOSE ( nout )
884  END IF
885 *
886  CALL blacs_exit( 0 )
887 *
888  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
889  $ '; It should be at least 1' )
890  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
891  $ i4 )
892  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
893  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
894  $ i11 )
895  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
896  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
897  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
898  $ '-------- -------- -------- ------' )
899  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
900  $ 1x,i4,1x,i4,1x,f9.3,
901  $ f9.4, f9.2, f9.2, 1x, a6 )
902  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
903  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
904  9990 FORMAT( i5, ' tests completed without checking.' )
905  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
906  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
907  9987 FORMAT( 'END OF TESTS.' )
908  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
909  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
910 *
911  stop
912 *
913 * End of PDDBTRS_DRIVER
914 *
915  END
916 *
max
#define max(A, B)
Definition: pcgemr.c:180
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pddbtrf
subroutine pddbtrf(N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pddbtrf.f:3
pddblaschk
subroutine pddblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pddblaschk.f:4
pddbinfo
subroutine pddbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pddbinfo.f:6
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pddbtrs
subroutine pddbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pddbtrs.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pddbdriver
program pddbdriver
Definition: pddbdriver.f:1
pdbmatgen
subroutine pdbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdbmatgen.f:5
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181