ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzdbdriver.f
Go to the documentation of this file.
1  PROGRAM pzdbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PZDBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by ZDB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * ZPLXSZ INTEGER, default = 16 bytes.
56 * INTGSZ and ZPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a double precision
58 * complex.
59 * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  DOUBLE PRECISION zero
83  INTEGER memsiz, ntests, zplxsz
84  COMPLEX*16 padval
85  parameter( zplxsz = 16,
86  $ memsiz = totmem / zplxsz, ntests = 20,
87  $ padval = ( -9923.0d+0, -9923.0d+0 ),
88  $ zero = 0.0d+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER trans
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL thresh
107  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
108  $ tmflops2
109 * ..
110 * .. Local Arrays ..
111  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114  $ nrval( ntests ), nval( ntests ),
115  $ pval( ntests ), qval( ntests )
116  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117  COMPLEX*16 mem( memsiz )
118 * ..
119 * .. External Subroutines ..
120  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122  $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
125  $ slcombine, sltimer
126 * ..
127 * .. External Functions ..
128  INTEGER numroc
129  LOGICAL lsame
130  DOUBLE PRECISION pzlange
131  EXTERNAL lsame, numroc, pzlange
132 * ..
133 * .. Intrinsic Functions ..
134  INTRINSIC dble, max, min, mod
135 * ..
136 * .. Data Statements ..
137  DATA kfail, kpass, kskip, ktests / 4*0 /
138 * ..
139 *
140 *
141 *
142 * .. Executable Statements ..
143 *
144 * Get starting information
145 *
146  CALL blacs_pinfo( iam, nprocs )
147  iaseed = 100
148  ibseed = 200
149 *
150  CALL pzdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154 *
155  check = ( thresh.GE.0.0d+0 )
156 *
157 * Print headings
158 *
159  IF( iam.EQ.0 ) THEN
160  WRITE( nout, fmt = * )
161  WRITE( nout, fmt = 9995 )
162  WRITE( nout, fmt = 9994 )
163  WRITE( nout, fmt = * )
164  END IF
165 *
166 * Loop over different process grids
167 *
168  DO 60 i = 1, ngrids
169 *
170  nprow = pval( i )
171  npcol = qval( i )
172 *
173 * Make sure grid information is correct
174 *
175  ierr( 1 ) = 0
176  IF( nprow.LT.1 ) THEN
177  IF( iam.EQ.0 )
178  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179  ierr( 1 ) = 1
180  ELSE IF( npcol.LT.1 ) THEN
181  IF( iam.EQ.0 )
182  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183  ierr( 1 ) = 1
184  ELSE IF( nprow*npcol.GT.nprocs ) THEN
185  IF( iam.EQ.0 )
186  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187  ierr( 1 ) = 1
188  END IF
189 *
190  IF( ierr( 1 ).GT.0 ) THEN
191  IF( iam.EQ.0 )
192  $ WRITE( nout, fmt = 9997 ) 'grid'
193  kskip = kskip + 1
194  GO TO 50
195  END IF
196 *
197 * Define process grid
198 *
199  CALL blacs_get( -1, 0, ictxt )
200  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201 *
202 *
203 * Define transpose process grid
204 *
205  CALL blacs_get( -1, 0, ictxtb )
206  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
207 *
208 * Go to bottom of process grid loop if this case doesn't use my
209 * process
210 *
211  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 *
213  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
214  GO TO 50
215  ENDIF
216 *
217  DO 40 j = 1, nmat
218 *
219  ierr( 1 ) = 0
220 *
221  n = nval( j )
222 *
223 * Make sure matrix information is correct
224 *
225  IF( n.LT.1 ) THEN
226  IF( iam.EQ.0 )
227  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
228  ierr( 1 ) = 1
229  END IF
230 *
231 * Check all processes for an error
232 *
233  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
234  $ -1, 0 )
235 *
236  IF( ierr( 1 ).GT.0 ) THEN
237  IF( iam.EQ.0 )
238  $ WRITE( nout, fmt = 9997 ) 'size'
239  kskip = kskip + 1
240  GO TO 40
241  END IF
242 *
243 *
244  DO 45 bw_num = 1, nbw
245 *
246  ierr( 1 ) = 0
247 *
248  bwl = bwlval( bw_num )
249  IF( bwl.LT.1 ) THEN
250  IF( iam.EQ.0 )
251  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
252  ierr( 1 ) = 1
253  END IF
254 *
255  bwu = bwuval( bw_num )
256  IF( bwu.LT.1 ) THEN
257  IF( iam.EQ.0 )
258  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
259  ierr( 1 ) = 1
260  END IF
261 *
262  IF( bwl.GT.n-1 ) THEN
263  IF( iam.EQ.0 ) THEN
264  ierr( 1 ) = 1
265  ENDIF
266  END IF
267 *
268  IF( bwu.GT.n-1 ) THEN
269  IF( iam.EQ.0 ) THEN
270  ierr( 1 ) = 1
271  ENDIF
272  END IF
273 *
274 * Check all processes for an error
275 *
276  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
277  $ -1, 0 )
278 *
279  IF( ierr( 1 ).GT.0 ) THEN
280  kskip = kskip + 1
281  GO TO 45
282  END IF
283 *
284  DO 30 k = 1, nnb
285 *
286  ierr( 1 ) = 0
287 *
288  nb = nbval( k )
289  IF( nb.LT.0 ) THEN
290  nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
291  $ + max(bwl,bwu)
292  nb = max( nb, 2*max(bwl,bwu) )
293  nb = min( n, nb )
294  END IF
295 *
296 * Make sure NB is legal
297 *
298  ierr( 1 ) = 0
299  IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
300  ierr( 1 ) = 1
301  END IF
302 *
303 * Check all processes for an error
304 *
305  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
306  $ -1, 0 )
307 *
308  IF( ierr( 1 ).GT.0 ) THEN
309  kskip = kskip + 1
310  GO TO 30
311  END IF
312 *
313 * Padding constants
314 *
315  np = numroc( (bwl+bwu+1), (bwl+bwu+1),
316  $ myrow, 0, nprow )
317  nq = numroc( n, nb, mycol, 0, npcol )
318 *
319  IF( check ) THEN
320  iprepad = ((bwl+bwu+1)+10)
321  imidpad = 10
322  ipostpad = ((bwl+bwu+1)+10)
323  ELSE
324  iprepad = 0
325  imidpad = 0
326  ipostpad = 0
327  END IF
328 *
329 * Initialize the array descriptor for the matrix A
330 *
331  CALL descinit( desca2d, (bwl+bwu+1), n,
332  $ (bwl+bwu+1), nb, 0, 0,
333  $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
334 *
335 * Convert this to 1D descriptor
336 *
337  desca( 1 ) = 501
338  desca( 3 ) = n
339  desca( 4 ) = nb
340  desca( 5 ) = 0
341  desca( 2 ) = ictxt
342  desca( 6 ) = ((bwl+bwu+1)+10)
343  desca( 7 ) = 0
344 *
345  ierr_temp = ierr( 1 )
346  ierr( 1 ) = 0
347  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
348 *
349 * Check all processes for an error
350 *
351  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
352 *
353  IF( ierr( 1 ).LT.0 ) THEN
354  IF( iam.EQ.0 )
355  $ WRITE( nout, fmt = 9997 ) 'descriptor'
356  kskip = kskip + 1
357  GO TO 30
358  END IF
359 *
360 * Assign pointers into MEM for SCALAPACK arrays, A is
361 * allocated starting at position MEM( IPREPAD+1 )
362 *
363  free_ptr = 1
364  ipb = 0
365 *
366 * Save room for prepadding
367  free_ptr = free_ptr + iprepad
368 *
369  ipa = free_ptr
370  free_ptr = free_ptr + desca2d( lld_ )*
371  $ desca2d( nb_ )
372  $ + ipostpad
373 *
374 * Add memory for fillin
375 * Fillin space needs to store:
376 * Fillin spike:
377 * Contribution to previous proc's diagonal block of
378 * reduced system:
379 * Off-diagonal block of reduced system:
380 * Diagonal block of reduced system:
381 *
382  fillin_size =
383  $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
384 *
385 * Claim memory for fillin
386 *
387  free_ptr = free_ptr + iprepad
388  ip_fillin = free_ptr
389  free_ptr = free_ptr + fillin_size
390 *
391 * Workspace needed by computational routines:
392 *
393  ipw_size = 0
394 *
395 * factorization:
396 *
397  ipw_size = max(bwl,bwu)*max(bwl,bwu)
398 *
399 * Claim memory for IPW
400 *
401  ipw = free_ptr
402  free_ptr = free_ptr + ipw_size
403 *
404 * Check for adequate memory for problem size
405 *
406  ierr( 1 ) = 0
407  IF( free_ptr.GT.memsiz ) THEN
408  IF( iam.EQ.0 )
409  $ WRITE( nout, fmt = 9996 )
410  $ 'divide and conquer factorization',
411  $ (free_ptr )*zplxsz
412  ierr( 1 ) = 1
413  END IF
414 *
415 * Check all processes for an error
416 *
417  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
418  $ 1, -1, 0 )
419 *
420  IF( ierr( 1 ).GT.0 ) THEN
421  IF( iam.EQ.0 )
422  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
423  kskip = kskip + 1
424  GO TO 30
425  END IF
426 *
427 * Worksize needed for LAPRNT
428  worksiz = max( ((bwl+bwu+1)+10), nb )
429 *
430  IF( check ) THEN
431 *
432 * Calculate the amount of workspace required by
433 * the checking routines.
434 *
435 * PZLANGE
436  worksiz = max( worksiz, desca2d( nb_ ) )
437 *
438 * PZDBLASCHK
439  worksiz = max( worksiz,
440  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
441  END IF
442 *
443  free_ptr = free_ptr + iprepad
444  ip_driver_w = free_ptr
445  free_ptr = free_ptr + worksiz + ipostpad
446 *
447 *
448 * Check for adequate memory for problem size
449 *
450  ierr( 1 ) = 0
451  IF( free_ptr.GT.memsiz ) THEN
452  IF( iam.EQ.0 )
453  $ WRITE( nout, fmt = 9996 ) 'factorization',
454  $ ( free_ptr )*zplxsz
455  ierr( 1 ) = 1
456  END IF
457 *
458 * Check all processes for an error
459 *
460  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
461  $ 1, -1, 0 )
462 *
463  IF( ierr( 1 ).GT.0 ) THEN
464  IF( iam.EQ.0 )
465  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
466  kskip = kskip + 1
467  GO TO 30
468  END IF
469 *
470  CALL pzbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
471  $ (bwl+bwu+1), nb, mem( ipa ),
472  $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
473  $ mycol, nprow, npcol )
474 *
475  CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
476  $ ((bwl+bwu+1)+10), iprepad, ipostpad,
477  $ padval )
478 *
479  CALL pzfillpad( ictxt, worksiz, 1,
480  $ mem( ip_driver_w-iprepad ), worksiz,
481  $ iprepad, ipostpad, padval )
482 *
483 * Calculate norm of A for residual error-checking
484 *
485  IF( check ) THEN
486 *
487  anorm = pzlange( '1', (bwl+bwu+1),
488  $ n, mem( ipa ), 1, 1,
489  $ desca2d, mem( ip_driver_w ) )
490  CALL pzchekpad( ictxt, 'PZLANGE', np, nq,
491  $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
492  $ iprepad, ipostpad, padval )
493  CALL pzchekpad( ictxt, 'PZLANGE',
494  $ worksiz, 1,
495  $ mem( ip_driver_w-iprepad ), worksiz,
496  $ iprepad, ipostpad, padval )
497  END IF
498 *
499 *
500  CALL slboot()
501  CALL blacs_barrier( ictxt, 'All' )
502 *
503 * Perform factorization
504 *
505  CALL sltimer( 1 )
506 *
507  CALL pzdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
508  $ mem( ip_fillin ), fillin_size, mem( ipw ),
509  $ ipw_size, info )
510 *
511  CALL sltimer( 1 )
512 *
513  IF( info.NE.0 ) THEN
514  IF( iam.EQ.0 ) THEN
515  WRITE( nout, fmt = * ) 'PZDBTRF INFO=', info
516  ENDIF
517  kfail = kfail + 1
518  GO TO 30
519  END IF
520 *
521  IF( check ) THEN
522 *
523 * Check for memory overwrite in factorization
524 *
525  CALL pzchekpad( ictxt, 'PZDBTRF', np,
526  $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
527  $ iprepad, ipostpad, padval )
528  END IF
529 *
530 *
531 * Loop over the different values for NRHS
532 *
533  DO 20 hh = 1, nnr
534 *
535  ierr( 1 ) = 0
536 *
537  nrhs = nrval( hh )
538 *
539 * Initialize Array Descriptor for rhs
540 *
541  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
542  $ ictxtb, nb+10, ierr( 1 ) )
543 *
544 * Convert this to 1D descriptor
545 *
546  descb( 1 ) = 502
547  descb( 3 ) = n
548  descb( 4 ) = nb
549  descb( 5 ) = 0
550  descb( 2 ) = ictxt
551  descb( 6 ) = descb2d( lld_ )
552  descb( 7 ) = 0
553 *
554 * reset free_ptr to reuse space for right hand sides
555 *
556  IF( ipb .GT. 0 ) THEN
557  free_ptr = ipb
558  ENDIF
559 *
560  free_ptr = free_ptr + iprepad
561  ipb = free_ptr
562  free_ptr = free_ptr + nrhs*descb2d( lld_ )
563  $ + ipostpad
564 *
565 * Allocate workspace for workspace in TRS routine:
566 *
567  ipw_solve_size = (max(bwl,bwu)*nrhs)
568 *
569  ipw_solve = free_ptr
570  free_ptr = free_ptr + ipw_solve_size
571 *
572  ierr( 1 ) = 0
573  IF( free_ptr.GT.memsiz ) THEN
574  IF( iam.EQ.0 )
575  $ WRITE( nout, fmt = 9996 )'solve',
576  $ ( free_ptr )*zplxsz
577  ierr( 1 ) = 1
578  END IF
579 *
580 * Check all processes for an error
581 *
582  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
583  $ ierr, 1, -1, 0 )
584 *
585  IF( ierr( 1 ).GT.0 ) THEN
586  IF( iam.EQ.0 )
587  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
588  kskip = kskip + 1
589  GO TO 15
590  END IF
591 *
592  myrhs_size = numroc( n, nb, mycol, 0, npcol )
593 *
594 * Generate RHS
595 *
596  CALL pzmatgen(ictxtb, 'No', 'No',
597  $ descb2d( m_ ), descb2d( n_ ),
598  $ descb2d( mb_ ), descb2d( nb_ ),
599  $ mem( ipb ),
600  $ descb2d( lld_ ), descb2d( rsrc_ ),
601  $ descb2d( csrc_ ),
602  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
603  $ myrow, npcol, nprow )
604 *
605  IF( check ) THEN
606  CALL pzfillpad( ictxtb, nb, nrhs,
607  $ mem( ipb-iprepad ),
608  $ descb2d( lld_ ),
609  $ iprepad, ipostpad,
610  $ padval )
611  CALL pzfillpad( ictxt, worksiz, 1,
612  $ mem( ip_driver_w-iprepad ),
613  $ worksiz, iprepad,
614  $ ipostpad, padval )
615  END IF
616 *
617 *
618  CALL blacs_barrier( ictxt, 'All')
619  CALL sltimer( 2 )
620 *
621 * Solve linear system via factorization
622 *
623  CALL pzdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
624  $ 1, desca, mem( ipb ), 1, descb,
625  $ mem( ip_fillin ), fillin_size,
626  $ mem( ipw_solve ), ipw_solve_size,
627  $ info )
628 *
629  CALL sltimer( 2 )
630 *
631  IF( info.NE.0 ) THEN
632  IF( iam.EQ.0 )
633  $ WRITE( nout, fmt = * ) 'PZDBTRS INFO=', info
634  kfail = kfail + 1
635  passed = 'FAILED'
636  GO TO 20
637  END IF
638 *
639  IF( check ) THEN
640 *
641 * check for memory overwrite
642 *
643  CALL pzchekpad( ictxt, 'PZDBTRS-work',
644  $ worksiz, 1,
645  $ mem( ip_driver_w-iprepad ),
646  $ worksiz, iprepad,
647  $ ipostpad, padval )
648 *
649 * check the solution to rhs
650 *
651  sresid = zero
652 *
653  CALL pzdblaschk( 'N', 'D', trans,
654  $ n, bwl, bwu, nrhs,
655  $ mem( ipb ), 1, 1, descb2d,
656  $ iaseed, mem( ipa ), 1, 1, desca2d,
657  $ ibseed, anorm, sresid,
658  $ mem( ip_driver_w ), worksiz )
659 *
660  IF( iam.EQ.0 ) THEN
661  IF( sresid.GT.thresh )
662  $ WRITE( nout, fmt = 9985 ) sresid
663  END IF
664 *
665 * The second test is a NaN trap
666 *
667  IF( ( sresid.LE.thresh ).AND.
668  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
669  kpass = kpass + 1
670  passed = 'PASSED'
671  ELSE
672  kfail = kfail + 1
673  passed = 'FAILED'
674  END IF
675 *
676  END IF
677 *
678  15 CONTINUE
679 * Skipped tests jump to here to print out "SKIPPED"
680 *
681 * Gather maximum of all CPU and WALL clock timings
682 *
683  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
684  $ wtime )
685  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
686  $ ctime )
687 *
688 * Print results
689 *
690  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
691 *
692  nops = 0
693  nops2 = 0
694 *
695  n_first = nb
696  nprocs_real = ( n-1 )/nb + 1
697  n_last = mod( n-1, nb ) + 1
698 *
699 * 2 N bwl bwu + N (bwl) flops
700 * for LU factorization
701 *
702  nops = 2*(dble(n)*dble(bwl)*
703  $ dble(bwu)) +
704  $ (dble(n)*dble(bwl))
705 *
706 * nrhs * 2 N*(bwl+bwu) flops for LU solve.
707 *
708  nops = nops +
709  $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
710  $ *dble(nrhs))
711 *
712 * Multiply by 4 to get complex count
713 *
714  nops = nops * dble(4)
715 *
716 * Second calc to represent actual hardware speed
717 *
718 * 2*N_FIRST bwl*bwu Flops for LU
719 * factorization in proc 1
720 *
721  nops2 = 2*( (dble(n_first)*
722  $ dble(bwl)*dble(bwu)))
723 *
724  IF ( nprocs_real .GT. 1) THEN
725 * 8 N_LAST bwl*bwu
726 * flops for LU and spike
727 * calc in last processor
728 *
729  nops2 = nops2 +
730  $ 8*( (dble(n_last)*dble(bwl)
731  $ *dble(bwu)) )
732  ENDIF
733 *
734  IF ( nprocs_real .GT. 2) THEN
735 * 8 NB bwl*bwu flops for LU and spike
736 * calc in other processors
737 *
738  nops2 = nops2 + (nprocs_real-2)*
739  $ 8*( (dble(nb)*dble(bwl)
740  $ *dble(bwu)) )
741  ENDIF
742 *
743 * Reduced system
744 *
745  nops2 = nops2 +
746  $ 2*( nprocs_real-1 ) *
747  $ ( bwl*bwu*bwl/3 )
748  IF( nprocs_real .GT. 1 ) THEN
749  nops2 = nops2 +
750  $ 2*( nprocs_real-2 ) *
751  $ (2*bwl*bwu*bwl)
752  ENDIF
753 *
754 * Solve stage
755 *
756 * nrhs*2 n_first*
757 * (bwl+bwu)
758 * flops for L,U solve in proc 1.
759 *
760  nops2 = nops2 +
761  $ 2*
762  $ dble(n_first)*
763  $ dble(nrhs) *
764  $ ( dble(bwl)+dble(bwu))
765 *
766  IF ( nprocs_real .GT. 1 ) THEN
767 *
768 * 2*nrhs*2 n_last
769 * (bwl+bwu)
770 * flops for LU solve in other procs
771 *
772  nops2 = nops2 +
773  $ 4*
774  $ (dble(n_last)*(dble(bwl)+
775  $ dble(bwu)))*dble(nrhs)
776  ENDIF
777 *
778  IF ( nprocs_real .GT. 2 ) THEN
779 *
780 * 2*nrhs*2 NB
781 * (bwl+bwu)
782 * flops for LU solve in other procs
783 *
784  nops2 = nops2 +
785  $ ( nprocs_real-2)*2*
786  $ ( (dble(nb)*(dble(bwl)+
787  $ dble(bwu)))*dble(nrhs) )
788  ENDIF
789 *
790 * Reduced system
791 *
792  nops2 = nops2 +
793  $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
794  IF( nprocs_real .GT. 1 ) THEN
795  nops2 = nops2 +
796  $ nrhs*( nprocs_real-2 ) *
797  $ ( 6 * bwl*bwu )
798  ENDIF
799 *
800 *
801 * Multiply by 4 to get complex count
802 *
803  nops2 = nops2 * dble(4)
804 *
805 * Calculate total megaflops - factorization and/or
806 * solve -- for WALL and CPU time, and print output
807 *
808 * Print WALL time if machine supports it
809 *
810  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
811  tmflops = nops /
812  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
813  ELSE
814  tmflops = 0.0d+0
815  END IF
816 *
817  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
818  tmflops2 = nops2 /
819  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
820  ELSE
821  tmflops2 = 0.0d+0
822  END IF
823 *
824  IF( wtime( 2 ).GE.0.0d+0 )
825  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
826  $ n,
827  $ bwl, bwu,
828  $ nb, nrhs, nprow, npcol,
829  $ wtime( 1 ), wtime( 2 ), tmflops,
830  $ tmflops2, passed
831 *
832 * Print CPU time if machine supports it
833 *
834  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
835  tmflops = nops /
836  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
837  ELSE
838  tmflops = 0.0d+0
839  END IF
840 *
841  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
842  tmflops2 = nops2 /
843  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
844  ELSE
845  tmflops2 = 0.0d+0
846  END IF
847 *
848  IF( ctime( 2 ).GE.0.0d+0 )
849  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
850  $ n,
851  $ bwl, bwu,
852  $ nb, nrhs, nprow, npcol,
853  $ ctime( 1 ), ctime( 2 ), tmflops,
854  $ tmflops2, passed
855 *
856  END IF
857  20 CONTINUE
858 *
859 *
860  30 CONTINUE
861 * NNB loop
862 *
863  45 CONTINUE
864 * BW[] loop
865 *
866  40 CONTINUE
867 * NMAT loop
868 *
869  CALL blacs_gridexit( ictxt )
870  CALL blacs_gridexit( ictxtb )
871 *
872  50 CONTINUE
873 * NGRIDS DROPOUT
874  60 CONTINUE
875 * NGRIDS loop
876 *
877 * Print ending messages and close output file
878 *
879  IF( iam.EQ.0 ) THEN
880  ktests = kpass + kfail + kskip
881  WRITE( nout, fmt = * )
882  WRITE( nout, fmt = 9992 ) ktests
883  IF( check ) THEN
884  WRITE( nout, fmt = 9991 ) kpass
885  WRITE( nout, fmt = 9989 ) kfail
886  ELSE
887  WRITE( nout, fmt = 9990 ) kpass
888  END IF
889  WRITE( nout, fmt = 9988 ) kskip
890  WRITE( nout, fmt = * )
891  WRITE( nout, fmt = * )
892  WRITE( nout, fmt = 9987 )
893  IF( nout.NE.6 .AND. nout.NE.0 )
894  $ CLOSE ( nout )
895  END IF
896 *
897  CALL blacs_exit( 0 )
898 *
899  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
900  $ '; It should be at least 1' )
901  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
902  $ i4 )
903  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
904  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
905  $ i11 )
906  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
907  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
908  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
909  $ '-------- -------- -------- ------' )
910  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
911  $ 1x,i4,1x,i4,1x,f9.3,
912  $ f9.4, f9.2, f9.2, 1x, a6 )
913  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
914  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
915  9990 FORMAT( i5, ' tests completed without checking.' )
916  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
917  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
918  9987 FORMAT( 'END OF TESTS.' )
919  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
920  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
921 *
922  stop
923 *
924 * End of PZDBTRS_DRIVER
925 *
926  END
927 *
max
#define max(A, B)
Definition: pcgemr.c:180
pzdbdriver
program pzdbdriver
Definition: pzdbdriver.f:1
pzdblaschk
subroutine pzdblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pzdblaschk.f:4
pzdbtrf
subroutine pzdbtrf(N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pzdbtrf.f:3
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pzdbinfo
subroutine pzdbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pzdbinfo.f:6
pzbmatgen
subroutine pzbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzbmatgen.f:5
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzdbtrs
subroutine pzdbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pzdbtrs.f:3
pzchekpad
subroutine pzchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzchekpad.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pzfillpad
subroutine pzfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzfillpad.f:2