ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgbdriver.f
Go to the documentation of this file.
1  PROGRAM pcgbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PCGBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by CGB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTMEM INTEGER, default = 2048.
55 * INTMEM is the size of the integer workspace used in this
56 * driver as input as the IPIV vector. It represents an
57 * upper bound on NB, the blocksize of the data
58 * distribution.
59 *
60 * INTGSZ INTEGER, default = 4 bytes.
61 * CPLXSZ INTEGER, default = 8 bytes.
62 * INTGSZ and CPLXSZ indicate the length in bytes on the
63 * given platform for an integer and a single precision
64 * complex.
65 * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
66 * All arrays used by ScaLAPACK routines are allocated from
67 * this array and referenced by pointers. The integer IPB,
68 * for example, is a pointer to the starting element of MEM for
69 * the solution vector(s) B.
70 *
71 * =====================================================================
72 *
73 * Code Developer: Andrew J. Cleary, University of Tennessee.
74 * Current address: Lawrence Livermore National Labs.
75 * This version released: August, 2001.
76 *
77 * =====================================================================
78 *
79 * .. Parameters ..
80  INTEGER totmem
81  parameter( totmem = 3000000 )
82  INTEGER intmem
83  parameter( intmem = 2048 )
84  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
85  $ lld_, mb_, m_, nb_, n_, rsrc_
86  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
87  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
88  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
89 *
90  REAL zero
91  INTEGER cplxsz, memsiz, ntests
92  COMPLEX padval
93  parameter( cplxsz = 8,
94  $ memsiz = totmem / cplxsz, ntests = 20,
95  $ padval = ( -9923.0e+0, -9923.0e+0 ),
96  $ zero = 0.0e+0 )
97  INTEGER int_one
98  parameter( int_one = 1 )
99 * ..
100 * .. Local Scalars ..
101  LOGICAL check
102  CHARACTER trans
103  CHARACTER*6 passed
104  CHARACTER*80 outfile
105  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
106  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
107  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
108  $ iprepad, ipw, ipw_size, ipw_solve,
109  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
110  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
111  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
112  $ nnr, nout, np, npcol, nprocs, nprocs_real,
113  $ nprow, nq, nrhs, n_first, n_last, worksiz
114  REAL anorm, sresid, thresh
115  DOUBLE PRECISION nops, nops2, tmflops, tmflops2
116 * ..
117 * .. Local Arrays ..
118  INTEGER ipiv(intmem)
119  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
120  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
121  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
122  $ nrval( ntests ), nval( ntests ),
123  $ pval( ntests ), qval( ntests )
124  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
125  COMPLEX mem( memsiz )
126 * ..
127 * .. External Subroutines ..
128  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
129  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
130  $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
133  $ sltimer
134 * ..
135 * .. External Functions ..
136  INTEGER numroc
137  LOGICAL lsame
138  REAL pclange
139  EXTERNAL lsame, numroc, pclange
140 * ..
141 * .. Intrinsic Functions ..
142  INTRINSIC dble, max, min, mod
143 * ..
144 * .. Data Statements ..
145  DATA kfail, kpass, kskip, ktests / 4*0 /
146 * ..
147 *
148 *
149 *
150 * .. Executable Statements ..
151 *
152 * Get starting information
153 *
154  CALL blacs_pinfo( iam, nprocs )
155  iaseed = 100
156  ibseed = 200
157 *
158  CALL pcgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
159  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
160  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
161  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
162 *
163  check = ( thresh.GE.0.0e+0 )
164 *
165 * Print headings
166 *
167  IF( iam.EQ.0 ) THEN
168  WRITE( nout, fmt = * )
169  WRITE( nout, fmt = 9995 )
170  WRITE( nout, fmt = 9994 )
171  WRITE( nout, fmt = * )
172  END IF
173 *
174 * Loop over different process grids
175 *
176  DO 60 i = 1, ngrids
177 *
178  nprow = pval( i )
179  npcol = qval( i )
180 *
181 * Make sure grid information is correct
182 *
183  ierr( 1 ) = 0
184  IF( nprow.LT.1 ) THEN
185  IF( iam.EQ.0 )
186  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
187  ierr( 1 ) = 1
188  ELSE IF( npcol.LT.1 ) THEN
189  IF( iam.EQ.0 )
190  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
191  ierr( 1 ) = 1
192  ELSE IF( nprow*npcol.GT.nprocs ) THEN
193  IF( iam.EQ.0 )
194  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
195  ierr( 1 ) = 1
196  END IF
197 *
198  IF( ierr( 1 ).GT.0 ) THEN
199  IF( iam.EQ.0 )
200  $ WRITE( nout, fmt = 9997 ) 'grid'
201  kskip = kskip + 1
202  GO TO 50
203  END IF
204 *
205 * Define process grid
206 *
207  CALL blacs_get( -1, 0, ictxt )
208  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
209 *
210 *
211 * Define transpose process grid
212 *
213  CALL blacs_get( -1, 0, ictxtb )
214  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
215 *
216 * Go to bottom of process grid loop if this case doesn't use my
217 * process
218 *
219  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
220 *
221  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
222  GO TO 50
223  ENDIF
224 *
225  DO 40 j = 1, nmat
226 *
227  ierr( 1 ) = 0
228 *
229  n = nval( j )
230 *
231 * Make sure matrix information is correct
232 *
233  IF( n.LT.1 ) THEN
234  IF( iam.EQ.0 )
235  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
236  ierr( 1 ) = 1
237  END IF
238 *
239 * Check all processes for an error
240 *
241  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
242  $ -1, 0 )
243 *
244  IF( ierr( 1 ).GT.0 ) THEN
245  IF( iam.EQ.0 )
246  $ WRITE( nout, fmt = 9997 ) 'size'
247  kskip = kskip + 1
248  GO TO 40
249  END IF
250 *
251 *
252  DO 45 bw_num = 1, nbw
253 *
254  ierr( 1 ) = 0
255 *
256  bwl = bwlval( bw_num )
257  IF( bwl.LT.1 ) THEN
258  IF( iam.EQ.0 )
259  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
260  ierr( 1 ) = 1
261  END IF
262 *
263  bwu = bwuval( bw_num )
264  IF( bwu.LT.1 ) THEN
265  IF( iam.EQ.0 )
266  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
267  ierr( 1 ) = 1
268  END IF
269 *
270  IF( bwl.GT.n-1 ) THEN
271  IF( iam.EQ.0 ) THEN
272  ierr( 1 ) = 1
273  ENDIF
274  END IF
275 *
276  IF( bwu.GT.n-1 ) THEN
277  IF( iam.EQ.0 ) THEN
278  ierr( 1 ) = 1
279  ENDIF
280  END IF
281 *
282 * Check all processes for an error
283 *
284  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
285  $ -1, 0 )
286 *
287  IF( ierr( 1 ).GT.0 ) THEN
288  kskip = kskip + 1
289  GO TO 45
290  END IF
291 *
292  DO 30 k = 1, nnb
293 *
294  ierr( 1 ) = 0
295 *
296  nb = nbval( k )
297  IF( nb.LT.0 ) THEN
298  nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
299  $ + (bwl+bwu)
300  nb = max( nb, 2*(bwl+bwu) )
301  nb = min( n, nb )
302  END IF
303 *
304 * Make sure NB is legal
305 *
306  ierr( 1 ) = 0
307 *
308  IF( nb.GT.intmem ) THEN
309  ierr( 1 ) = 1
310  IF( iam.EQ.0 ) THEN
311  WRITE( nout,* )'You have chosen an '
312  $ ,'NB > INTMEM in the driver.'
313  WRITE(nout, *)'Please edit the driver '
314  $ ,'and increase the value of INTMEM'
315  ENDIF
316  END IF
317 *
318 * Check all processes for an error
319 *
320  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
321  $ -1, 0 )
322 *
323  IF( ierr( 1 ).GT.0 ) THEN
324  kskip = kskip + 1
325  GO TO 30
326  END IF
327 *
328 * Padding constants
329 *
330  np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
331  $ myrow, 0, nprow )
332  nq = numroc( n, nb, mycol, 0, npcol )
333 *
334  IF( check ) THEN
335  iprepad = ((2*bwl+2*bwu+1)+10)
336  imidpad = 10
337  ipostpad = ((2*bwl+2*bwu+1)+10)
338  ELSE
339  iprepad = 0
340  imidpad = 0
341  ipostpad = 0
342  END IF
343 *
344 * Initialize the array descriptor for the matrix A
345 *
346  CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
347  $ (2*bwl+2*bwu+1), nb, 0, 0,
348  $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
349 *
350 * Convert this to 1D descriptor
351 *
352  desca( 1 ) = 501
353  desca( 3 ) = n
354  desca( 4 ) = nb
355  desca( 5 ) = 0
356  desca( 2 ) = ictxt
357  desca( 6 ) = ((2*bwl+2*bwu+1)+10)
358  desca( 7 ) = 0
359 *
360  ierr_temp = ierr( 1 )
361  ierr( 1 ) = 0
362  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
363 *
364 * Check all processes for an error
365 *
366  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
367 *
368  IF( ierr( 1 ).LT.0 ) THEN
369  IF( iam.EQ.0 )
370  $ WRITE( nout, fmt = 9997 ) 'descriptor'
371  kskip = kskip + 1
372  GO TO 30
373  END IF
374 *
375 * Assign pointers into MEM for SCALAPACK arrays, A is
376 * allocated starting at position MEM( IPREPAD+1 )
377 *
378  free_ptr = 1
379  ipb = 0
380 *
381 * Save room for prepadding
382  free_ptr = free_ptr + iprepad
383 *
384  ipa = free_ptr
385  free_ptr = free_ptr + desca2d( lld_ )*
386  $ desca2d( nb_ )
387  $ + ipostpad
388 *
389 * Add memory for fillin
390 * Fillin space needs to store:
391 * Fillin spike:
392 * Contribution to previous proc's diagonal block of
393 * reduced system:
394 * Off-diagonal block of reduced system:
395 * Diagonal block of reduced system:
396 *
397  fillin_size =
398  $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
399 *
400 * Claim memory for fillin
401 *
402  free_ptr = free_ptr + iprepad
403  ip_fillin = free_ptr
404  free_ptr = free_ptr + fillin_size
405 *
406 * Workspace needed by computational routines:
407 *
408  ipw_size = 0
409 *
410 * factorization:
411 *
412  ipw_size = 1
413 *
414 * Claim memory for IPW
415 *
416  ipw = free_ptr
417  free_ptr = free_ptr + ipw_size
418 *
419 * Check for adequate memory for problem size
420 *
421  ierr( 1 ) = 0
422  IF( free_ptr.GT.memsiz ) THEN
423  IF( iam.EQ.0 )
424  $ WRITE( nout, fmt = 9996 )
425  $ 'divide and conquer factorization',
426  $ (free_ptr )*cplxsz
427  ierr( 1 ) = 1
428  END IF
429 *
430 * Check all processes for an error
431 *
432  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
433  $ 1, -1, 0 )
434 *
435  IF( ierr( 1 ).GT.0 ) THEN
436  IF( iam.EQ.0 )
437  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
438  kskip = kskip + 1
439  GO TO 30
440  END IF
441 *
442 * Worksize needed for LAPRNT
443  worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
444 *
445  IF( check ) THEN
446 *
447 * Calculate the amount of workspace required by
448 * the checking routines.
449 *
450 * PCLANGE
451  worksiz = max( worksiz, desca2d( nb_ ) )
452 *
453 * PCDBLASCHK
454  worksiz = max( worksiz,
455  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
456  END IF
457 *
458  free_ptr = free_ptr + iprepad
459  ip_driver_w = free_ptr
460  free_ptr = free_ptr + worksiz + ipostpad
461 *
462 *
463 * Check for adequate memory for problem size
464 *
465  ierr( 1 ) = 0
466  IF( free_ptr.GT.memsiz ) THEN
467  IF( iam.EQ.0 )
468  $ WRITE( nout, fmt = 9996 ) 'factorization',
469  $ ( free_ptr )*cplxsz
470  ierr( 1 ) = 1
471  END IF
472 *
473 * Check all processes for an error
474 *
475  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
476  $ 1, -1, 0 )
477 *
478  IF( ierr( 1 ).GT.0 ) THEN
479  IF( iam.EQ.0 )
480  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
481  kskip = kskip + 1
482  GO TO 30
483  END IF
484 *
485  CALL pcbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
486  $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
487  $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
488  $ myrow, mycol, nprow, npcol )
489 *
490  CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
491  $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
492  $ padval )
493 *
494  CALL pcfillpad( ictxt, worksiz, 1,
495  $ mem( ip_driver_w-iprepad ), worksiz,
496  $ iprepad, ipostpad, padval )
497 *
498 * Calculate norm of A for residual error-checking
499 *
500  IF( check ) THEN
501 *
502  anorm = pclange( '1', (2*bwl+2*bwu+1),
503  $ n, mem( ipa ), 1, 1,
504  $ desca2d, mem( ip_driver_w ) )
505  CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
506  $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
507  $ iprepad, ipostpad, padval )
508  CALL pcchekpad( ictxt, 'PCLANGE',
509  $ worksiz, 1,
510  $ mem( ip_driver_w-iprepad ), worksiz,
511  $ iprepad, ipostpad, padval )
512  END IF
513 *
514 *
515  CALL slboot()
516  CALL blacs_barrier( ictxt, 'All' )
517 *
518 * Perform factorization
519 *
520  CALL sltimer( 1 )
521 *
522  CALL pcgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
523  $ mem( ip_fillin ), fillin_size, mem( ipw ),
524  $ ipw_size, info )
525 *
526  CALL sltimer( 1 )
527 *
528  IF( info.NE.0 ) THEN
529  IF( iam.EQ.0 ) THEN
530  WRITE( nout, fmt = * ) 'PCGBTRF INFO=', info
531  ENDIF
532  kfail = kfail + 1
533  GO TO 30
534  END IF
535 *
536  IF( check ) THEN
537 *
538 * Check for memory overwrite in factorization
539 *
540  CALL pcchekpad( ictxt, 'PCGBTRF', np,
541  $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
542  $ iprepad, ipostpad, padval )
543  END IF
544 *
545 *
546 * Loop over the different values for NRHS
547 *
548  DO 20 hh = 1, nnr
549 *
550  ierr( 1 ) = 0
551 *
552  nrhs = nrval( hh )
553 *
554 * Initialize Array Descriptor for rhs
555 *
556  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
557  $ ictxtb, nb+10, ierr( 1 ) )
558 *
559 * Convert this to 1D descriptor
560 *
561  descb( 1 ) = 502
562  descb( 3 ) = n
563  descb( 4 ) = nb
564  descb( 5 ) = 0
565  descb( 2 ) = ictxt
566  descb( 6 ) = descb2d( lld_ )
567  descb( 7 ) = 0
568 *
569 * reset free_ptr to reuse space for right hand sides
570 *
571  IF( ipb .GT. 0 ) THEN
572  free_ptr = ipb
573  ENDIF
574 *
575  free_ptr = free_ptr + iprepad
576  ipb = free_ptr
577  free_ptr = free_ptr + nrhs*descb2d( lld_ )
578  $ + ipostpad
579 *
580 * Allocate workspace for workspace in TRS routine:
581 *
582  ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
583 *
584  ipw_solve = free_ptr
585  free_ptr = free_ptr + ipw_solve_size
586 *
587  ierr( 1 ) = 0
588  IF( free_ptr.GT.memsiz ) THEN
589  IF( iam.EQ.0 )
590  $ WRITE( nout, fmt = 9996 )'solve',
591  $ ( free_ptr )*cplxsz
592  ierr( 1 ) = 1
593  END IF
594 *
595 * Check all processes for an error
596 *
597  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
598  $ ierr, 1, -1, 0 )
599 *
600  IF( ierr( 1 ).GT.0 ) THEN
601  IF( iam.EQ.0 )
602  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
603  kskip = kskip + 1
604  GO TO 15
605  END IF
606 *
607  myrhs_size = numroc( n, nb, mycol, 0, npcol )
608 *
609 * Generate RHS
610 *
611  CALL pcmatgen(ictxtb, 'No', 'No',
612  $ descb2d( m_ ), descb2d( n_ ),
613  $ descb2d( mb_ ), descb2d( nb_ ),
614  $ mem( ipb ),
615  $ descb2d( lld_ ), descb2d( rsrc_ ),
616  $ descb2d( csrc_ ),
617  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
618  $ myrow, npcol, nprow )
619 *
620  IF( check ) THEN
621  CALL pcfillpad( ictxtb, nb, nrhs,
622  $ mem( ipb-iprepad ),
623  $ descb2d( lld_ ),
624  $ iprepad, ipostpad,
625  $ padval )
626  CALL pcfillpad( ictxt, worksiz, 1,
627  $ mem( ip_driver_w-iprepad ),
628  $ worksiz, iprepad,
629  $ ipostpad, padval )
630  END IF
631 *
632 *
633  CALL blacs_barrier( ictxt, 'All')
634  CALL sltimer( 2 )
635 *
636 * Solve linear system via factorization
637 *
638  CALL pcgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
639  $ 1, desca, ipiv, mem( ipb ), 1, descb,
640  $ mem( ip_fillin ), fillin_size,
641  $ mem( ipw_solve ), ipw_solve_size,
642  $ info )
643 *
644  CALL sltimer( 2 )
645 *
646  IF( info.NE.0 ) THEN
647  IF( iam.EQ.0 )
648  $ WRITE( nout, fmt = * ) 'PCGBTRS INFO=', info
649  kfail = kfail + 1
650  passed = 'FAILED'
651  GO TO 20
652  END IF
653 *
654  IF( check ) THEN
655 *
656 * check for memory overwrite
657 *
658  CALL pcchekpad( ictxt, 'PCGBTRS-work',
659  $ worksiz, 1,
660  $ mem( ip_driver_w-iprepad ),
661  $ worksiz, iprepad,
662  $ ipostpad, padval )
663 *
664 * check the solution to rhs
665 *
666  sresid = zero
667 *
668  CALL pcdblaschk( 'N', 'N', trans,
669  $ n, bwl, bwu, nrhs,
670  $ mem( ipb ), 1, 1, descb2d,
671  $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
672  $ ibseed, anorm, sresid,
673  $ mem( ip_driver_w ), worksiz )
674 *
675  IF( iam.EQ.0 ) THEN
676  IF( sresid.GT.thresh )
677  $ WRITE( nout, fmt = 9985 ) sresid
678  END IF
679 *
680 * The second test is a NaN trap
681 *
682  IF( ( sresid.LE.thresh ).AND.
683  $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
684  kpass = kpass + 1
685  passed = 'PASSED'
686  ELSE
687  kfail = kfail + 1
688  passed = 'FAILED'
689  END IF
690 *
691  END IF
692 *
693  15 CONTINUE
694 * Skipped tests jump to here to print out "SKIPPED"
695 *
696 * Gather maximum of all CPU and WALL clock timings
697 *
698  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
699  $ wtime )
700  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
701  $ ctime )
702 *
703 * Print results
704 *
705  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
706 *
707  nops = 0
708  nops2 = 0
709 *
710  n_first = nb
711  nprocs_real = ( n-1 )/nb + 1
712  n_last = mod( n-1, nb ) + 1
713 *
714 * 2 N bwl (bwl+bwu) + N (bwl) flops
715 * for LU factorization
716 *
717  nops = 2*(dble(n)*dble(bwl)*
718  $ dble((bwl+bwu))) +
719  $ (dble(n)*dble(bwl))
720 *
721 * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
722 *
723  nops = nops +
724  $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
725  $ *dble(nrhs))
726 *
727 * Multiply by 4 to get complex count
728 *
729  nops = nops * dble(4)
730 *
731 * Second calc to represent actual hardware speed
732 *
733 * 2*N_FIRST (bwl+bwu)*bwu Flops for LU
734 * factorization in proc 1
735 *
736  nops2 = 2*( (dble(n_first)*
737  $ dble((bwl+bwu))*dble(bwu)))
738 *
739  IF ( nprocs_real .GT. 1) THEN
740 * 8 N_LAST (bwl+bwu)*(bwl+bwu)
741 * flops for LU and spike
742 * calc in last processor
743 *
744  nops2 = nops2 +
745  $ 8*( (dble(n_last)*dble((bwl+bwu))
746  $ *dble((bwl+bwu))) )
747  ENDIF
748 *
749  IF ( nprocs_real .GT. 2) THEN
750 * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
751 * calc in other processors
752 *
753  nops2 = nops2 + (nprocs_real-2)*
754  $ 8*( (dble(nb)*dble((bwl+bwu))
755  $ *dble((bwl+bwu))) )
756  ENDIF
757 *
758 * Reduced system
759 *
760  nops2 = nops2 +
761  $ 2*( nprocs_real-1 ) *
762  $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
763  IF( nprocs_real .GT. 1 ) THEN
764  nops2 = nops2 +
765  $ 2*( nprocs_real-2 ) *
766  $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
767  ENDIF
768 *
769 * Solve stage
770 *
771 * nrhs*2 n_first*
772 * (bwl+(bwl+bwu))
773 * flops for L,U solve in proc 1.
774 *
775  nops2 = nops2 +
776  $ 2*
777  $ dble(n_first)*
778  $ dble(nrhs) *
779  $ ( dble(bwl)+dble((bwl+bwu)))
780 *
781  IF ( nprocs_real .GT. 1 ) THEN
782 *
783 * 2*nrhs*2 n_last
784 * ((bwl+bwu)+(bwl+bwu))
785 * flops for LU solve in other procs
786 *
787  nops2 = nops2 +
788  $ 4*
789  $ (dble(n_last)*(dble((bwl+bwu))+
790  $ dble((bwl+bwu))))*dble(nrhs)
791  ENDIF
792 *
793  IF ( nprocs_real .GT. 2 ) THEN
794 *
795 * 2*nrhs*2 NB
796 * ((bwl+bwu)+(bwl+bwu))
797 * flops for LU solve in other procs
798 *
799  nops2 = nops2 +
800  $ ( nprocs_real-2)*2*
801  $ ( (dble(nb)*(dble((bwl+bwu))+
802  $ dble((bwl+bwu))))*dble(nrhs) )
803  ENDIF
804 *
805 * Reduced system
806 *
807  nops2 = nops2 +
808  $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
809  IF( nprocs_real .GT. 1 ) THEN
810  nops2 = nops2 +
811  $ nrhs*( nprocs_real-2 ) *
812  $ ( 6 * (bwl+bwu)*(bwl+bwu) )
813  ENDIF
814 *
815 *
816 * Multiply by 4 to get complex count
817 *
818  nops2 = nops2 * dble(4)
819 *
820 * Calculate total megaflops - factorization and/or
821 * solve -- for WALL and CPU time, and print output
822 *
823 * Print WALL time if machine supports it
824 *
825  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
826  tmflops = nops /
827  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
828  ELSE
829  tmflops = 0.0d+0
830  END IF
831 *
832  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
833  tmflops2 = nops2 /
834  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
835  ELSE
836  tmflops2 = 0.0d+0
837  END IF
838 *
839  IF( wtime( 2 ).GE.0.0d+0 )
840  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
841  $ n,
842  $ bwl, bwu,
843  $ nb, nrhs, nprow, npcol,
844  $ wtime( 1 ), wtime( 2 ), tmflops,
845  $ tmflops2, passed
846 *
847 * Print CPU time if machine supports it
848 *
849  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
850  tmflops = nops /
851  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
852  ELSE
853  tmflops = 0.0d+0
854  END IF
855 *
856  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
857  tmflops2 = nops2 /
858  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
859  ELSE
860  tmflops2 = 0.0d+0
861  END IF
862 *
863  IF( ctime( 2 ).GE.0.0d+0 )
864  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
865  $ n,
866  $ bwl, bwu,
867  $ nb, nrhs, nprow, npcol,
868  $ ctime( 1 ), ctime( 2 ), tmflops,
869  $ tmflops2, passed
870 *
871  END IF
872  20 CONTINUE
873 *
874 *
875  30 CONTINUE
876 * NNB loop
877 *
878  45 CONTINUE
879 * BW[] loop
880 *
881  40 CONTINUE
882 * NMAT loop
883 *
884  CALL blacs_gridexit( ictxt )
885  CALL blacs_gridexit( ictxtb )
886 *
887  50 CONTINUE
888 * NGRIDS DROPOUT
889  60 CONTINUE
890 * NGRIDS loop
891 *
892 * Print ending messages and close output file
893 *
894  IF( iam.EQ.0 ) THEN
895  ktests = kpass + kfail + kskip
896  WRITE( nout, fmt = * )
897  WRITE( nout, fmt = 9992 ) ktests
898  IF( check ) THEN
899  WRITE( nout, fmt = 9991 ) kpass
900  WRITE( nout, fmt = 9989 ) kfail
901  ELSE
902  WRITE( nout, fmt = 9990 ) kpass
903  END IF
904  WRITE( nout, fmt = 9988 ) kskip
905  WRITE( nout, fmt = * )
906  WRITE( nout, fmt = * )
907  WRITE( nout, fmt = 9987 )
908  IF( nout.NE.6 .AND. nout.NE.0 )
909  $ CLOSE ( nout )
910  END IF
911 *
912  CALL blacs_exit( 0 )
913 *
914  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
915  $ '; It should be at least 1' )
916  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
917  $ i4 )
918  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
919  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
920  $ i11 )
921  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
922  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
923  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
924  $ '-------- -------- -------- ------' )
925  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
926  $ 1x,i4,1x,i4,1x,f9.3,
927  $ f9.4, f9.2, f9.2, 1x, a6 )
928  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
929  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
930  9990 FORMAT( i5, ' tests completed without checking.' )
931  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
932  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
933  9987 FORMAT( 'END OF TESTS.' )
934  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
935  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
936 *
937  stop
938 *
939 * End of PCGBTRS_DRIVER
940 *
941  END
942 *
max
#define max(A, B)
Definition: pcgemr.c:180
pcgbinfo
subroutine pcgbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcgbinfo.f:6
pcbmatgen
subroutine pcbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcbmatgen.f:5
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pcgbtrs
subroutine pcgbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pcgbtrs.f:3
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcgbdriver
program pcgbdriver
Definition: pcgbdriver.f:1
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pcgbtrf
subroutine pcgbtrf(N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, WORK, LWORK, INFO)
Definition: pcgbtrf.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
pcdblaschk
subroutine pcdblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pcdblaschk.f:4