ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcptdriver.f
Go to the documentation of this file.
1  PROGRAM pcptdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PCPTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by CPT. This test driver performs an
15 * A = L*L**H factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * CPLXSZ INTEGER, default = 8 bytes.
56 * INTGSZ and CPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a single precision
58 * complex.
59 * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  REAL zero
83  INTEGER cplxsz, memsiz, ntests
84  COMPLEX padval
85  parameter( cplxsz = 8,
86  $ memsiz = totmem / cplxsz, ntests = 20,
87  $ padval = ( -9923.0e+0, -9923.0e+0 ),
88  $ zero = 0.0e+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER uplo
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
98  $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99  $ imidpad, info, int_temp, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL anorm, sresid, thresh
107  DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108 * ..
109 * .. Local Arrays ..
110  INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
111  $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
112  $ nbrval( ntests ), nbval( ntests ),
113  $ nrval( ntests ), nval( ntests ),
114  $ pval( ntests ), qval( ntests )
115  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116  COMPLEX mem( memsiz )
117 * ..
118 * .. External Subroutines ..
119  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121  $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
124  $ slcombine, sltimer
125 * ..
126 * .. External Functions ..
127  INTEGER numroc
128  LOGICAL lsame
129  REAL pclange
130  EXTERNAL lsame, numroc, pclange
131 * ..
132 * .. Intrinsic Functions ..
133  INTRINSIC dble, max, min, mod
134 * ..
135 * .. Data Statements ..
136  DATA kfail, kpass, kskip, ktests / 4*0 /
137 * ..
138 *
139 *
140 *
141 * .. Executable Statements ..
142 *
143 * Get starting information
144 *
145  CALL blacs_pinfo( iam, nprocs )
146  iaseed = 100
147  ibseed = 200
148 *
149  CALL pcptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
150  $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
151  $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
152  $ qval, ntests, thresh, mem, iam, nprocs )
153 *
154  check = ( thresh.GE.0.0e+0 )
155 *
156 * Print headings
157 *
158  IF( iam.EQ.0 ) THEN
159  WRITE( nout, fmt = * )
160  WRITE( nout, fmt = 9995 )
161  WRITE( nout, fmt = 9994 )
162  WRITE( nout, fmt = * )
163  END IF
164 *
165 * Loop over different process grids
166 *
167  DO 60 i = 1, ngrids
168 *
169  nprow = pval( i )
170  npcol = qval( i )
171 *
172 * Make sure grid information is correct
173 *
174  ierr( 1 ) = 0
175  IF( nprow.LT.1 ) THEN
176  IF( iam.EQ.0 )
177  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178  ierr( 1 ) = 1
179  ELSE IF( npcol.LT.1 ) THEN
180  IF( iam.EQ.0 )
181  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182  ierr( 1 ) = 1
183  ELSE IF( nprow*npcol.GT.nprocs ) THEN
184  IF( iam.EQ.0 )
185  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186  ierr( 1 ) = 1
187  END IF
188 *
189  IF( ierr( 1 ).GT.0 ) THEN
190  IF( iam.EQ.0 )
191  $ WRITE( nout, fmt = 9997 ) 'grid'
192  kskip = kskip + 1
193  GO TO 50
194  END IF
195 *
196 * Define process grid
197 *
198  CALL blacs_get( -1, 0, ictxt )
199  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200 *
201 *
202 * Define transpose process grid
203 *
204  CALL blacs_get( -1, 0, ictxtb )
205  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206 *
207 * Go to bottom of process grid loop if this case doesn't use my
208 * process
209 *
210  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211 *
212  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213  GO TO 50
214  ENDIF
215 *
216  DO 40 j = 1, nmat
217 *
218  ierr( 1 ) = 0
219 *
220  n = nval( j )
221 *
222 * Make sure matrix information is correct
223 *
224  IF( n.LT.1 ) THEN
225  IF( iam.EQ.0 )
226  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227  ierr( 1 ) = 1
228  END IF
229 *
230 * Check all processes for an error
231 *
232  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233  $ -1, 0 )
234 *
235  IF( ierr( 1 ).GT.0 ) THEN
236  IF( iam.EQ.0 )
237  $ WRITE( nout, fmt = 9997 ) 'size'
238  kskip = kskip + 1
239  GO TO 40
240  END IF
241 *
242 *
243  DO 45 bw_num = 1, nbw
244 *
245  ierr( 1 ) = 0
246 *
247  bw = 1
248  IF( bw.LT.0 ) THEN
249  IF( iam.EQ.0 )
250  $ WRITE( nout, fmt = 9999 ) 'Band', 'bw', bw
251  ierr( 1 ) = 1
252  END IF
253 *
254  IF( bw.GT.n-1 ) THEN
255  ierr( 1 ) = 1
256  END IF
257 *
258 * Check all processes for an error
259 *
260  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
261  $ -1, 0 )
262 *
263  IF( ierr( 1 ).GT.0 ) THEN
264  kskip = kskip + 1
265  GO TO 45
266  END IF
267 *
268  DO 30 k = 1, nnb
269 *
270  ierr( 1 ) = 0
271 *
272  nb = nbval( k )
273  IF( nb.LT.0 ) THEN
274  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
275  $ + int_one
276  nb = max( nb, 2*int_one )
277  nb = min( n, nb )
278  END IF
279 *
280 * Make sure NB is legal
281 *
282  ierr( 1 ) = 0
283  IF( nb.LT.min( 2*int_one, n ) ) THEN
284  ierr( 1 ) = 1
285  ENDIF
286 *
287 * Check all processes for an error
288 *
289  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
290  $ -1, 0 )
291 *
292  IF( ierr( 1 ).GT.0 ) THEN
293  kskip = kskip + 1
294  GO TO 30
295  END IF
296 *
297 * Padding constants
298 *
299  np = numroc( (2), (2),
300  $ myrow, 0, nprow )
301  nq = numroc( n, nb, mycol, 0, npcol )
302 *
303  IF( check ) THEN
304  iprepad = ((2)+10)
305  imidpad = 10
306  ipostpad = ((2)+10)
307  ELSE
308  iprepad = 0
309  imidpad = 0
310  ipostpad = 0
311  END IF
312 *
313 * Initialize the array descriptor for the matrix A
314 *
315  CALL descinit( desca2d, n, (2),
316  $ nb, 1, 0, 0,
317  $ ictxtb, nb+10, ierr( 1 ) )
318 *
319 * Convert this to 1D descriptor
320 *
321  desca( 1 ) = 501
322  desca( 3 ) = n
323  desca( 4 ) = nb
324  desca( 5 ) = 0
325  desca( 2 ) = ictxt
326  desca( 6 ) = ((2)+10)
327  desca( 7 ) = 0
328 *
329  ierr_temp = ierr( 1 )
330  ierr( 1 ) = 0
331  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
332 *
333 * Check all processes for an error
334 *
335  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
336 *
337  IF( ierr( 1 ).LT.0 ) THEN
338  IF( iam.EQ.0 )
339  $ WRITE( nout, fmt = 9997 ) 'descriptor'
340  kskip = kskip + 1
341  GO TO 30
342  END IF
343 *
344 * Assign pointers into MEM for SCALAPACK arrays, A is
345 * allocated starting at position MEM( IPREPAD+1 )
346 *
347  free_ptr = 1
348  ipb = 0
349 *
350 * Save room for prepadding
351  free_ptr = free_ptr + iprepad
352 *
353  ipa = free_ptr
354  free_ptr = free_ptr + (nb+10)*(2)
355  $ + ipostpad
356 *
357 * Add memory for fillin
358 * Fillin space needs to store:
359 * Fillin spike:
360 * Contribution to previous proc's diagonal block of
361 * reduced system:
362 * Off-diagonal block of reduced system:
363 * Diagonal block of reduced system:
364 *
365  fillin_size =
366  $ (12*npcol + 3*nb)
367 *
368 * Claim memory for fillin
369 *
370  free_ptr = free_ptr + iprepad
371  ip_fillin = free_ptr
372  free_ptr = free_ptr + fillin_size
373 *
374 * Workspace needed by computational routines:
375 *
376  ipw_size = 0
377 *
378 * factorization:
379 *
380  ipw_size = 8*npcol
381 *
382 * Claim memory for IPW
383 *
384  ipw = free_ptr
385  free_ptr = free_ptr + ipw_size
386 *
387 * Check for adequate memory for problem size
388 *
389  ierr( 1 ) = 0
390  IF( free_ptr.GT.memsiz ) THEN
391  IF( iam.EQ.0 )
392  $ WRITE( nout, fmt = 9996 )
393  $ 'divide and conquer factorization',
394  $ (free_ptr )*cplxsz
395  ierr( 1 ) = 1
396  END IF
397 *
398 * Check all processes for an error
399 *
400  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
401  $ 1, -1, 0 )
402 *
403  IF( ierr( 1 ).GT.0 ) THEN
404  IF( iam.EQ.0 )
405  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
406  kskip = kskip + 1
407  GO TO 30
408  END IF
409 *
410 * Worksize needed for LAPRNT
411  worksiz = max( ((2)+10), nb )
412 *
413  IF( check ) THEN
414 *
415 * Calculate the amount of workspace required by
416 * the checking routines.
417 *
418 * PCLANGE
419  worksiz = max( worksiz, desca2d( nb_ ) )
420 *
421 * PCPTLASCHK
422  worksiz = max( worksiz,
423  $ max(5,nb)+2*nb )
424  END IF
425 *
426  free_ptr = free_ptr + iprepad
427  ip_driver_w = free_ptr
428  free_ptr = free_ptr + worksiz + ipostpad
429 *
430 *
431 * Check for adequate memory for problem size
432 *
433  ierr( 1 ) = 0
434  IF( free_ptr.GT.memsiz ) THEN
435  IF( iam.EQ.0 )
436  $ WRITE( nout, fmt = 9996 ) 'factorization',
437  $ ( free_ptr )*cplxsz
438  ierr( 1 ) = 1
439  END IF
440 *
441 * Check all processes for an error
442 *
443  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
444  $ 1, -1, 0 )
445 *
446  IF( ierr( 1 ).GT.0 ) THEN
447  IF( iam.EQ.0 )
448  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
449  kskip = kskip + 1
450  GO TO 30
451  END IF
452 *
453  CALL pcbmatgen( ictxt, uplo, 'T', bw, bw, n, (2), nb,
454  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
455  $ mycol, nprow, npcol )
456  CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
457  $ nb+10, iprepad, ipostpad,
458  $ padval )
459 *
460  CALL pcfillpad( ictxt, worksiz, 1,
461  $ mem( ip_driver_w-iprepad ), worksiz,
462  $ iprepad, ipostpad, padval )
463 *
464 * Calculate norm of A for residual error-checking
465 *
466  IF( check ) THEN
467 *
468  anorm = pclange( 'I', n,
469  $ (2), mem( ipa ), 1, 1,
470  $ desca2d, mem( ip_driver_w ) )
471  CALL pcchekpad( ictxt, 'PCLANGE', nq, np,
472  $ mem( ipa-iprepad ), nb+10,
473  $ iprepad, ipostpad, padval )
474  CALL pcchekpad( ictxt, 'PCLANGE',
475  $ worksiz, 1,
476  $ mem( ip_driver_w-iprepad ), worksiz,
477  $ iprepad, ipostpad, padval )
478  END IF
479 *
480  IF( lsame( uplo, 'L' ) ) THEN
481  int_temp = 0
482  ELSE
483  int_temp = desca2d( lld_ )
484  ENDIF
485 *
486 * For SPD Tridiagonal complex matrices, diagonal is stored
487 * as a real. Thus, compact D into half the space
488 *
489  DO 10 h=1, numroc(n,nb,mycol,0,npcol)/2
490  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
491  $ +mem( ipa+int_temp+2*h-1 )*( 0.0e+0, 1.0e+0 )
492  10 CONTINUE
493  IF( 2*(numroc(n,nb,mycol,0,npcol)/2).NE.
494  $ numroc(n,nb,mycol,0,npcol) ) THEN
495  h=numroc(n,nb,mycol,0,npcol)/2+1
496  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
497  ENDIF
498 *
499 *
500  CALL slboot()
501  CALL blacs_barrier( ictxt, 'All' )
502 *
503 * Perform factorization
504 *
505  CALL sltimer( 1 )
506 *
507  CALL pcpttrf( n, mem( ipa+int_temp ),
508  $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
509  $ mem( ip_fillin ), fillin_size, mem( ipw ),
510  $ ipw_size, info )
511 *
512  CALL sltimer( 1 )
513 *
514  IF( info.NE.0 ) THEN
515  IF( iam.EQ.0 ) THEN
516  WRITE( nout, fmt = * ) 'PCPTTRF INFO=', info
517  ENDIF
518  kfail = kfail + 1
519  GO TO 30
520  END IF
521 *
522  IF( check ) THEN
523 *
524 * Check for memory overwrite in factorization
525 *
526  CALL pcchekpad( ictxt, 'PCPTTRF', nq,
527  $ np, mem( ipa-iprepad ), nb+10,
528  $ iprepad, ipostpad, padval )
529  END IF
530 *
531 *
532 * Loop over the different values for NRHS
533 *
534  DO 20 hh = 1, nnr
535 *
536  ierr( 1 ) = 0
537 *
538  nrhs = nrval( hh )
539 *
540 * Initialize Array Descriptor for rhs
541 *
542  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
543  $ ictxtb, nb+10, ierr( 1 ) )
544 *
545 * Convert this to 1D descriptor
546 *
547  descb( 1 ) = 502
548  descb( 3 ) = n
549  descb( 4 ) = nb
550  descb( 5 ) = 0
551  descb( 2 ) = ictxt
552  descb( 6 ) = descb2d( lld_ )
553  descb( 7 ) = 0
554 *
555 * reset free_ptr to reuse space for right hand sides
556 *
557  IF( ipb .GT. 0 ) THEN
558  free_ptr = ipb
559  ENDIF
560 *
561  free_ptr = free_ptr + iprepad
562  ipb = free_ptr
563  free_ptr = free_ptr + nrhs*descb2d( lld_ )
564  $ + ipostpad
565 *
566 * Allocate workspace for workspace in TRS routine:
567 *
568  ipw_solve_size = (10+2*min(100,nrhs))*npcol+4*nrhs
569 *
570  ipw_solve = free_ptr
571  free_ptr = free_ptr + ipw_solve_size
572 *
573  ierr( 1 ) = 0
574  IF( free_ptr.GT.memsiz ) THEN
575  IF( iam.EQ.0 )
576  $ WRITE( nout, fmt = 9996 )'solve',
577  $ ( free_ptr )*cplxsz
578  ierr( 1 ) = 1
579  END IF
580 *
581 * Check all processes for an error
582 *
583  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
584  $ ierr, 1, -1, 0 )
585 *
586  IF( ierr( 1 ).GT.0 ) THEN
587  IF( iam.EQ.0 )
588  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
589  kskip = kskip + 1
590  GO TO 15
591  END IF
592 *
593  myrhs_size = numroc( n, nb, mycol, 0, npcol )
594 *
595 * Generate RHS
596 *
597  CALL pcmatgen(ictxtb, 'No', 'No',
598  $ descb2d( m_ ), descb2d( n_ ),
599  $ descb2d( mb_ ), descb2d( nb_ ),
600  $ mem( ipb ),
601  $ descb2d( lld_ ), descb2d( rsrc_ ),
602  $ descb2d( csrc_ ),
603  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
604  $ myrow, npcol, nprow )
605 *
606  IF( check ) THEN
607  CALL pcfillpad( ictxtb, nb, nrhs,
608  $ mem( ipb-iprepad ),
609  $ descb2d( lld_ ),
610  $ iprepad, ipostpad,
611  $ padval )
612  CALL pcfillpad( ictxt, worksiz, 1,
613  $ mem( ip_driver_w-iprepad ),
614  $ worksiz, iprepad,
615  $ ipostpad, padval )
616  END IF
617 *
618 *
619  CALL blacs_barrier( ictxt, 'All')
620  CALL sltimer( 2 )
621 *
622 * Solve linear system via factorization
623 *
624  CALL pcpttrs( uplo, n, nrhs, mem( ipa+int_temp ),
625  $ mem( ipa+1*( nb+10-int_temp ) ), 1,
626  $ desca, mem( ipb ), 1, descb,
627  $ mem( ip_fillin ), fillin_size,
628  $ mem( ipw_solve ), ipw_solve_size,
629  $ info )
630 *
631  CALL sltimer( 2 )
632 *
633  IF( info.NE.0 ) THEN
634  IF( iam.EQ.0 )
635  $ WRITE( nout, fmt = * ) 'PCPTTRS INFO=', info
636  kfail = kfail + 1
637  passed = 'FAILED'
638  GO TO 20
639  END IF
640 *
641  IF( check ) THEN
642 *
643 * check for memory overwrite
644 *
645  CALL pcchekpad( ictxt, 'PCPTTRS-work',
646  $ worksiz, 1,
647  $ mem( ip_driver_w-iprepad ),
648  $ worksiz, iprepad,
649  $ ipostpad, padval )
650 *
651 * check the solution to rhs
652 *
653  sresid = zero
654 *
655 * Reset descriptor describing A to 1-by-P grid for
656 * use in banded utility routines
657 *
658  CALL descinit( desca2d, (2), n,
659  $ (2), nb, 0, 0,
660  $ ictxt, (2), ierr( 1 ) )
661  CALL pcptlaschk( 'H', uplo, n, bw, bw, nrhs,
662  $ mem( ipb ), 1, 1, descb2d,
663  $ iaseed, mem( ipa ), 1, 1, desca2d,
664  $ ibseed, anorm, sresid,
665  $ mem( ip_driver_w ), worksiz )
666 *
667  IF( iam.EQ.0 ) THEN
668  IF( sresid.GT.thresh )
669  $ WRITE( nout, fmt = 9985 ) sresid
670  END IF
671 *
672 * The second test is a NaN trap
673 *
674  IF( ( sresid.LE.thresh ).AND.
675  $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
676  kpass = kpass + 1
677  passed = 'PASSED'
678  ELSE
679  kfail = kfail + 1
680  passed = 'FAILED'
681  END IF
682 *
683  END IF
684 *
685  15 CONTINUE
686 * Skipped tests jump to here to print out "SKIPPED"
687 *
688 * Gather maximum of all CPU and WALL clock timings
689 *
690  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
691  $ wtime )
692  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
693  $ ctime )
694 *
695 * Print results
696 *
697  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
698 *
699  nops = 0
700  nops2 = 0
701 *
702  n_first = nb
703  nprocs_real = ( n-1 )/nb + 1
704  n_last = mod( n-1, nb ) + 1
705 *
706 *
707  nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
708  $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
709  $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
710  $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
711  nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
712  $ *( -1.d0 /2.d0+dble(bw)
713  $ *( -1.d0 / 3.d0 ) ) ) +
714  $ dble(n)*( dble(bw) /
715  $ 2.d0*( 1.d0+dble(bw) ) )
716 *
717  nops = nops +
718  $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
719  $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
720  $ ( dble(bw)*( 2*dble(n)-
721  $ ( dble(bw)+1.d0 ) ) )
722 *
723 *
724 * Second calc to represent actual hardware speed
725 *
726 * NB bw^2 flops for LLt factorization in 1st proc
727 *
728  nops2 = ( (dble(n_first))* dble(bw)**2 )
729 *
730  IF ( nprocs_real .GT. 1) THEN
731 * 4 NB bw^2 flops for LLt factorization and
732 * spike calc in last processor
733 *
734  nops2 = nops2 +
735  $ 4*( (dble(n_last)*dble(bw)**2) )
736  ENDIF
737 *
738  IF ( nprocs_real .GT. 2) THEN
739 * 4 NB bw^2 flops for LLt factorization and
740 * spike calc in other processors
741 *
742  nops2 = nops2 + (nprocs_real-2)*
743  $ 4*( (dble(nb)*dble(bw)**2) )
744  ENDIF
745 *
746 * Reduced system
747 *
748  nops2 = nops2 +
749  $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
750  IF( nprocs_real .GT. 1 ) THEN
751  nops2 = nops2 +
752  $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
753  ENDIF
754 *
755 *
756 * nrhs * 4 n_first*bw flops for LLt solve in proc 1.
757 *
758  nops2 = nops2 +
759  $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
760 *
761  IF ( nprocs_real .GT. 1 ) THEN
762 *
763 * 2*nrhs*4 n_last*bw flops for LLt solve in last.
764 *
765  nops2 = nops2 +
766  $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
767  ENDIF
768 *
769  IF ( nprocs_real .GT. 2 ) THEN
770 *
771 * 2 * nrhs * 4 NB*bw flops for LLt solve in others.
772 *
773  nops2 = nops2 +
774  $ ( nprocs_real-2)*2*
775  $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
776  ENDIF
777 *
778 * Reduced system
779 *
780  nops2 = nops2 +
781  $ nrhs*( nprocs_real-1 ) * ( bw*bw )
782  IF( nprocs_real .GT. 1 ) THEN
783  nops2 = nops2 +
784  $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
785  ENDIF
786 *
787 *
788 * Multiply by 4 to get complex count
789 *
790  nops2 = nops2 * dble(4)
791 *
792 * Calculate total megaflops - factorization and/or
793 * solve -- for WALL and CPU time, and print output
794 *
795 * Print WALL time if machine supports it
796 *
797  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
798  tmflops = nops /
799  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
800  ELSE
801  tmflops = 0.0d+0
802  END IF
803 *
804  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
805  tmflops2 = nops2 /
806  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
807  ELSE
808  tmflops2 = 0.0d+0
809  END IF
810 *
811  IF( wtime( 2 ).GE.0.0d+0 )
812  $ WRITE( nout, fmt = 9993 ) 'WALL', uplo,
813  $ n,
814  $ bw,
815  $ nb, nrhs, nprow, npcol,
816  $ wtime( 1 ), wtime( 2 ), tmflops,
817  $ tmflops2, passed
818 *
819 * Print CPU time if machine supports it
820 *
821  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
822  tmflops = nops /
823  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
824  ELSE
825  tmflops = 0.0d+0
826  END IF
827 *
828  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
829  tmflops2 = nops2 /
830  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
831  ELSE
832  tmflops2 = 0.0d+0
833  END IF
834 *
835  IF( ctime( 2 ).GE.0.0d+0 )
836  $ WRITE( nout, fmt = 9993 ) 'CPU ', uplo,
837  $ n,
838  $ bw,
839  $ nb, nrhs, nprow, npcol,
840  $ ctime( 1 ), ctime( 2 ), tmflops,
841  $ tmflops2, passed
842 *
843  END IF
844  20 CONTINUE
845 *
846 *
847  30 CONTINUE
848 * NNB loop
849 *
850  45 CONTINUE
851 * BW[] loop
852 *
853  40 CONTINUE
854 * NMAT loop
855 *
856  CALL blacs_gridexit( ictxt )
857  CALL blacs_gridexit( ictxtb )
858 *
859  50 CONTINUE
860 * NGRIDS DROPOUT
861  60 CONTINUE
862 * NGRIDS loop
863 *
864 * Print ending messages and close output file
865 *
866  IF( iam.EQ.0 ) THEN
867  ktests = kpass + kfail + kskip
868  WRITE( nout, fmt = * )
869  WRITE( nout, fmt = 9992 ) ktests
870  IF( check ) THEN
871  WRITE( nout, fmt = 9991 ) kpass
872  WRITE( nout, fmt = 9989 ) kfail
873  ELSE
874  WRITE( nout, fmt = 9990 ) kpass
875  END IF
876  WRITE( nout, fmt = 9988 ) kskip
877  WRITE( nout, fmt = * )
878  WRITE( nout, fmt = * )
879  WRITE( nout, fmt = 9987 )
880  IF( nout.NE.6 .AND. nout.NE.0 )
881  $ CLOSE ( nout )
882  END IF
883 *
884  CALL blacs_exit( 0 )
885 *
886  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
887  $ '; It should be at least 1' )
888  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
889  $ i4 )
890  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
891  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
892  $ i11 )
893  9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ',
894  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
895  9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
896  $ '-------- ------ ------ ------' )
897  9993 FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
898  $ i5, 1x, i2, 1x,
899  $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
900  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
901  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
902  9990 FORMAT( i5, ' tests completed without checking.' )
903  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
904  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
905  9987 FORMAT( 'END OF TESTS.' )
906  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
907  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
908 *
909  stop
910 *
911 * End of PCPTTRS_DRIVER
912 *
913  END
914 *
max
#define max(A, B)
Definition: pcgemr.c:180
pcbmatgen
subroutine pcbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcbmatgen.f:5
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pcptdriver
program pcptdriver
Definition: pcptdriver.f:1
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcpttrs
subroutine pcpttrs(UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pcpttrs.f:3
pcptlaschk
subroutine pcptlaschk(SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pcptlaschk.f:4
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
pcptinfo
subroutine pcptinfo(SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcptinfo.f:6
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pcpttrf
subroutine pcpttrf(N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pcpttrf.f:3
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181