ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psinvdriver.f
Go to the documentation of this file.
1  PROGRAM psinvdriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * =======
10 *
11 * PSINVDRIVER is the main test program for the REAL
12 * SCALAPACK matrix inversion routines. This test driver computes the
13 * inverse of different kind of matrix and tests the results.
14 *
15 * The program must be driven by a short data file. An annotated example
16 * of a data file can be obtained by deleting the first 3 characters
17 * from the following 14 lines:
18 * 'ScaLAPACK Matrix Inversion Testing input file'
19 * 'PVM machine.'
20 * 'INV.out' output file name (if any)
21 * 6 device out
22 * 5 number of matrix types (next line)
23 * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24 * 4 number of problems sizes
25 * 1000 2000 3000 4000 values of N
26 * 3 number of NB's
27 * 4 30 35 values of NB
28 * 2 number of process grids (ordered P & Q)
29 * 4 2 values of P
30 * 4 4 values of Q
31 * 1.0 threshold
32 *
33 * Internal Parameters
34 * ===================
35 *
36 * TOTMEM INTEGER, default = 2000000
37 * TOTMEM is a machine-specific parameter indicating the
38 * maximum amount of available memory in bytes.
39 * The user should customize TOTMEM to his platform. Remember
40 * to leave room in memory for the operating system, the BLACS
41 * buffer, etc. For example, on a system with 8 MB of memory
42 * per process (e.g., one processor on an Intel iPSC/860), the
43 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44 * code, BLACS buffer, etc). However, for PVM, we usually set
45 * TOTMEM = 2000000. Some experimenting with the maximum value
46 * of TOTMEM may be required.
47 *
48 * INTGSZ INTEGER, default = 4 bytes.
49 * REALSZ INTEGER, default = 4 bytes.
50 * INTGSZ and REALSZ indicate the length in bytes on the
51 * given platform for an integer and a single precision real.
52 * MEM REAL array, dimension ( TOTMEM / REALSZ )
53 *
54 * All arrays used by SCALAPACK routines are allocated from
55 * this array and referenced by pointers. The integer IPA,
56 * for example, is a pointer to the starting element of MEM for
57 * the matrix A.
58 *
59 * =====================================================================
60 *
61 * .. Parameters ..
62  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
63  $ lld_, mb_, m_, nb_, n_, rsrc_
64  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
65  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
66  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
67  INTEGER intgsz, memsiz, ntests, realsz, totmem
68  REAL padval, zero
69  parameter( intgsz = 4, realsz = 4, totmem = 2000000,
70  $ memsiz = totmem / realsz, ntests = 20,
71  $ padval = -9923.0e+0, zero = 0.0e+0 )
72 * ..
73 * .. Local Scalars ..
74  CHARACTER uplo
75  CHARACTER*3 mtyp
76  CHARACTER*6 passed
77  CHARACTER*80 outfile
78  LOGICAL check
79  INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
80  $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
81  $ k, ktests, kpass, kfail, kskip, l, lcm, lipiv,
82  $ liwork, lwork, mycol, myrow, n, nb, ngrids,
83  $ nmat, nmtyp, nnb, nout, np, npcol, nprocs,
84  $ nprow, nq, workiinv, workinv, worksiz
85  REAL anorm, fresid, rcond, thresh
86  DOUBLE PRECISION nops, tmflops
87 * ..
88 * .. Local Arrays ..
89  CHARACTER*3 mattyp( ntests )
90  INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
91  $ nval( ntests ), pval( ntests ),
92  $ qval( ntests )
93  REAL mem( memsiz )
94  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
95 * ..
96 * .. External Subroutines ..
97  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
98  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
99  $ blacs_pinfo, descinit, igsum2d, pschekpad,
104 * ..
105 * .. External Functions ..
106  LOGICAL lsamen
107  INTEGER iceil, ilcm, numroc
108  REAL pslange, pslansy, pslantr
109  EXTERNAL iceil, ilcm, lsamen, numroc, pslange,
110  $ pslansy, pslantr
111 * ..
112 * .. Intrinsic Functions ..
113  INTRINSIC dble, max
114 * ..
115 * .. Data Statements ..
116  DATA ktests, kpass, kfail, kskip /4*0/
117 * ..
118 * .. Executable Statements ..
119 *
120 * Get starting information
121 *
122  CALL blacs_pinfo( iam, nprocs )
123  iaseed = 100
124  CALL psinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
125  $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
126  $ qval, ntests, thresh, mem, iam, nprocs )
127  check = ( thresh.GE.0.0e+0 )
128 *
129 * Loop over the different matrix types
130 *
131  DO 40 i = 1, nmtyp
132 *
133  mtyp = mattyp( i )
134 *
135 * Print headings
136 *
137  IF( iam.EQ.0 ) THEN
138  WRITE( nout, fmt = * )
139  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
140  WRITE( nout, fmt = 9986 )
141  $ 'A is a general matrix.'
142  ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
143  WRITE( nout, fmt = 9986 )
144  $ 'A is an upper triangular matrix.'
145  ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
146  WRITE( nout, fmt = 9986 )
147  $ 'A is a lower triangular matrix.'
148  ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
149  WRITE( nout, fmt = 9986 )
150  $ 'A is a symmetric positive definite matrix.'
151  WRITE( nout, fmt = 9986 )
152  $ 'Only the upper triangular part will be '//
153  $ 'referenced.'
154  ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
155  WRITE( nout, fmt = 9986 )
156  $ 'A is a symmetric positive definite matrix.'
157  WRITE( nout, fmt = 9986 )
158  $ 'Only the lower triangular part will be '//
159  $ 'referenced.'
160  END IF
161  WRITE( nout, fmt = * )
162  WRITE( nout, fmt = 9995 )
163  WRITE( nout, fmt = 9994 )
164  WRITE( nout, fmt = * )
165  END IF
166 *
167 * Loop over different process grids
168 *
169  DO 30 j = 1, ngrids
170 *
171  nprow = pval( j )
172  npcol = qval( j )
173 *
174 * Make sure grid information is correct
175 *
176  ierr( 1 ) = 0
177  IF( nprow.LT.1 ) THEN
178  IF( iam.EQ.0 )
179  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
180  ierr( 1 ) = 1
181  ELSE IF( npcol.LT.1 ) THEN
182  IF( iam.EQ.0 )
183  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
184  ierr( 1 ) = 1
185  ELSE IF( nprow*npcol.GT.nprocs ) THEN
186  IF( iam.EQ.0 )
187  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
188  ierr( 1 ) = 1
189  END IF
190 *
191  IF( ierr( 1 ).GT.0 ) THEN
192  IF( iam.EQ.0 )
193  $ WRITE( nout, fmt = 9997 ) 'grid'
194  kskip = kskip + 1
195  GO TO 30
196  END IF
197 *
198 * Define process grid
199 *
200  CALL blacs_get( -1, 0, ictxt )
201  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
202  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
203 *
204 * Go to bottom of loop if this case doesn't use my process
205 *
206  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
207  $ GO TO 30
208 *
209  DO 20 k = 1, nmat
210 *
211  n = nval( k )
212 *
213 * Make sure matrix information is correct
214 *
215  ierr( 1 ) = 0
216  IF( n.LT.1 ) THEN
217  IF( iam.EQ.0 )
218  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
219  ierr( 1 ) = 1
220  END IF
221 *
222 * Make sure no one had error
223 *
224  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
225 *
226  IF( ierr( 1 ).GT.0 ) THEN
227  IF( iam.EQ.0 )
228  $ WRITE( nout, fmt = 9997 ) 'matrix'
229  kskip = kskip + 1
230  GO TO 20
231  END IF
232 *
233 * Loop over different blocking sizes
234 *
235  DO 10 l = 1, nnb
236 *
237  nb = nbval( l )
238 *
239 * Make sure nb is legal
240 *
241  ierr( 1 ) = 0
242  IF( nb.LT.1 ) THEN
243  ierr( 1 ) = 1
244  IF( iam.EQ.0 )
245  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
246  END IF
247 *
248 * Check all processes for an error
249 *
250  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
251  $ 0 )
252 *
253  IF( ierr( 1 ).GT.0 ) THEN
254  IF( iam.EQ.0 )
255  $ WRITE( nout, fmt = 9997 ) 'NB'
256  kskip = kskip + 1
257  GO TO 10
258  END IF
259 *
260 * Padding constants
261 *
262  np = numroc( n, nb, myrow, 0, nprow )
263  nq = numroc( n, nb, mycol, 0, npcol )
264  IF( check ) THEN
265  iprepad = max( nb, np )
266  imidpad = nb
267  ipostpad = max( nb, nq )
268  ELSE
269  iprepad = 0
270  imidpad = 0
271  ipostpad = 0
272  END IF
273 *
274 * Initialize the array descriptor for the matrix A
275 *
276  CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
277  $ max( 1, np ) + imidpad, ierr( 1 ) )
278 *
279 * Check all processes for an error
280 *
281  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
282  $ 0 )
283 *
284  IF( ierr( 1 ).LT.0 ) THEN
285  IF( iam.EQ.0 )
286  $ WRITE( nout, fmt = 9997 ) 'descriptor'
287  kskip = kskip + 1
288  GO TO 10
289  END IF
290 *
291 * Assign pointers into MEM for ScaLAPACK arrays, A is
292 * allocated starting at position MEM( IPREPAD+1 )
293 *
294  ipa = iprepad+1
295 *
296  lcm = ilcm( nprow, npcol )
297  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
298 *
299 * Pivots are needed by LU factorization
300 *
301  ippiv = ipa + desca( lld_ ) * nq + ipostpad +
302  $ iprepad
303  lipiv = iceil( intgsz * ( np + nb ), realsz )
304  ipw = ippiv + lipiv + ipostpad + iprepad
305 *
306  lwork = max( 1, np * desca( nb_ ) )
307  workinv = lwork + ipostpad
308 *
309 * Figure the amount of workspace required by the
310 * general matrix inversion
311 *
312  IF( nprow.EQ.npcol ) THEN
313  liwork = nq + desca( nb_ )
314  ELSE
315 *
316 * change the integer workspace needed for PDGETRI
317 * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
318 * $ ICEIL( ICEIL( DESCA( LLD_ ),
319 * $ DESCA( MB_ ) ), LCM / NPROW ) )
320 * $ + NQ
321  liwork = numroc( desca( m_ ) +
322  $ desca( mb_ ) * nprow
323  $ + mod( 1 - 1, desca( mb_ ) ), desca( nb_ ),
324  $ mycol, desca( csrc_ ), npcol ) +
325  $ max( desca( mb_ ) * iceil( iceil(
326  $ numroc( desca( m_ ) + desca( mb_ ) * nprow,
327  $ desca( mb_ ), myrow, desca( rsrc_ ), nprow ),
328  $ desca( mb_ ) ), lcm / nprow ), desca( nb_ ) )
329 *
330  END IF
331  workiinv = iceil( liwork*intgsz, realsz ) +
332  $ ipostpad
333  ipiw = ipw + workinv + iprepad
334  worksiz = workinv + iprepad + workiinv
335 *
336  ELSE
337 *
338 * No pivots or workspace needed for triangular or
339 * symmetric positive definite matrices.
340 *
341  ipw = ipa + desca( lld_ ) * nq + ipostpad + iprepad
342  worksiz = 1 + ipostpad
343 *
344  END IF
345 *
346  IF( check ) THEN
347 *
348 * Figure amount of work space for the norm
349 * computations
350 *
351  IF( lsamen( 3, mtyp, 'GEN' ).OR.
352  $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
353  itemp = nq
354  ELSE
355  itemp = 2 * nq + np
356  IF( nprow.NE.npcol ) THEN
357  itemp = itemp +
358  $ nb * iceil( iceil( np, nb ),
359  $ lcm / nprow )
360  END IF
361  END IF
362  worksiz = max( worksiz-ipostpad, itemp )
363 *
364 * Figure the amount of workspace required by the
365 * checking routine
366 *
367  worksiz = max( worksiz, 2 * nb * max( 1, np ) ) +
368  $ ipostpad
369 *
370  END IF
371 *
372 * Check for adequate memory for problem size
373 *
374  ierr( 1 ) = 0
375  IF( ipw+worksiz.GT.memsiz ) THEN
376  IF( iam.EQ.0 )
377  $ WRITE( nout, fmt = 9996 ) 'inversion',
378  $ ( ipw + worksiz ) * realsz
379  ierr( 1 ) = 1
380  END IF
381 *
382 * Check all processes for an error
383 *
384  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
385  $ 0 )
386 *
387  IF( ierr( 1 ).GT.0 ) THEN
388  IF( iam.EQ.0 )
389  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
390  kskip = kskip + 1
391  GO TO 10
392  END IF
393 *
394  IF( lsamen( 3, mtyp, 'GEN' ).OR.
395  $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
396 *
397 * Generate a general diagonally dominant matrix A
398 *
399  CALL psmatgen( ictxt, 'N', 'D', desca( m_ ),
400  $ desca( n_ ), desca( mb_ ),
401  $ desca( nb_ ), mem( ipa ),
402  $ desca( lld_ ), desca( rsrc_ ),
403  $ desca( csrc_ ), iaseed, 0, np, 0,
404  $ nq, myrow, mycol, nprow, npcol )
405 *
406  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
407 *
408 * Generate a symmetric positive definite matrix
409 *
410  CALL psmatgen( ictxt, 'S', 'D', desca( m_ ),
411  $ desca( n_ ), desca( mb_ ),
412  $ desca( nb_ ), mem( ipa ),
413  $ desca( lld_ ), desca( rsrc_ ),
414  $ desca( csrc_ ), iaseed, 0, np, 0,
415  $ nq, myrow, mycol, nprow, npcol )
416 *
417  END IF
418 *
419 * Zeros not-referenced part of A, if any.
420 *
421  IF( lsamen( 1, mtyp, 'U' ) ) THEN
422 *
423  uplo = 'U'
424  CALL pslaset( 'Lower', n-1, n-1, zero, zero,
425  $ mem( ipa ), 2, 1, desca )
426 *
427  ELSE IF( lsamen( 1, mtyp, 'L' ) ) THEN
428 *
429  uplo = 'L'
430  CALL pslaset( 'Upper', n-1, n-1, zero, zero,
431  $ mem( ipa ), 1, 2, desca )
432 *
433  ELSE
434 *
435  uplo = 'G'
436 *
437  END IF
438 *
439 * Need 1-norm of A for checking
440 *
441  IF( check ) THEN
442 *
443  CALL psfillpad( ictxt, np, nq, mem( ipa-iprepad ),
444  $ desca( lld_ ), iprepad, ipostpad,
445  $ padval )
446  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
447  $ mem( ipw-iprepad ),
448  $ worksiz-ipostpad, iprepad,
449  $ ipostpad, padval )
450 *
451  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
452 *
453  CALL psfillpad( ictxt, lipiv, 1,
454  $ mem( ippiv-iprepad ), lipiv,
455  $ iprepad, ipostpad, padval )
456  anorm = pslange( '1', n, n, mem( ipa ), 1, 1,
457  $ desca, mem( ipw ) )
458  CALL pschekpad( ictxt, 'PSLANGE', np, nq,
459  $ mem( ipa-iprepad ),
460  $ desca( lld_ ),
461  $ iprepad, ipostpad, padval )
462  CALL pschekpad( ictxt, 'PSLANGE',
463  $ worksiz-ipostpad, 1,
464  $ mem( ipw-iprepad ),
465  $ worksiz-ipostpad,
466  $ iprepad, ipostpad, padval )
467  CALL psfillpad( ictxt, workinv-ipostpad, 1,
468  $ mem( ipw-iprepad ),
469  $ workinv-ipostpad,
470  $ iprepad, ipostpad, padval )
471  CALL psfillpad( ictxt, workiinv-ipostpad, 1,
472  $ mem( ipiw-iprepad ),
473  $ workiinv-ipostpad, iprepad,
474  $ ipostpad, padval )
475  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
476 *
477  anorm = pslantr( '1', uplo, 'Non unit', n, n,
478  $ mem( ipa ), 1, 1, desca,
479  $ mem( ipw ) )
480  CALL pschekpad( ictxt, 'PSLANTR', np, nq,
481  $ mem( ipa-iprepad ),
482  $ desca( lld_ ),
483  $ iprepad, ipostpad, padval )
484  CALL pschekpad( ictxt, 'PSLANTR',
485  $ worksiz-ipostpad, 1,
486  $ mem( ipw-iprepad ),
487  $ worksiz-ipostpad,
488  $ iprepad, ipostpad, padval )
489 *
490  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
491 *
492  anorm = pslansy( '1', uplo, n, mem( ipa ), 1, 1,
493  $ desca, mem( ipw ) )
494  CALL pschekpad( ictxt, 'PSLANSY', np, nq,
495  $ mem( ipa-iprepad ),
496  $ desca( lld_ ),
497  $ iprepad, ipostpad, padval )
498  CALL pschekpad( ictxt, 'PSLANSY',
499  $ worksiz-ipostpad, 1,
500  $ mem( ipw-iprepad ),
501  $ worksiz-ipostpad,
502  $ iprepad, ipostpad, padval )
503 *
504  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'SY' ) ) THEN
505 *
506  CALL psfillpad( ictxt, lipiv, 1,
507  $ mem( ippiv-iprepad ), lipiv,
508  $ iprepad, ipostpad, padval )
509  anorm = pslansy( '1', uplo, n, mem( ipa ), 1, 1,
510  $ desca, mem( ipw ) )
511  CALL pschekpad( ictxt, 'PSLANSY', np, nq,
512  $ mem( ipa-iprepad ),
513  $ desca( lld_ ),
514  $ iprepad, ipostpad, padval )
515  CALL pschekpad( ictxt, 'PSLANSY',
516  $ worksiz-ipostpad, 1,
517  $ mem( ipw-iprepad ),
518  $ worksiz-ipostpad,
519  $ iprepad,ipostpad, padval )
520 *
521  END IF
522 *
523  END IF
524 *
525  CALL slboot()
526  CALL blacs_barrier( ictxt, 'All' )
527 *
528  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
529 *
530 * Perform LU factorization
531 *
532  CALL sltimer( 1 )
533  CALL psgetrf( n, n, mem( ipa ), 1, 1, desca,
534  $ mem( ippiv ), info )
535  CALL sltimer( 1 )
536 *
537  IF( check ) THEN
538 *
539 * Check for memory overwrite
540 *
541  CALL pschekpad( ictxt, 'PSGETRF', np, nq,
542  $ mem( ipa-iprepad ),
543  $ desca( lld_ ),
544  $ iprepad, ipostpad, padval )
545  CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
546  $ mem( ippiv-iprepad ), lipiv,
547  $ iprepad, ipostpad, padval )
548  END IF
549 *
550 * Perform the general matrix inversion
551 *
552  CALL sltimer( 2 )
553  CALL psgetri( n, mem( ipa ), 1, 1, desca,
554  $ mem( ippiv ), mem( ipw ), lwork,
555  $ mem( ipiw ), liwork, info )
556  CALL sltimer( 2 )
557 *
558  IF( check ) THEN
559 *
560 * Check for memory overwrite
561 *
562  CALL pschekpad( ictxt, 'PSGETRI', np, nq,
563  $ mem( ipa-iprepad ),
564  $ desca( lld_ ),
565  $ iprepad, ipostpad, padval )
566  CALL pschekpad( ictxt, 'PSGETRI', lipiv, 1,
567  $ mem( ippiv-iprepad ), lipiv,
568  $ iprepad, ipostpad, padval )
569  CALL pschekpad( ictxt, 'PSGETRI',
570  $ workiinv-ipostpad, 1,
571  $ mem( ipiw-iprepad ),
572  $ workiinv-ipostpad,
573  $ iprepad, ipostpad, padval )
574  CALL pschekpad( ictxt, 'PSGETRI',
575  $ workinv-ipostpad, 1,
576  $ mem( ipw-iprepad ),
577  $ workinv-ipostpad,
578  $ iprepad, ipostpad, padval )
579  END IF
580 *
581  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
582 *
583 * Perform the general matrix inversion
584 *
585  CALL sltimer( 2 )
586  CALL pstrtri( uplo, 'Non unit', n, mem( ipa ), 1,
587  $ 1, desca, info )
588  CALL sltimer( 2 )
589 *
590  IF( check ) THEN
591 *
592 * Check for memory overwrite
593 *
594  CALL pschekpad( ictxt, 'PSTRTRI', np, nq,
595  $ mem( ipa-iprepad ),
596  $ desca( lld_ ),
597  $ iprepad, ipostpad, padval )
598  END IF
599 *
600  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
601 *
602 * Perform Cholesky factorization
603 *
604  CALL sltimer( 1 )
605  CALL pspotrf( uplo, n, mem( ipa ), 1, 1, desca,
606  $ info )
607  CALL sltimer( 1 )
608 *
609  IF( check ) THEN
610 *
611 * Check for memory overwrite
612 *
613  CALL pschekpad( ictxt, 'PSPOTRF', np, nq,
614  $ mem( ipa-iprepad ),
615  $ desca( lld_ ),
616  $ iprepad, ipostpad, padval )
617  END IF
618 *
619 * Perform the symmetric positive definite matrix
620 * inversion
621 *
622  CALL sltimer( 2 )
623  CALL pspotri( uplo, n, mem( ipa ), 1, 1, desca,
624  $ info )
625  CALL sltimer( 2 )
626 *
627  IF( check ) THEN
628 *
629 * Check for memory overwrite
630 *
631  CALL pschekpad( ictxt, 'PSPOTRI', np, nq,
632  $ mem( ipa-iprepad ),
633  $ desca( lld_ ),
634  $ iprepad, ipostpad, padval )
635  END IF
636 *
637  END IF
638 *
639  IF( check ) THEN
640 *
641  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
642  $ mem( ipw-iprepad ),
643  $ worksiz-ipostpad, iprepad,
644  $ ipostpad, padval )
645 *
646 * Compute fresid = || inv(A)*A-I ||
647 *
648  CALL psinvchk( mtyp, n, mem( ipa ), 1, 1, desca,
649  $ iaseed, anorm, fresid, rcond,
650  $ mem( ipw ) )
651 *
652 * Check for memory overwrite
653 *
654  CALL pschekpad( ictxt, 'PSINVCHK', np, nq,
655  $ mem( ipa-iprepad ),
656  $ desca( lld_ ),
657  $ iprepad, ipostpad, padval )
658  CALL pschekpad( ictxt, 'PSINVCHK',
659  $ worksiz-ipostpad, 1,
660  $ mem( ipw-iprepad ),
661  $ worksiz-ipostpad, iprepad,
662  $ ipostpad, padval )
663 *
664 * Test residual and detect NaN result
665 *
666  IF( fresid.LE.thresh .AND. info.EQ.0 .AND.
667  $ ( (fresid-fresid) .EQ. 0.0e+0 ) ) THEN
668  kpass = kpass + 1
669  passed = 'PASSED'
670  ELSE
671  kfail = kfail + 1
672  IF( info.GT.0 ) THEN
673  passed = 'SINGUL'
674  ELSE
675  passed = 'FAILED'
676  END IF
677  END IF
678 *
679  ELSE
680 *
681 * Don't perform the checking, only the timing
682 * operation
683 *
684  kpass = kpass + 1
685  fresid = fresid - fresid
686  passed = 'BYPASS'
687 *
688  END IF
689 *
690 * Gather maximum of all CPU and WALL clock timings
691 *
692  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1, wtime )
693  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1, ctime )
694 *
695 * Print results
696 *
697  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
698 *
699  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
700 *
701 * 2/3 N^3 - 1/2 N^2 flops for LU factorization
702 *
703  nops = ( 2.0d+0 / 3.0d+0 )*( dble( n )**3 ) -
704  $ ( 1.0d+0 / 2.0d+0 )*( dble( n )**2 )
705 *
706 * 4/3 N^3 - N^2 flops for inversion
707 *
708  nops = nops +
709  $ ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) -
710  $ dble( n )**2
711 *
712  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
713 *
714 * 1/3 N^3 + 2/3 N flops for triangular inversion
715 *
716  ctime(1) = 0.0d+0
717  wtime(1) = 0.0d+0
718  nops = ( 1.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
719  $ ( 2.0d+0 / 3.0d+0 ) * ( dble( n ) )
720 *
721  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
722 *
723 * 1/3 N^3 + 1/2 N^2 flops for Cholesky
724 * factorization
725 *
726  nops = ( 1.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
727  $ ( 1.0d+0 / 2.0d+0 ) * ( dble( n )**2 )
728 *
729 * 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion
730 *
731  nops = nops +
732  $ ( 2.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
733  $ ( 1.0d+0 / 2.0d+0 ) * ( dble( n )**2 )
734 *
735  END IF
736 *
737 * Figure total megaflops -- factorization and
738 * inversion, for WALL and CPU time, and print
739 * output.
740 *
741 * Print WALL time if machine supports it
742 *
743  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
744  tmflops = nops /
745  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
746  ELSE
747  tmflops = 0.0d+0
748  END IF
749 *
750  IF( wtime( 2 ) .GE. 0.0d+0 )
751  $ WRITE( nout, fmt = 9993 ) 'WALL', n, nb, nprow,
752  $ npcol, wtime( 1 ), wtime( 2 ), tmflops,
753  $ rcond, fresid, passed
754 *
755 * Print CPU time if machine supports it
756 *
757  IF( ctime( 1 ) + ctime( 2 ) .GT. 0.0d+0 ) THEN
758  tmflops = nops /
759  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
760  ELSE
761  tmflops = 0.0d+0
762  END IF
763 *
764  IF( ctime( 2 ) .GE. 0.0d+0 )
765  $ WRITE( nout, fmt = 9993 ) 'CPU ', n, nb, nprow,
766  $ npcol, ctime( 1 ), ctime( 2 ), tmflops,
767  $ rcond, fresid, passed
768  END IF
769 *
770  10 CONTINUE
771 *
772  20 CONTINUE
773 *
774  CALL blacs_gridexit( ictxt )
775 *
776  30 CONTINUE
777 *
778  40 CONTINUE
779 *
780 * Print out ending messages and close output file
781 *
782  IF( iam.EQ.0 ) THEN
783  ktests = kpass + kfail + kskip
784  WRITE( nout, fmt = * )
785  WRITE( nout, fmt = 9992 ) ktests
786  IF( check ) THEN
787  WRITE( nout, fmt = 9991 ) kpass
788  WRITE( nout, fmt = 9989 ) kfail
789  ELSE
790  WRITE( nout, fmt = 9990 ) kpass
791  END IF
792  WRITE( nout, fmt = 9988 ) kskip
793  WRITE( nout, fmt = * )
794  WRITE( nout, fmt = * )
795  WRITE( nout, fmt = 9987 )
796  IF( nout.NE.6 .AND. nout.NE.0 )
797  $ CLOSE ( nout )
798  END IF
799 *
800  CALL blacs_exit( 0 )
801 *
802  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
803  $ '; It should be at least 1' )
804  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
805  $ i4 )
806  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
807  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
808  $ i11 )
809  9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ',
810  $ ' MFLOPS Cond Resid CHECK' )
811  9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
812  $ '----------- ------- ------- ------' )
813  9993 FORMAT( a4, 1x, i5, 1x, i3, 1x, i5, 1x, i5, 1x, f8.2, 1x, f8.2,
814  $ 1x, f11.2, 1x, f7.1, 1x, f7.2, 1x, a6 )
815  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
816  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
817  9990 FORMAT( i5, ' tests completed without checking.' )
818  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
819  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
820  9987 FORMAT( 'END OF TESTS.' )
821  9986 FORMAT( a )
822 *
823  stop
824 *
825 * End of PSINVDRIVER
826 *
827  END
max
#define max(A, B)
Definition: pcgemr.c:180
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
psinvdriver
program psinvdriver
Definition: psinvdriver.f:1
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
psinvchk
subroutine psinvchk(MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, RCOND, WORK)
Definition: psinvchk.f:3
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
psgetrf
subroutine psgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: psgetrf.f:2
psinvinfo
subroutine psinvinfo(SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: psinvinfo.f:5
pslansy
real function pslansy(NORM, UPLO, N, A, IA, JA, DESCA, WORK)
Definition: pslansy.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psgetri
subroutine psgetri(N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psgetri.f:3
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
pspotri
subroutine pspotri(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pspotri.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
lsamen
logical function lsamen(N, CA, CB)
Definition: pblastst.f:1457
pstrtri
subroutine pstrtri(UPLO, DIAG, N, A, IA, JA, DESCA, INFO)
Definition: pstrtri.f:2
pspotrf
subroutine pspotrf(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pspotrf.f:2
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
pslantr
real function pslantr(NORM, UPLO, DIAG, M, N, A, IA, JA, DESCA, WORK)
Definition: pslantr.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2