SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psinvdriver.f
Go to the documentation of this file.
1 PROGRAM psinvdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* =======
10*
11* PSINVDRIVER is the main test program for the REAL
12* SCALAPACK matrix inversion routines. This test driver computes the
13* inverse of different kind of matrix and tests the results.
14*
15* The program must be driven by a short data file. An annotated example
16* of a data file can be obtained by deleting the first 3 characters
17* from the following 14 lines:
18* 'ScaLAPACK Matrix Inversion Testing input file'
19* 'PVM machine.'
20* 'INV.out' output file name (if any)
21* 6 device out
22* 5 number of matrix types (next line)
23* 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24* 4 number of problems sizes
25* 1000 2000 3000 4000 values of N
26* 3 number of NB's
27* 4 30 35 values of NB
28* 2 number of process grids (ordered P & Q)
29* 4 2 values of P
30* 4 4 values of Q
31* 1.0 threshold
32*
33* Internal Parameters
34* ===================
35*
36* TOTMEM INTEGER, default = 2000000
37* TOTMEM is a machine-specific parameter indicating the
38* maximum amount of available memory in bytes.
39* The user should customize TOTMEM to his platform. Remember
40* to leave room in memory for the operating system, the BLACS
41* buffer, etc. For example, on a system with 8 MB of memory
42* per process (e.g., one processor on an Intel iPSC/860), the
43* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44* code, BLACS buffer, etc). However, for PVM, we usually set
45* TOTMEM = 2000000. Some experimenting with the maximum value
46* of TOTMEM may be required.
47*
48* INTGSZ INTEGER, default = 4 bytes.
49* REALSZ INTEGER, default = 4 bytes.
50* INTGSZ and REALSZ indicate the length in bytes on the
51* given platform for an integer and a single precision real.
52* MEM REAL array, dimension ( TOTMEM / REALSZ )
53*
54* All arrays used by SCALAPACK routines are allocated from
55* this array and referenced by pointers. The integer IPA,
56* for example, is a pointer to the starting element of MEM for
57* the matrix A.
58*
59* =====================================================================
60*
61* .. Parameters ..
62 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
63 $ lld_, mb_, m_, nb_, n_, rsrc_
64 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
65 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
66 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
67 INTEGER intgsz, memsiz, ntests, realsz, totmem
68 REAL padval, zero
69 parameter( intgsz = 4, realsz = 4, totmem = 2000000,
70 $ memsiz = totmem / realsz, ntests = 20,
71 $ padval = -9923.0e+0, zero = 0.0e+0 )
72* ..
73* .. Local Scalars ..
74 CHARACTER uplo
75 CHARACTER*3 mtyp
76 CHARACTER*6 passed
77 CHARACTER*80 outfile
78 LOGICAL check
79 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
80 $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
81 $ k, ktests, kpass, kfail, kskip, l, lcm, lipiv,
82 $ liwork, lwork, mycol, myrow, n, nb, ngrids,
83 $ nmat, nmtyp, nnb, nout, np, npcol, nprocs,
84 $ nprow, nq, workiinv, workinv, worksiz
85 REAL anorm, fresid, rcond, thresh
86 DOUBLE PRECISION nops, tmflops
87* ..
88* .. Local Arrays ..
89 CHARACTER*3 mattyp( ntests )
90 INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
91 $ nval( ntests ), pval( ntests ),
92 $ qval( ntests )
93 REAL mem( memsiz )
94 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
95* ..
96* .. External Subroutines ..
97 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
98 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
99 $ blacs_pinfo, descinit, igsum2d, pschekpad,
104* ..
105* .. External Functions ..
106 LOGICAL lsamen
107 INTEGER iceil, ilcm, numroc
108 REAL pslange, pslansy, pslantr
109 EXTERNAL iceil, ilcm, lsamen, numroc, pslange,
111* ..
112* .. Intrinsic Functions ..
113 INTRINSIC dble, max
114* ..
115* .. Data Statements ..
116 DATA ktests, kpass, kfail, kskip /4*0/
117* ..
118* .. Executable Statements ..
119*
120* Get starting information
121*
122 CALL blacs_pinfo( iam, nprocs )
123 iaseed = 100
124 CALL psinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
125 $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
126 $ qval, ntests, thresh, mem, iam, nprocs )
127 check = ( thresh.GE.0.0e+0 )
128*
129* Loop over the different matrix types
130*
131 DO 40 i = 1, nmtyp
132*
133 mtyp = mattyp( i )
134*
135* Print headings
136*
137 IF( iam.EQ.0 ) THEN
138 WRITE( nout, fmt = * )
139 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
140 WRITE( nout, fmt = 9986 )
141 $ 'A is a general matrix.'
142 ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
143 WRITE( nout, fmt = 9986 )
144 $ 'A is an upper triangular matrix.'
145 ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
146 WRITE( nout, fmt = 9986 )
147 $ 'A is a lower triangular matrix.'
148 ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
149 WRITE( nout, fmt = 9986 )
150 $ 'A is a symmetric positive definite matrix.'
151 WRITE( nout, fmt = 9986 )
152 $ 'Only the upper triangular part will be '//
153 $ 'referenced.'
154 ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
155 WRITE( nout, fmt = 9986 )
156 $ 'A is a symmetric positive definite matrix.'
157 WRITE( nout, fmt = 9986 )
158 $ 'Only the lower triangular part will be '//
159 $ 'referenced.'
160 END IF
161 WRITE( nout, fmt = * )
162 WRITE( nout, fmt = 9995 )
163 WRITE( nout, fmt = 9994 )
164 WRITE( nout, fmt = * )
165 END IF
166*
167* Loop over different process grids
168*
169 DO 30 j = 1, ngrids
170*
171 nprow = pval( j )
172 npcol = qval( j )
173*
174* Make sure grid information is correct
175*
176 ierr( 1 ) = 0
177 IF( nprow.LT.1 ) THEN
178 IF( iam.EQ.0 )
179 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
180 ierr( 1 ) = 1
181 ELSE IF( npcol.LT.1 ) THEN
182 IF( iam.EQ.0 )
183 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
184 ierr( 1 ) = 1
185 ELSE IF( nprow*npcol.GT.nprocs ) THEN
186 IF( iam.EQ.0 )
187 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
188 ierr( 1 ) = 1
189 END IF
190*
191 IF( ierr( 1 ).GT.0 ) THEN
192 IF( iam.EQ.0 )
193 $ WRITE( nout, fmt = 9997 ) 'grid'
194 kskip = kskip + 1
195 GO TO 30
196 END IF
197*
198* Define process grid
199*
200 CALL blacs_get( -1, 0, ictxt )
201 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
202 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
203*
204* Go to bottom of loop if this case doesn't use my process
205*
206 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
207 $ GO TO 30
208*
209 DO 20 k = 1, nmat
210*
211 n = nval( k )
212*
213* Make sure matrix information is correct
214*
215 ierr( 1 ) = 0
216 IF( n.LT.1 ) THEN
217 IF( iam.EQ.0 )
218 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
219 ierr( 1 ) = 1
220 END IF
221*
222* Make sure no one had error
223*
224 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
225*
226 IF( ierr( 1 ).GT.0 ) THEN
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9997 ) 'matrix'
229 kskip = kskip + 1
230 GO TO 20
231 END IF
232*
233* Loop over different blocking sizes
234*
235 DO 10 l = 1, nnb
236*
237 nb = nbval( l )
238*
239* Make sure nb is legal
240*
241 ierr( 1 ) = 0
242 IF( nb.LT.1 ) THEN
243 ierr( 1 ) = 1
244 IF( iam.EQ.0 )
245 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
246 END IF
247*
248* Check all processes for an error
249*
250 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
251 $ 0 )
252*
253 IF( ierr( 1 ).GT.0 ) THEN
254 IF( iam.EQ.0 )
255 $ WRITE( nout, fmt = 9997 ) 'NB'
256 kskip = kskip + 1
257 GO TO 10
258 END IF
259*
260* Padding constants
261*
262 np = numroc( n, nb, myrow, 0, nprow )
263 nq = numroc( n, nb, mycol, 0, npcol )
264 IF( check ) THEN
265 iprepad = max( nb, np )
266 imidpad = nb
267 ipostpad = max( nb, nq )
268 ELSE
269 iprepad = 0
270 imidpad = 0
271 ipostpad = 0
272 END IF
273*
274* Initialize the array descriptor for the matrix A
275*
276 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
277 $ max( 1, np ) + imidpad, ierr( 1 ) )
278*
279* Check all processes for an error
280*
281 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
282 $ 0 )
283*
284 IF( ierr( 1 ).LT.0 ) THEN
285 IF( iam.EQ.0 )
286 $ WRITE( nout, fmt = 9997 ) 'descriptor'
287 kskip = kskip + 1
288 GO TO 10
289 END IF
290*
291* Assign pointers into MEM for ScaLAPACK arrays, A is
292* allocated starting at position MEM( IPREPAD+1 )
293*
294 ipa = iprepad+1
295*
296 lcm = ilcm( nprow, npcol )
297 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
298*
299* Pivots are needed by LU factorization
300*
301 ippiv = ipa + desca( lld_ ) * nq + ipostpad +
302 $ iprepad
303 lipiv = iceil( intgsz * ( np + nb ), realsz )
304 ipw = ippiv + lipiv + ipostpad + iprepad
305*
306 lwork = max( 1, np * desca( nb_ ) )
307 workinv = lwork + ipostpad
308*
309* Figure the amount of workspace required by the
310* general matrix inversion
311*
312 IF( nprow.EQ.npcol ) THEN
313 liwork = nq + desca( nb_ )
314 ELSE
315*
316* change the integer workspace needed for PDGETRI
317* LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
318* $ ICEIL( ICEIL( DESCA( LLD_ ),
319* $ DESCA( MB_ ) ), LCM / NPROW ) )
320* $ + NQ
321 liwork = numroc( desca( m_ ) +
322 $ desca( mb_ ) * nprow
323 $ + mod( 1 - 1, desca( mb_ ) ), desca( nb_ ),
324 $ mycol, desca( csrc_ ), npcol ) +
325 $ max( desca( mb_ ) * iceil( iceil(
326 $ numroc( desca( m_ ) + desca( mb_ ) * nprow,
327 $ desca( mb_ ), myrow, desca( rsrc_ ), nprow ),
328 $ desca( mb_ ) ), lcm / nprow ), desca( nb_ ) )
329*
330 END IF
331 workiinv = iceil( liwork*intgsz, realsz ) +
332 $ ipostpad
333 ipiw = ipw + workinv + iprepad
334 worksiz = workinv + iprepad + workiinv
335*
336 ELSE
337*
338* No pivots or workspace needed for triangular or
339* symmetric positive definite matrices.
340*
341 ipw = ipa + desca( lld_ ) * nq + ipostpad + iprepad
342 worksiz = 1 + ipostpad
343*
344 END IF
345*
346 IF( check ) THEN
347*
348* Figure amount of work space for the norm
349* computations
350*
351 IF( lsamen( 3, mtyp, 'GEN' ).OR.
352 $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
353 itemp = nq
354 ELSE
355 itemp = 2 * nq + np
356 IF( nprow.NE.npcol ) THEN
357 itemp = itemp +
358 $ nb * iceil( iceil( np, nb ),
359 $ lcm / nprow )
360 END IF
361 END IF
362 worksiz = max( worksiz-ipostpad, itemp )
363*
364* Figure the amount of workspace required by the
365* checking routine
366*
367 worksiz = max( worksiz, 2 * nb * max( 1, np ) ) +
368 $ ipostpad
369*
370 END IF
371*
372* Check for adequate memory for problem size
373*
374 ierr( 1 ) = 0
375 IF( ipw+worksiz.GT.memsiz ) THEN
376 IF( iam.EQ.0 )
377 $ WRITE( nout, fmt = 9996 ) 'inversion',
378 $ ( ipw + worksiz ) * realsz
379 ierr( 1 ) = 1
380 END IF
381*
382* Check all processes for an error
383*
384 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
385 $ 0 )
386*
387 IF( ierr( 1 ).GT.0 ) THEN
388 IF( iam.EQ.0 )
389 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
390 kskip = kskip + 1
391 GO TO 10
392 END IF
393*
394 IF( lsamen( 3, mtyp, 'GEN' ).OR.
395 $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
396*
397* Generate a general diagonally dominant matrix A
398*
399 CALL psmatgen( ictxt, 'N', 'D', desca( m_ ),
400 $ desca( n_ ), desca( mb_ ),
401 $ desca( nb_ ), mem( ipa ),
402 $ desca( lld_ ), desca( rsrc_ ),
403 $ desca( csrc_ ), iaseed, 0, np, 0,
404 $ nq, myrow, mycol, nprow, npcol )
405*
406 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
407*
408* Generate a symmetric positive definite matrix
409*
410 CALL psmatgen( ictxt, 'S', 'D', desca( m_ ),
411 $ desca( n_ ), desca( mb_ ),
412 $ desca( nb_ ), mem( ipa ),
413 $ desca( lld_ ), desca( rsrc_ ),
414 $ desca( csrc_ ), iaseed, 0, np, 0,
415 $ nq, myrow, mycol, nprow, npcol )
416*
417 END IF
418*
419* Zeros not-referenced part of A, if any.
420*
421 IF( lsamen( 1, mtyp, 'U' ) ) THEN
422*
423 uplo = 'U'
424 CALL pslaset( 'Lower', n-1, n-1, zero, zero,
425 $ mem( ipa ), 2, 1, desca )
426*
427 ELSE IF( lsamen( 1, mtyp, 'L' ) ) THEN
428*
429 uplo = 'L'
430 CALL pslaset( 'Upper', n-1, n-1, zero, zero,
431 $ mem( ipa ), 1, 2, desca )
432*
433 ELSE
434*
435 uplo = 'G'
436*
437 END IF
438*
439* Need 1-norm of A for checking
440*
441 IF( check ) THEN
442*
443 CALL psfillpad( ictxt, np, nq, mem( ipa-iprepad ),
444 $ desca( lld_ ), iprepad, ipostpad,
445 $ padval )
446 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
447 $ mem( ipw-iprepad ),
448 $ worksiz-ipostpad, iprepad,
449 $ ipostpad, padval )
450*
451 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
452*
453 CALL psfillpad( ictxt, lipiv, 1,
454 $ mem( ippiv-iprepad ), lipiv,
455 $ iprepad, ipostpad, padval )
456 anorm = pslange( '1', n, n, mem( ipa ), 1, 1,
457 $ desca, mem( ipw ) )
458 CALL pschekpad( ictxt, 'PSLANGE', np, nq,
459 $ mem( ipa-iprepad ),
460 $ desca( lld_ ),
461 $ iprepad, ipostpad, padval )
462 CALL pschekpad( ictxt, 'PSLANGE',
463 $ worksiz-ipostpad, 1,
464 $ mem( ipw-iprepad ),
465 $ worksiz-ipostpad,
466 $ iprepad, ipostpad, padval )
467 CALL psfillpad( ictxt, workinv-ipostpad, 1,
468 $ mem( ipw-iprepad ),
469 $ workinv-ipostpad,
470 $ iprepad, ipostpad, padval )
471 CALL psfillpad( ictxt, workiinv-ipostpad, 1,
472 $ mem( ipiw-iprepad ),
473 $ workiinv-ipostpad, iprepad,
474 $ ipostpad, padval )
475 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
476*
477 anorm = pslantr( '1', uplo, 'Non unit', n, n,
478 $ mem( ipa ), 1, 1, desca,
479 $ mem( ipw ) )
480 CALL pschekpad( ictxt, 'PSLANTR', np, nq,
481 $ mem( ipa-iprepad ),
482 $ desca( lld_ ),
483 $ iprepad, ipostpad, padval )
484 CALL pschekpad( ictxt, 'PSLANTR',
485 $ worksiz-ipostpad, 1,
486 $ mem( ipw-iprepad ),
487 $ worksiz-ipostpad,
488 $ iprepad, ipostpad, padval )
489*
490 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
491*
492 anorm = pslansy( '1', uplo, n, mem( ipa ), 1, 1,
493 $ desca, mem( ipw ) )
494 CALL pschekpad( ictxt, 'PSLANSY', np, nq,
495 $ mem( ipa-iprepad ),
496 $ desca( lld_ ),
497 $ iprepad, ipostpad, padval )
498 CALL pschekpad( ictxt, 'PSLANSY',
499 $ worksiz-ipostpad, 1,
500 $ mem( ipw-iprepad ),
501 $ worksiz-ipostpad,
502 $ iprepad, ipostpad, padval )
503*
504 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'SY' ) ) THEN
505*
506 CALL psfillpad( ictxt, lipiv, 1,
507 $ mem( ippiv-iprepad ), lipiv,
508 $ iprepad, ipostpad, padval )
509 anorm = pslansy( '1', uplo, n, mem( ipa ), 1, 1,
510 $ desca, mem( ipw ) )
511 CALL pschekpad( ictxt, 'PSLANSY', np, nq,
512 $ mem( ipa-iprepad ),
513 $ desca( lld_ ),
514 $ iprepad, ipostpad, padval )
515 CALL pschekpad( ictxt, 'PSLANSY',
516 $ worksiz-ipostpad, 1,
517 $ mem( ipw-iprepad ),
518 $ worksiz-ipostpad,
519 $ iprepad,ipostpad, padval )
520*
521 END IF
522*
523 END IF
524*
525 CALL slboot()
526 CALL blacs_barrier( ictxt, 'All' )
527*
528 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
529*
530* Perform LU factorization
531*
532 CALL sltimer( 1 )
533 CALL psgetrf( n, n, mem( ipa ), 1, 1, desca,
534 $ mem( ippiv ), info )
535 CALL sltimer( 1 )
536*
537 IF( check ) THEN
538*
539* Check for memory overwrite
540*
541 CALL pschekpad( ictxt, 'PSGETRF', np, nq,
542 $ mem( ipa-iprepad ),
543 $ desca( lld_ ),
544 $ iprepad, ipostpad, padval )
545 CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
546 $ mem( ippiv-iprepad ), lipiv,
547 $ iprepad, ipostpad, padval )
548 END IF
549*
550* Perform the general matrix inversion
551*
552 CALL sltimer( 2 )
553 CALL psgetri( n, mem( ipa ), 1, 1, desca,
554 $ mem( ippiv ), mem( ipw ), lwork,
555 $ mem( ipiw ), liwork, info )
556 CALL sltimer( 2 )
557*
558 IF( check ) THEN
559*
560* Check for memory overwrite
561*
562 CALL pschekpad( ictxt, 'PSGETRI', np, nq,
563 $ mem( ipa-iprepad ),
564 $ desca( lld_ ),
565 $ iprepad, ipostpad, padval )
566 CALL pschekpad( ictxt, 'PSGETRI', lipiv, 1,
567 $ mem( ippiv-iprepad ), lipiv,
568 $ iprepad, ipostpad, padval )
569 CALL pschekpad( ictxt, 'PSGETRI',
570 $ workiinv-ipostpad, 1,
571 $ mem( ipiw-iprepad ),
572 $ workiinv-ipostpad,
573 $ iprepad, ipostpad, padval )
574 CALL pschekpad( ictxt, 'PSGETRI',
575 $ workinv-ipostpad, 1,
576 $ mem( ipw-iprepad ),
577 $ workinv-ipostpad,
578 $ iprepad, ipostpad, padval )
579 END IF
580*
581 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
582*
583* Perform the general matrix inversion
584*
585 CALL sltimer( 2 )
586 CALL pstrtri( uplo, 'Non unit', n, mem( ipa ), 1,
587 $ 1, desca, info )
588 CALL sltimer( 2 )
589*
590 IF( check ) THEN
591*
592* Check for memory overwrite
593*
594 CALL pschekpad( ictxt, 'PSTRTRI', np, nq,
595 $ mem( ipa-iprepad ),
596 $ desca( lld_ ),
597 $ iprepad, ipostpad, padval )
598 END IF
599*
600 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
601*
602* Perform Cholesky factorization
603*
604 CALL sltimer( 1 )
605 CALL pspotrf( uplo, n, mem( ipa ), 1, 1, desca,
606 $ info )
607 CALL sltimer( 1 )
608*
609 IF( check ) THEN
610*
611* Check for memory overwrite
612*
613 CALL pschekpad( ictxt, 'PSPOTRF', np, nq,
614 $ mem( ipa-iprepad ),
615 $ desca( lld_ ),
616 $ iprepad, ipostpad, padval )
617 END IF
618*
619* Perform the symmetric positive definite matrix
620* inversion
621*
622 CALL sltimer( 2 )
623 CALL pspotri( uplo, n, mem( ipa ), 1, 1, desca,
624 $ info )
625 CALL sltimer( 2 )
626*
627 IF( check ) THEN
628*
629* Check for memory overwrite
630*
631 CALL pschekpad( ictxt, 'PSPOTRI', np, nq,
632 $ mem( ipa-iprepad ),
633 $ desca( lld_ ),
634 $ iprepad, ipostpad, padval )
635 END IF
636*
637 END IF
638*
639 IF( check ) THEN
640*
641 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
642 $ mem( ipw-iprepad ),
643 $ worksiz-ipostpad, iprepad,
644 $ ipostpad, padval )
645*
646* Compute fresid = || inv(A)*A-I ||
647*
648 CALL psinvchk( mtyp, n, mem( ipa ), 1, 1, desca,
649 $ iaseed, anorm, fresid, rcond,
650 $ mem( ipw ) )
651*
652* Check for memory overwrite
653*
654 CALL pschekpad( ictxt, 'PSINVCHK', np, nq,
655 $ mem( ipa-iprepad ),
656 $ desca( lld_ ),
657 $ iprepad, ipostpad, padval )
658 CALL pschekpad( ictxt, 'PSINVCHK',
659 $ worksiz-ipostpad, 1,
660 $ mem( ipw-iprepad ),
661 $ worksiz-ipostpad, iprepad,
662 $ ipostpad, padval )
663*
664* Test residual and detect NaN result
665*
666 IF( fresid.LE.thresh .AND. info.EQ.0 .AND.
667 $ ( (fresid-fresid) .EQ. 0.0e+0 ) ) THEN
668 kpass = kpass + 1
669 passed = 'PASSED'
670 ELSE
671 kfail = kfail + 1
672 IF( info.GT.0 ) THEN
673 passed = 'SINGUL'
674 ELSE
675 passed = 'FAILED'
676 END IF
677 END IF
678*
679 ELSE
680*
681* Don't perform the checking, only the timing
682* operation
683*
684 kpass = kpass + 1
685 fresid = fresid - fresid
686 passed = 'BYPASS'
687*
688 END IF
689*
690* Gather maximum of all CPU and WALL clock timings
691*
692 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1, wtime )
693 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1, ctime )
694*
695* Print results
696*
697 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
698*
699 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
700*
701* 2/3 N^3 - 1/2 N^2 flops for LU factorization
702*
703 nops = ( 2.0d+0 / 3.0d+0 )*( dble( n )**3 ) -
704 $ ( 1.0d+0 / 2.0d+0 )*( dble( n )**2 )
705*
706* 4/3 N^3 - N^2 flops for inversion
707*
708 nops = nops +
709 $ ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) -
710 $ dble( n )**2
711*
712 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
713*
714* 1/3 N^3 + 2/3 N flops for triangular inversion
715*
716 ctime(1) = 0.0d+0
717 wtime(1) = 0.0d+0
718 nops = ( 1.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
719 $ ( 2.0d+0 / 3.0d+0 ) * ( dble( n ) )
720*
721 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
722*
723* 1/3 N^3 + 1/2 N^2 flops for Cholesky
724* factorization
725*
726 nops = ( 1.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
727 $ ( 1.0d+0 / 2.0d+0 ) * ( dble( n )**2 )
728*
729* 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion
730*
731 nops = nops +
732 $ ( 2.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
733 $ ( 1.0d+0 / 2.0d+0 ) * ( dble( n )**2 )
734*
735 END IF
736*
737* Figure total megaflops -- factorization and
738* inversion, for WALL and CPU time, and print
739* output.
740*
741* Print WALL time if machine supports it
742*
743 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
744 tmflops = nops /
745 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
746 ELSE
747 tmflops = 0.0d+0
748 END IF
749*
750 IF( wtime( 2 ) .GE. 0.0d+0 )
751 $ WRITE( nout, fmt = 9993 ) 'WALL', n, nb, nprow,
752 $ npcol, wtime( 1 ), wtime( 2 ), tmflops,
753 $ rcond, fresid, passed
754*
755* Print CPU time if machine supports it
756*
757 IF( ctime( 1 ) + ctime( 2 ) .GT. 0.0d+0 ) THEN
758 tmflops = nops /
759 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
760 ELSE
761 tmflops = 0.0d+0
762 END IF
763*
764 IF( ctime( 2 ) .GE. 0.0d+0 )
765 $ WRITE( nout, fmt = 9993 ) 'CPU ', n, nb, nprow,
766 $ npcol, ctime( 1 ), ctime( 2 ), tmflops,
767 $ rcond, fresid, passed
768 END IF
769*
770 10 CONTINUE
771*
772 20 CONTINUE
773*
774 CALL blacs_gridexit( ictxt )
775*
776 30 CONTINUE
777*
778 40 CONTINUE
779*
780* Print out ending messages and close output file
781*
782 IF( iam.EQ.0 ) THEN
783 ktests = kpass + kfail + kskip
784 WRITE( nout, fmt = * )
785 WRITE( nout, fmt = 9992 ) ktests
786 IF( check ) THEN
787 WRITE( nout, fmt = 9991 ) kpass
788 WRITE( nout, fmt = 9989 ) kfail
789 ELSE
790 WRITE( nout, fmt = 9990 ) kpass
791 END IF
792 WRITE( nout, fmt = 9988 ) kskip
793 WRITE( nout, fmt = * )
794 WRITE( nout, fmt = * )
795 WRITE( nout, fmt = 9987 )
796 IF( nout.NE.6 .AND. nout.NE.0 )
797 $ CLOSE ( nout )
798 END IF
799*
800 CALL blacs_exit( 0 )
801*
802 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
803 $ '; It should be at least 1' )
804 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
805 $ i4 )
806 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
807 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
808 $ i11 )
809 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ',
810 $ ' MFLOPS Cond Resid CHECK' )
811 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
812 $ '----------- ------- ------- ------' )
813 9993 FORMAT( a4, 1x, i5, 1x, i3, 1x, i5, 1x, i5, 1x, f8.2, 1x, f8.2,
814 $ 1x, f11.2, 1x, f7.1, 1x, f7.2, 1x, a6 )
815 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
816 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
817 9990 FORMAT( i5, ' tests completed without checking.' )
818 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
819 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
820 9987 FORMAT( 'END OF TESTS.' )
821 9986 FORMAT( a )
822*
823 stop
824*
825* End of PSINVDRIVER
826*
827 END
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457
#define max(A, B)
Definition pcgemr.c:180
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition psblastst.f:6863
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition psgetrf.f:2
subroutine psgetri(n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
Definition psgetri.f:3
subroutine psinvchk(mattyp, n, a, ia, ja, desca, iaseed, anorm, fresid, rcond, work)
Definition psinvchk.f:3
program psinvdriver
Definition psinvdriver.f:1
subroutine psinvinfo(summry, nout, nmtyp, mattyp, ldmtyp, nmat, nval, ldnval, nnb, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition psinvinfo.f:5
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
real function pslansy(norm, uplo, n, a, ia, ja, desca, work)
Definition pslansy.f:3
real function pslantr(norm, uplo, diag, m, n, a, ia, ja, desca, work)
Definition pslantr.f:3
subroutine pspotrf(uplo, n, a, ia, ja, desca, info)
Definition pspotrf.f:2
subroutine pspotri(uplo, n, a, ia, ja, desca, info)
Definition pspotri.f:2
subroutine pstrtri(uplo, diag, n, a, ia, ja, desca, info)
Definition pstrtri.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267