|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 PROGRAM PCLLTDRIVER 00002 * 00003 * -- ScaLAPACK testing driver (version 1.7) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * May 1, 1997 00007 * 00008 * Purpose 00009 * ======= 00010 * 00011 * PCLLTDRIVER is the main test program for the COMPLEX 00012 * ScaLAPACK Cholesky routines. This test driver performs an 00013 * A = L*L**H or A = U**H*U factorization and solve, and optionally 00014 * performs condition estimation and iterative refinement. 00015 * 00016 * The program must be driven by a short data file. An annotated 00017 * example of a data file can be obtained by deleting the first 3 00018 * characters from the following 18 lines: 00019 * 'ScaLAPACK LLt factorization input file' 00020 * 'Intel iPSC/860 hypercube, gamma model.' 00021 * 'LLT.out' output file name (if any) 00022 * 6 device out 00023 * 'U' define Lower or Upper 00024 * 1 number of problems sizes 00025 * 31 100 200 values of N 00026 * 1 number of NB's 00027 * 2 10 24 values of NB 00028 * 1 number of NRHS's 00029 * 1 values of NRHS 00030 * 1 Number of NBRHS's 00031 * 1 values of NBRHS 00032 * 1 number of process grids (ordered pairs of P & Q) 00033 * 2 values of P 00034 * 2 values of Q 00035 * 1.0 threshold 00036 * T (T or F) Test Cond. Est. and Iter. Ref. Routines 00037 * 00038 * 00039 * Internal Parameters 00040 * =================== 00041 * 00042 * TOTMEM INTEGER, default = 2000000 00043 * TOTMEM is a machine-specific parameter indicating the 00044 * maximum amount of available memory in bytes. 00045 * The user should customize TOTMEM to his platform. Remember 00046 * to leave room in memory for the operating system, the BLACS 00047 * buffer, etc. For example, on a system with 8 MB of memory 00048 * per process (e.g., one processor on an Intel iPSC/860), the 00049 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 00050 * code, BLACS buffer, etc). However, for PVM, we usually set 00051 * TOTMEM = 2000000. Some experimenting with the maximum value 00052 * of TOTMEM may be required. 00053 * 00054 * INTGSZ INTEGER, default = 4 bytes. 00055 * CPLXSZ INTEGER, default = 8 bytes. 00056 * INTGSZ and CPLXSZ indicate the length in bytes on the 00057 * given platform for an integer and a single precision 00058 * complex. 00059 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) 00060 * 00061 * All arrays used by SCALAPACK routines are allocated from 00062 * this array and referenced by pointers. The integer IPA, 00063 * for example, is a pointer to the starting element of MEM for 00064 * the matrix A. 00065 * 00066 * ===================================================================== 00067 * 00068 * .. Parameters .. 00069 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 00070 $ LLD_, MB_, M_, NB_, N_, RSRC_ 00071 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 00072 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 00073 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 00074 INTEGER CPLXSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM 00075 REAL ZERO 00076 COMPLEX PADVAL 00077 PARAMETER ( CPLXSZ = 8, REALSZ = 4, TOTMEM = 2000000, 00078 $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, 00079 $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), 00080 $ ZERO = 0.0E+0 ) 00081 * .. 00082 * .. Local Scalars .. 00083 LOGICAL CHECK, EST 00084 CHARACTER UPLO 00085 CHARACTER*6 PASSED 00086 CHARACTER*80 OUTFILE 00087 INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, 00088 $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, 00089 $ IPREPAD, IPOSTPAD, IPW, IPW2, ITEMP, J, K, 00090 $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, 00091 $ LRWORK, LWORK, LW2, MYCOL, MYRHS, MYROW, N, NB, 00092 $ NBRHS, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, 00093 $ NPCOL, NPROCS, NPROW, NQ, NRHS, WORKSIZ 00094 REAL ANORM, ANORM1, FRESID, RCOND, SRESID, SRESID2, 00095 $ THRESH 00096 DOUBLE PRECISION NOPS, TMFLOPS 00097 * .. 00098 * .. Local Arrays .. 00099 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), 00100 $ NBRVAL( NTESTS ), NBVAL( NTESTS ), 00101 $ NRVAL( NTESTS ), NVAL( NTESTS ), 00102 $ PVAL( NTESTS ), QVAL( NTESTS ) 00103 DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) 00104 COMPLEX MEM( MEMSIZ ) 00105 * .. 00106 * .. External Subroutines .. 00107 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GRIDEXIT, 00108 $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, 00109 $ IGSUM2D, BLACS_PINFO, PCCHEKPAD, PCFILLPAD, 00110 $ PCLAFCHK, PCLASCHK, PCLLTINFO, 00111 $ PCMATGEN, PCPOCON, PCPORFS, 00112 $ PCPOTRF, PCPOTRRV, PCPOTRS, SLBOOT, 00113 $ SLCOMBINE, SLTIMER 00114 * .. 00115 * .. External Functions .. 00116 LOGICAL LSAME 00117 INTEGER ICEIL, ILCM, NUMROC 00118 REAL PCLANHE 00119 EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PCLANHE 00120 * .. 00121 * .. Intrinsic Functions .. 00122 INTRINSIC DBLE, MAX, MIN 00123 * .. 00124 * .. Data Statements .. 00125 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 00126 * .. 00127 * .. Executable Statements .. 00128 * 00129 * Get starting information 00130 * 00131 CALL BLACS_PINFO( IAM, NPROCS ) 00132 IASEED = 100 00133 IBSEED = 200 00134 CALL PCLLTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, 00135 $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, 00136 $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, 00137 $ THRESH, EST, MEM, IAM, NPROCS ) 00138 CHECK = ( THRESH.GE.0.0E+0 ) 00139 * 00140 * Print headings 00141 * 00142 IF( IAM.EQ.0 ) THEN 00143 WRITE( NOUT, FMT = * ) 00144 WRITE( NOUT, FMT = 9995 ) 00145 WRITE( NOUT, FMT = 9994 ) 00146 WRITE( NOUT, FMT = * ) 00147 END IF 00148 * 00149 * Loop over different process grids 00150 * 00151 DO 50 I = 1, NGRIDS 00152 * 00153 NPROW = PVAL( I ) 00154 NPCOL = QVAL( I ) 00155 * 00156 * Make sure grid information is correct 00157 * 00158 IERR( 1 ) = 0 00159 IF( NPROW.LT.1 ) THEN 00160 IF( IAM.EQ.0 ) 00161 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 00162 IERR( 1 ) = 1 00163 ELSE IF( NPCOL.LT.1 ) THEN 00164 IF( IAM.EQ.0 ) 00165 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 00166 IERR( 1 ) = 1 00167 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 00168 IF( IAM.EQ.0 ) 00169 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 00170 IERR( 1 ) = 1 00171 END IF 00172 * 00173 IF( IERR( 1 ).GT.0 ) THEN 00174 IF( IAM.EQ.0 ) 00175 $ WRITE( NOUT, FMT = 9997 ) 'grid' 00176 KSKIP = KSKIP + 1 00177 GO TO 50 00178 END IF 00179 * 00180 * Define process grid 00181 * 00182 CALL BLACS_GET( -1, 0, ICTXT ) 00183 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 00184 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 00185 * 00186 * Go to bottom of process grid loop if this case doesn't use my 00187 * process 00188 * 00189 IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) 00190 $ GO TO 50 00191 * 00192 DO 40 J = 1, NMAT 00193 * 00194 N = NVAL( J ) 00195 * 00196 * Make sure matrix information is correct 00197 * 00198 IERR( 1 ) = 0 00199 IF( N.LT.1 ) THEN 00200 IF( IAM.EQ.0 ) 00201 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 00202 IERR( 1 ) = 1 00203 ELSE IF( N.LT.1 ) THEN 00204 IF( IAM.EQ.0 ) 00205 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 00206 IERR( 1 ) = 1 00207 END IF 00208 * 00209 * Check all processes for an error 00210 * 00211 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00212 * 00213 IF( IERR( 1 ).GT.0 ) THEN 00214 IF( IAM.EQ.0 ) 00215 $ WRITE( NOUT, FMT = 9997 ) 'matrix' 00216 KSKIP = KSKIP + 1 00217 GO TO 40 00218 END IF 00219 * 00220 DO 30 K = 1, NNB 00221 * 00222 NB = NBVAL( K ) 00223 * 00224 * Make sure nb is legal 00225 * 00226 IERR( 1 ) = 0 00227 IF( NB.LT.1 ) THEN 00228 IERR( 1 ) = 1 00229 IF( IAM.EQ.0 ) 00230 $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB 00231 END IF 00232 * 00233 * Check all processes for an error 00234 * 00235 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00236 * 00237 IF( IERR( 1 ).GT.0 ) THEN 00238 IF( IAM.EQ.0 ) 00239 $ WRITE( NOUT, FMT = 9997 ) 'NB' 00240 KSKIP = KSKIP + 1 00241 GO TO 30 00242 END IF 00243 * 00244 * Padding constants 00245 * 00246 NP = NUMROC( N, NB, MYROW, 0, NPROW ) 00247 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 00248 IF( CHECK ) THEN 00249 IPREPAD = MAX( NB, NP ) 00250 IMIDPAD = NB 00251 IPOSTPAD = MAX( NB, NQ ) 00252 ELSE 00253 IPREPAD = 0 00254 IMIDPAD = 0 00255 IPOSTPAD = 0 00256 END IF 00257 * 00258 * Initialize the array descriptor for the matrix A 00259 * 00260 CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, 00261 $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) 00262 * 00263 * Check all processes for an error 00264 * 00265 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00266 * 00267 IF( IERR( 1 ).LT.0 ) THEN 00268 IF( IAM.EQ.0 ) 00269 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 00270 KSKIP = KSKIP + 1 00271 GO TO 30 00272 END IF 00273 * 00274 * Assign pointers into MEM for SCALAPACK arrays, A is 00275 * allocated starting at position MEM( IPREPAD+1 ) 00276 * 00277 IPA = IPREPAD+1 00278 IF( EST ) THEN 00279 IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD 00280 IPW = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD 00281 ELSE 00282 IPW = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD 00283 END IF 00284 * 00285 * 00286 IF( CHECK ) THEN 00287 * 00288 * Calculate the amount of workspace required by 00289 * the checking routines PCLAFCHK, PCPOTRRV, and 00290 * PCLANHE 00291 * 00292 WORKSIZ = NP * DESCA( NB_ ) 00293 * 00294 WORKSIZ = MAX( WORKSIZ, DESCA( MB_ ) * DESCA( NB_ ) ) 00295 * 00296 LCM = ILCM( NPROW, NPCOL ) 00297 ITEMP = MAX( 2, 2 * NQ ) + NP 00298 IF( NPROW.NE.NPCOL ) THEN 00299 ITEMP = ITEMP + 00300 $ NB * ICEIL( ICEIL( NP, NB ), LCM / NPROW ) 00301 END IF 00302 WORKSIZ = MAX( WORKSIZ, 00303 $ ICEIL( REALSZ * ITEMP, CPLXSZ ) ) 00304 WORKSIZ = WORKSIZ + IPOSTPAD 00305 * 00306 ELSE 00307 * 00308 WORKSIZ = IPOSTPAD 00309 * 00310 END IF 00311 * 00312 * Check for adequate memory for problem size 00313 * 00314 IERR( 1 ) = 0 00315 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN 00316 IF( IAM.EQ.0 ) 00317 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 00318 $ ( IPW+WORKSIZ )*CPLXSZ 00319 IERR( 1 ) = 1 00320 END IF 00321 * 00322 * Check all processes for an error 00323 * 00324 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00325 * 00326 IF( IERR( 1 ).GT.0 ) THEN 00327 IF( IAM.EQ.0 ) 00328 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00329 KSKIP = KSKIP + 1 00330 GO TO 30 00331 END IF 00332 * 00333 * Generate a Hermitian positive definite matrix A 00334 * 00335 CALL PCMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), 00336 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), 00337 $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), 00338 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, 00339 $ MYROW, MYCOL, NPROW, NPCOL ) 00340 * 00341 * Calculate inf-norm of A for residual error-checking 00342 * 00343 IF( CHECK ) THEN 00344 CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), 00345 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, 00346 $ PADVAL ) 00347 CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, 00348 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, 00349 $ IPREPAD, IPOSTPAD, PADVAL ) 00350 ANORM = PCLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, 00351 $ DESCA, MEM( IPW ) ) 00352 ANORM1 = PCLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, 00353 $ DESCA, MEM( IPW ) ) 00354 CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, 00355 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), 00356 $ IPREPAD, IPOSTPAD, PADVAL ) 00357 CALL PCCHEKPAD( ICTXT, 'PCLANHE', WORKSIZ-IPOSTPAD, 00358 $ 1, MEM( IPW-IPREPAD ), 00359 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, 00360 $ PADVAL ) 00361 END IF 00362 * 00363 IF( EST ) THEN 00364 CALL PCMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), 00365 $ DESCA( N_ ), DESCA( MB_ ), 00366 $ DESCA( NB_ ), MEM( IPA0 ), 00367 $ DESCA( LLD_ ), DESCA( RSRC_ ), 00368 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, 00369 $ MYROW, MYCOL, NPROW, NPCOL ) 00370 IF( CHECK ) 00371 $ CALL PCFILLPAD( ICTXT, NP, NQ, 00372 $ MEM( IPA0-IPREPAD ), DESCA( LLD_ ), 00373 $ IPREPAD, IPOSTPAD, PADVAL ) 00374 END IF 00375 * 00376 CALL SLBOOT() 00377 CALL BLACS_BARRIER( ICTXT, 'All' ) 00378 * 00379 * Perform LLt factorization 00380 * 00381 CALL SLTIMER( 1 ) 00382 * 00383 CALL PCPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, INFO ) 00384 * 00385 CALL SLTIMER( 1 ) 00386 * 00387 IF( INFO.NE.0 ) THEN 00388 IF( IAM.EQ.0 ) 00389 $ WRITE( NOUT, FMT = * ) 'PCPOTRF INFO=', INFO 00390 KFAIL = KFAIL + 1 00391 RCOND = ZERO 00392 GO TO 60 00393 END IF 00394 * 00395 IF( CHECK ) THEN 00396 * 00397 * Check for memory overwrite in LLt factorization 00398 * 00399 CALL PCCHEKPAD( ICTXT, 'PCPOTRF', NP, NQ, 00400 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), 00401 $ IPREPAD, IPOSTPAD, PADVAL ) 00402 END IF 00403 * 00404 IF( EST ) THEN 00405 * 00406 * Calculate workspace required for PCPOCON 00407 * 00408 LWORK = MAX( 1, 2*NP ) + 00409 $ MAX( 2, DESCA( NB_ )* 00410 $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), 00411 $ NQ + DESCA( NB_ )* 00412 $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) 00413 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD 00414 LRWORK = MAX( 1, 2*NQ ) 00415 LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + IPOSTPAD 00416 * 00417 IERR( 1 ) = 0 00418 IF( IPW2+LW2.GT.MEMSIZ ) THEN 00419 IF( IAM.EQ.0 ) 00420 $ WRITE( NOUT, FMT = 9996 )'cond est', 00421 $ ( IPW2+LW2 )*CPLXSZ 00422 IERR( 1 ) = 1 00423 END IF 00424 * 00425 * Check all processes for an error 00426 * 00427 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00428 $ -1, 0 ) 00429 * 00430 IF( IERR( 1 ).GT.0 ) THEN 00431 IF( IAM.EQ.0 ) 00432 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00433 KSKIP = KSKIP + 1 00434 GO TO 60 00435 END IF 00436 * 00437 IF( CHECK ) THEN 00438 CALL PCFILLPAD( ICTXT, LWORK, 1, 00439 $ MEM( IPW-IPREPAD ), LWORK, 00440 $ IPREPAD, IPOSTPAD, PADVAL ) 00441 CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, 1, 00442 $ MEM( IPW2-IPREPAD ), 00443 $ LW2-IPOSTPAD, IPREPAD, 00444 $ IPOSTPAD, PADVAL ) 00445 END IF 00446 * 00447 * Compute condition number of the matrix 00448 * 00449 CALL PCPOCON( UPLO, N, MEM( IPA ), 1, 1, DESCA, 00450 $ ANORM1, RCOND, MEM( IPW ), LWORK, 00451 $ MEM( IPW2 ), LRWORK, INFO ) 00452 * 00453 IF( CHECK ) THEN 00454 CALL PCCHEKPAD( ICTXT, 'PCPOCON', NP, NQ, 00455 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), 00456 $ IPREPAD, IPOSTPAD, PADVAL ) 00457 CALL PCCHEKPAD( ICTXT, 'PCPOCON', 00458 $ LWORK, 1, MEM( IPW-IPREPAD ), 00459 $ LWORK, IPREPAD, IPOSTPAD, 00460 $ PADVAL ) 00461 CALL PCCHEKPAD( ICTXT, 'PCPOCON', 00462 $ LW2-IPOSTPAD, 1, 00463 $ MEM( IPW2-IPREPAD ), LW2-IPOSTPAD, 00464 $ IPREPAD, IPOSTPAD, PADVAL ) 00465 END IF 00466 END IF 00467 * 00468 * Loop over the different values for NRHS 00469 * 00470 DO 20 HH = 1, NNR 00471 * 00472 NRHS = NRVAL( HH ) 00473 * 00474 DO 10 KK = 1, NNBR 00475 * 00476 NBRHS = NBRVAL( KK ) 00477 * 00478 * Initialize Array Descriptor for rhs 00479 * 00480 CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, 00481 $ ICTXT, MAX( 1, NP )+IMIDPAD, 00482 $ IERR( 1 ) ) 00483 * 00484 * move IPW to allow room for RHS 00485 * 00486 MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL, 00487 $ DESCB( CSRC_ ), NPCOL ) 00488 IPB = IPW 00489 * 00490 IF( EST ) THEN 00491 IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + 00492 $ IPREPAD 00493 IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + IPOSTPAD 00494 $ + IPREPAD 00495 IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD 00496 IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD 00497 ELSE 00498 IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + 00499 $ IPREPAD 00500 END IF 00501 * 00502 IF( CHECK ) THEN 00503 * 00504 * Calculate the amount of workspace required by 00505 * the checking routines PCLASCHK 00506 * 00507 LCMQ = LCM / NPCOL 00508 WORKSIZ = MAX( WORKSIZ-IPOSTPAD, 00509 $ NQ * NBRHS + NP * NBRHS + 00510 $ MAX( MAX( NQ*NB, 2*NBRHS ), 00511 $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, 00512 $ 0,0,LCMQ ) ) ) 00513 WORKSIZ = IPOSTPAD + WORKSIZ 00514 ELSE 00515 WORKSIZ = IPOSTPAD 00516 END IF 00517 * 00518 IERR( 1 ) = 0 00519 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN 00520 IF( IAM.EQ.0 ) 00521 $ WRITE( NOUT, FMT = 9996 )'solve', 00522 $ ( IPW+WORKSIZ )*CPLXSZ 00523 IERR( 1 ) = 1 00524 END IF 00525 * 00526 * Check all processes for an error 00527 * 00528 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00529 $ -1, 0 ) 00530 * 00531 IF( IERR( 1 ).GT.0 ) THEN 00532 IF( IAM.EQ.0 ) 00533 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00534 KSKIP = KSKIP + 1 00535 GO TO 10 00536 END IF 00537 * 00538 * Generate RHS 00539 * 00540 CALL PCMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), 00541 $ DESCB( N_ ), DESCB( MB_ ), 00542 $ DESCB( NB_ ), MEM( IPB ), 00543 $ DESCB( LLD_ ), DESCB( RSRC_ ), 00544 $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, 00545 $ MYRHS, MYROW, MYCOL, NPROW, NPCOL ) 00546 * 00547 IF( CHECK ) 00548 $ CALL PCFILLPAD( ICTXT, NP, MYRHS, 00549 $ MEM( IPB-IPREPAD ), 00550 $ DESCB( LLD_ ), 00551 $ IPREPAD, IPOSTPAD, PADVAL ) 00552 * 00553 IF( EST ) THEN 00554 CALL PCMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), 00555 $ DESCB( N_ ), DESCB( MB_ ), 00556 $ DESCB( NB_ ), MEM( IPB0 ), 00557 $ DESCB( LLD_ ), DESCB( RSRC_ ), 00558 $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, 00559 $ MYRHS, MYROW, MYCOL, NPROW, 00560 $ NPCOL ) 00561 * 00562 IF( CHECK ) THEN 00563 CALL PCFILLPAD( ICTXT, NP, MYRHS, 00564 $ MEM( IPB0-IPREPAD ), 00565 $ DESCB( LLD_ ), IPREPAD, 00566 $ IPOSTPAD, PADVAL ) 00567 CALL PCFILLPAD( ICTXT, 1, MYRHS, 00568 $ MEM( IPFERR-IPREPAD ), 1, 00569 $ IPREPAD, IPOSTPAD, 00570 $ PADVAL ) 00571 CALL PCFILLPAD( ICTXT, 1, MYRHS, 00572 $ MEM( IPBERR-IPREPAD ), 1, 00573 $ IPREPAD, IPOSTPAD, 00574 $ PADVAL ) 00575 END IF 00576 END IF 00577 * 00578 CALL BLACS_BARRIER( ICTXT, 'All' ) 00579 CALL SLTIMER( 2 ) 00580 * 00581 * Solve linear system via Cholesky factorization 00582 * 00583 CALL PCPOTRS( UPLO, N, NRHS, MEM( IPA ), 1, 1, 00584 $ DESCA, MEM( IPB ), 1, 1, DESCB, 00585 $ INFO ) 00586 * 00587 CALL SLTIMER( 2 ) 00588 * 00589 IF( CHECK ) THEN 00590 * 00591 * check for memory overwrite 00592 * 00593 CALL PCCHEKPAD( ICTXT, 'PCPOTRS', NP, NQ, 00594 $ MEM( IPA-IPREPAD ), 00595 $ DESCA( LLD_ ), 00596 $ IPREPAD, IPOSTPAD, PADVAL ) 00597 CALL PCCHEKPAD( ICTXT, 'PCPOTRS', NP, 00598 $ MYRHS, MEM( IPB-IPREPAD ), 00599 $ DESCB( LLD_ ), IPREPAD, 00600 $ IPOSTPAD, PADVAL ) 00601 * 00602 CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, 00603 $ MEM( IPW-IPREPAD ), 00604 $ WORKSIZ-IPOSTPAD, IPREPAD, 00605 $ IPOSTPAD, PADVAL ) 00606 * 00607 * check the solution to rhs 00608 * 00609 CALL PCLASCHK( 'Herm', 'Diag', N, NRHS, 00610 $ MEM( IPB ), 1, 1, DESCB, 00611 $ IASEED, 1, 1, DESCA, IBSEED, 00612 $ ANORM, SRESID, MEM( IPW ) ) 00613 * 00614 IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) 00615 $ WRITE( NOUT, FMT = 9985 ) SRESID 00616 * 00617 * check for memory overwrite 00618 * 00619 CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, 00620 $ MYRHS, MEM( IPB-IPREPAD ), 00621 $ DESCB( LLD_ ), IPREPAD, 00622 $ IPOSTPAD, PADVAL ) 00623 CALL PCCHEKPAD( ICTXT, 'PCLASCHK', 00624 $ WORKSIZ-IPOSTPAD, 1, 00625 $ MEM( IPW-IPREPAD ), 00626 $ WORKSIZ-IPOSTPAD, IPREPAD, 00627 $ IPOSTPAD, PADVAL ) 00628 * 00629 * The second test is a NaN trap 00630 * 00631 IF( ( SRESID.LE.THRESH ).AND. 00632 $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN 00633 KPASS = KPASS + 1 00634 PASSED = 'PASSED' 00635 ELSE 00636 KFAIL = KFAIL + 1 00637 PASSED = 'FAILED' 00638 END IF 00639 ELSE 00640 KPASS = KPASS + 1 00641 SRESID = SRESID - SRESID 00642 PASSED = 'BYPASS' 00643 END IF 00644 * 00645 IF( EST ) THEN 00646 * 00647 * Calculate workspace required for PCPORFS 00648 * 00649 LWORK = MAX( 1, 2*NP ) 00650 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD 00651 LRWORK = MAX( 1, NP ) 00652 LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + 00653 $ IPOSTPAD 00654 * 00655 IERR( 1 ) = 0 00656 IF( IPW2+LW2.GT.MEMSIZ ) THEN 00657 IF( IAM.EQ.0 ) 00658 $ WRITE( NOUT, FMT = 9996 ) 00659 $ 'iter ref', ( IPW2+LW2 )*CPLXSZ 00660 IERR( 1 ) = 1 00661 END IF 00662 * 00663 * Check all processes for an error 00664 * 00665 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 00666 $ 1, -1, 0 ) 00667 * 00668 IF( IERR( 1 ).GT.0 ) THEN 00669 IF( IAM.EQ.0 ) 00670 $ WRITE( NOUT, FMT = 9997 ) 00671 $ 'MEMORY' 00672 KSKIP = KSKIP + 1 00673 GO TO 10 00674 END IF 00675 * 00676 IF( CHECK ) THEN 00677 CALL PCFILLPAD( ICTXT, LWORK, 1, 00678 $ MEM( IPW-IPREPAD ), 00679 $ LWORK, IPREPAD, IPOSTPAD, 00680 $ PADVAL ) 00681 CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, 00682 $ 1, MEM( IPW2-IPREPAD ), 00683 $ LW2-IPOSTPAD, 00684 $ IPREPAD, IPOSTPAD, 00685 $ PADVAL ) 00686 END IF 00687 * 00688 * Use iterative refinement to improve the 00689 * computed solution 00690 * 00691 CALL PCPORFS( UPLO, N, NRHS, MEM( IPA0 ), 00692 $ 1, 1, DESCA, MEM( IPA ), 1, 1, 00693 $ DESCA, MEM( IPB0 ), 1, 1, 00694 $ DESCB, MEM( IPB ), 1, 1, DESCB, 00695 $ MEM( IPFERR ), MEM( IPBERR ), 00696 $ MEM( IPW ), LWORK, MEM( IPW2 ), 00697 $ LRWORK, INFO ) 00698 * 00699 * check for memory overwrite 00700 * 00701 IF( CHECK ) THEN 00702 CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, 00703 $ NQ, MEM( IPA0-IPREPAD ), 00704 $ DESCA( LLD_ ), IPREPAD, 00705 $ IPOSTPAD, PADVAL ) 00706 CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, 00707 $ NQ, MEM( IPA-IPREPAD ), 00708 $ DESCA( LLD_ ), IPREPAD, 00709 $ IPOSTPAD, PADVAL ) 00710 CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, 00711 $ MYRHS, MEM( IPB-IPREPAD ), 00712 $ DESCB( LLD_ ), IPREPAD, 00713 $ IPOSTPAD, PADVAL ) 00714 CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, 00715 $ MYRHS, 00716 $ MEM( IPB0-IPREPAD ), 00717 $ DESCB( LLD_ ), IPREPAD, 00718 $ IPOSTPAD, PADVAL ) 00719 CALL PCCHEKPAD( ICTXT, 'PCPORFS', 1, 00720 $ MYRHS, 00721 $ MEM( IPFERR-IPREPAD ), 1, 00722 $ IPREPAD, IPOSTPAD, 00723 $ PADVAL ) 00724 CALL PCCHEKPAD( ICTXT, 'PCPORFS', 1, 00725 $ MYRHS, 00726 $ MEM( IPBERR-IPREPAD ), 1, 00727 $ IPREPAD, IPOSTPAD, 00728 $ PADVAL ) 00729 CALL PCCHEKPAD( ICTXT, 'PCPORFS', LWORK, 00730 $ 1, MEM( IPW-IPREPAD ), 00731 $ LWORK, IPREPAD, IPOSTPAD, 00732 $ PADVAL ) 00733 CALL PCCHEKPAD( ICTXT, 'PCPORFS', 00734 $ LW2-IPOSTPAD, 1, 00735 $ MEM( IPW2-IPREPAD ), 00736 $ LW2-IPOSTPAD, 00737 $ IPREPAD, IPOSTPAD, 00738 $ PADVAL ) 00739 * 00740 CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 00741 $ 1, MEM( IPW-IPREPAD ), 00742 $ WORKSIZ-IPOSTPAD, IPREPAD, 00743 $ IPOSTPAD, PADVAL ) 00744 * 00745 * check the solution to rhs 00746 * 00747 CALL PCLASCHK( 'Herm', 'Diag', N, NRHS, 00748 $ MEM( IPB ), 1, 1, DESCB, 00749 $ IASEED, 1, 1, DESCA, 00750 $ IBSEED, ANORM, SRESID2, 00751 $ MEM( IPW ) ) 00752 * 00753 IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) 00754 $ WRITE( NOUT, FMT = 9985 ) SRESID2 00755 * 00756 * check for memory overwrite 00757 * 00758 CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, 00759 $ MYRHS, MEM( IPB-IPREPAD ), 00760 $ DESCB( LLD_ ), IPREPAD, 00761 $ IPOSTPAD, PADVAL ) 00762 CALL PCCHEKPAD( ICTXT, 'PCLASCHK', 00763 $ WORKSIZ-IPOSTPAD, 1, 00764 $ MEM( IPW-IPREPAD ), 00765 $ WORKSIZ-IPOSTPAD, 00766 $ IPREPAD, IPOSTPAD, 00767 $ PADVAL ) 00768 END IF 00769 END IF 00770 * 00771 * Gather maximum of all CPU and WALL clock timings 00772 * 00773 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 00774 $ WTIME ) 00775 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 00776 $ CTIME ) 00777 * 00778 * Print results 00779 * 00780 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 00781 * 00782 * 4/3 N^3 + 3 N^2 flops for LLt factorization 00783 * 00784 NOPS = 4.0D+0*(DBLE(N)**3)/3.0D+0 + 00785 $ 3.0D+0*(DBLE(N)**2) 00786 * 00787 * nrhs * 8 N^2 flops for LLt solve. 00788 * 00789 NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS) 00790 * 00791 * Calculate total megaflops -- factorization and 00792 * solve -- for WALL and CPU time, and print output 00793 * 00794 * Print WALL time if machine supports it 00795 * 00796 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 00797 TMFLOPS = NOPS / 00798 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 00799 ELSE 00800 TMFLOPS = 0.0D+0 00801 END IF 00802 * 00803 IF( WTIME( 2 ).GE.0.0D+0 ) 00804 $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, N, 00805 $ NB, NRHS, NBRHS, NPROW, NPCOL, 00806 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 00807 $ PASSED 00808 * 00809 * Print CPU time if machine supports it 00810 * 00811 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 00812 TMFLOPS = NOPS / 00813 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 00814 ELSE 00815 TMFLOPS = 0.0D+0 00816 END IF 00817 * 00818 IF( CTIME( 2 ).GE.0.0D+0 ) 00819 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, N, 00820 $ NB, NRHS, NBRHS, NPROW, NPCOL, 00821 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 00822 $ PASSED 00823 * 00824 END IF 00825 10 CONTINUE 00826 20 CONTINUE 00827 * 00828 IF( CHECK .AND. SRESID.GT.THRESH ) THEN 00829 * 00830 * Compute FRESID = ||A - LL'|| / (||A|| * N * eps) 00831 * 00832 CALL PCPOTRRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, 00833 $ MEM( IPW ) ) 00834 CALL PCLAFCHK( 'Symm', 'Diag', N, N, MEM( IPA ), 1, 1, 00835 $ DESCA, IASEED, ANORM, FRESID, 00836 $ MEM( IPW ) ) 00837 * 00838 * Check for memory overwrite 00839 * 00840 CALL PCCHEKPAD( ICTXT, 'PCPOTRRV', NP, NQ, 00841 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), 00842 $ IPREPAD, IPOSTPAD, PADVAL ) 00843 CALL PCCHEKPAD( ICTXT, 'PCGETRRV', 00844 $ WORKSIZ-IPOSTPAD, 1, 00845 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, 00846 $ IPREPAD, IPOSTPAD, PADVAL ) 00847 * 00848 IF( IAM.EQ.0 ) THEN 00849 IF( LSAME( UPLO, 'L' ) ) THEN 00850 WRITE( NOUT, FMT = 9986 ) 'L*L''', FRESID 00851 ELSE 00852 WRITE( NOUT, FMT = 9986 ) 'U''*U', FRESID 00853 END IF 00854 END IF 00855 END IF 00856 * 00857 30 CONTINUE 00858 40 CONTINUE 00859 CALL BLACS_GRIDEXIT( ICTXT ) 00860 50 CONTINUE 00861 * 00862 * Print ending messages and close output file 00863 * 00864 60 CONTINUE 00865 IF( IAM.EQ.0 ) THEN 00866 KTESTS = KPASS + KFAIL + KSKIP 00867 WRITE( NOUT, FMT = * ) 00868 WRITE( NOUT, FMT = 9992 ) KTESTS 00869 IF( CHECK ) THEN 00870 WRITE( NOUT, FMT = 9991 ) KPASS 00871 WRITE( NOUT, FMT = 9989 ) KFAIL 00872 ELSE 00873 WRITE( NOUT, FMT = 9990 ) KPASS 00874 END IF 00875 WRITE( NOUT, FMT = 9988 ) KSKIP 00876 WRITE( NOUT, FMT = * ) 00877 WRITE( NOUT, FMT = * ) 00878 WRITE( NOUT, FMT = 9987 ) 00879 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 00880 $ CLOSE ( NOUT ) 00881 END IF 00882 * 00883 CALL BLACS_EXIT( 0 ) 00884 * 00885 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 00886 $ '; It should be at least 1' ) 00887 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 00888 $ I4 ) 00889 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 00890 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 00891 $ I11 ) 00892 9995 FORMAT( 'TIME UPLO N NB NRHS NBRHS P Q LLt Time ', 00893 $ 'Slv Time MFLOPS CHECK' ) 00894 9994 FORMAT( '---- ---- ----- --- ---- ----- ---- ---- -------- ', 00895 $ '-------- -------- ------' ) 00896 9993 FORMAT( A4, 4X, A1, 1X, I5, 1X, I3, 1X, I4, 1X, I5, 1X, I4, 1X, 00897 $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 00898 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 00899 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 00900 9990 FORMAT( I5, ' tests completed without checking.' ) 00901 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 00902 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 00903 9987 FORMAT( 'END OF TESTS.' ) 00904 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 00905 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 00906 * 00907 STOP 00908 * 00909 * End of PCLLTDRIVER 00910 * 00911 END