|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 PROGRAM PCDTDRIVER 00002 * 00003 * 00004 * -- ScaLAPACK routine (version 1.7) -- 00005 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00006 * and University of California, Berkeley. 00007 * November 15, 1997 00008 * 00009 * Purpose 00010 * ======= 00011 * 00012 * PCDTDRIVER is a test program for the 00013 * ScaLAPACK Band Cholesky routines corresponding to the options 00014 * indicated by CDT. This test driver performs an 00015 * A = L*U factorization 00016 * and solves a linear system with the factors for 1 or more RHS. 00017 * 00018 * The program must be driven by a short data file. 00019 * Here's an example file: 00020 *'ScaLAPACK, Version 1.2, banded linear systems input file' 00021 *'PVM.' 00022 *'' output file name (if any) 00023 *6 device out 00024 *'L' define Lower or Upper 00025 *9 number of problem sizes 00026 *1 5 17 28 37 121 200 1023 2048 3073 values of N 00027 *6 number of bandwidths 00028 *1 2 4 10 31 64 values of BW 00029 *1 number of NB's 00030 *-1 3 4 5 values of NB (-1 for automatic choice) 00031 *1 number of NRHS's (must be 1) 00032 *8 values of NRHS 00033 *1 number of NBRHS's (ignored) 00034 *1 values of NBRHS (ignored) 00035 *6 number of process grids 00036 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" 00037 *3.0 threshold 00038 * 00039 * Internal Parameters 00040 * =================== 00041 * 00042 * TOTMEM INTEGER, default = 6200000. 00043 * TOTMEM is a machine-specific parameter indicating the 00044 * maximum amount of available memory in bytes. 00045 * The user should customize TOTMEM to his platform. Remember 00046 * to leave room in memory for the operating system, the BLACS 00047 * buffer, etc. For example, on a system with 8 MB of memory 00048 * per process (e.g., one processor on an Intel iPSC/860), the 00049 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 00050 * code, BLACS buffer, etc). However, for PVM, we usually set 00051 * TOTMEM = 2000000. Some experimenting with the maximum value 00052 * of TOTMEM may be required. 00053 * 00054 * INTGSZ INTEGER, default = 4 bytes. 00055 * CPLXSZ INTEGER, default = 8 bytes. 00056 * INTGSZ and CPLXSZ indicate the length in bytes on the 00057 * given platform for an integer and a single precision 00058 * complex. 00059 * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) 00060 * All arrays used by ScaLAPACK routines are allocated from 00061 * this array and referenced by pointers. The integer IPB, 00062 * for example, is a pointer to the starting element of MEM for 00063 * the solution vector(s) B. 00064 * 00065 * ===================================================================== 00066 * 00067 * Code Developer: Andrew J. Cleary, University of Tennessee. 00068 * Current address: Lawrence Livermore National Labs. 00069 * This version released: August, 2001. 00070 * 00071 * ===================================================================== 00072 * 00073 * .. Parameters .. 00074 INTEGER TOTMEM 00075 PARAMETER ( TOTMEM = 3000000 ) 00076 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 00077 $ LLD_, MB_, M_, NB_, N_, RSRC_ 00078 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 00079 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 00080 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 00081 * 00082 REAL ZERO 00083 INTEGER CPLXSZ, MEMSIZ, NTESTS 00084 COMPLEX PADVAL 00085 PARAMETER ( CPLXSZ = 8, 00086 $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, 00087 $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), 00088 $ ZERO = 0.0E+0 ) 00089 INTEGER INT_ONE 00090 PARAMETER ( INT_ONE = 1 ) 00091 * .. 00092 * .. Local Scalars .. 00093 LOGICAL CHECK 00094 CHARACTER TRANS 00095 CHARACTER*6 PASSED 00096 CHARACTER*80 OUTFILE 00097 INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, 00098 $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, 00099 $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, 00100 $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, 00101 $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, 00102 $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, 00103 $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, 00104 $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, 00105 $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ 00106 REAL ANORM, SRESID, THRESH 00107 DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 00108 * .. 00109 * .. Local Arrays .. 00110 INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), 00111 $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), 00112 $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), 00113 $ NRVAL( NTESTS ), NVAL( NTESTS ), 00114 $ PVAL( NTESTS ), QVAL( NTESTS ) 00115 DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) 00116 COMPLEX MEM( MEMSIZ ) 00117 * .. 00118 * .. External Subroutines .. 00119 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, 00120 $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, 00121 $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, 00122 $ PCCHEKPAD, PCDTINFO, PCDTLASCHK, PCDTTRF, 00123 $ PCDTTRS, PCFILLPAD, PCMATGEN, SLBOOT, 00124 $ SLCOMBINE, SLTIMER 00125 * .. 00126 * .. External Functions .. 00127 INTEGER NUMROC 00128 LOGICAL LSAME 00129 REAL PCLANGE 00130 EXTERNAL LSAME, NUMROC, PCLANGE 00131 * .. 00132 * .. Intrinsic Functions .. 00133 INTRINSIC DBLE, MAX, MIN, MOD 00134 * .. 00135 * .. Data Statements .. 00136 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 00137 * .. 00138 * 00139 * 00140 * 00141 * .. Executable Statements .. 00142 * 00143 * Get starting information 00144 * 00145 CALL BLACS_PINFO( IAM, NPROCS ) 00146 IASEED = 100 00147 IBSEED = 200 00148 * 00149 CALL PCDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, 00150 $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, 00151 $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, 00152 $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) 00153 * 00154 CHECK = ( THRESH.GE.0.0E+0 ) 00155 * 00156 * Print headings 00157 * 00158 IF( IAM.EQ.0 ) THEN 00159 WRITE( NOUT, FMT = * ) 00160 WRITE( NOUT, FMT = 9995 ) 00161 WRITE( NOUT, FMT = 9994 ) 00162 WRITE( NOUT, FMT = * ) 00163 END IF 00164 * 00165 * Loop over different process grids 00166 * 00167 DO 60 I = 1, NGRIDS 00168 * 00169 NPROW = PVAL( I ) 00170 NPCOL = QVAL( I ) 00171 * 00172 * Make sure grid information is correct 00173 * 00174 IERR( 1 ) = 0 00175 IF( NPROW.LT.1 ) THEN 00176 IF( IAM.EQ.0 ) 00177 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 00178 IERR( 1 ) = 1 00179 ELSE IF( NPCOL.LT.1 ) THEN 00180 IF( IAM.EQ.0 ) 00181 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 00182 IERR( 1 ) = 1 00183 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 00184 IF( IAM.EQ.0 ) 00185 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 00186 IERR( 1 ) = 1 00187 END IF 00188 * 00189 IF( IERR( 1 ).GT.0 ) THEN 00190 IF( IAM.EQ.0 ) 00191 $ WRITE( NOUT, FMT = 9997 ) 'grid' 00192 KSKIP = KSKIP + 1 00193 GO TO 50 00194 END IF 00195 * 00196 * Define process grid 00197 * 00198 CALL BLACS_GET( -1, 0, ICTXT ) 00199 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 00200 * 00201 * 00202 * Define transpose process grid 00203 * 00204 CALL BLACS_GET( -1, 0, ICTXTB ) 00205 CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) 00206 * 00207 * Go to bottom of process grid loop if this case doesn't use my 00208 * process 00209 * 00210 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 00211 * 00212 IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN 00213 GO TO 50 00214 ENDIF 00215 * 00216 DO 40 J = 1, NMAT 00217 * 00218 IERR( 1 ) = 0 00219 * 00220 N = NVAL( J ) 00221 * 00222 * Make sure matrix information is correct 00223 * 00224 IF( N.LT.1 ) THEN 00225 IF( IAM.EQ.0 ) 00226 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 00227 IERR( 1 ) = 1 00228 END IF 00229 * 00230 * Check all processes for an error 00231 * 00232 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00233 $ -1, 0 ) 00234 * 00235 IF( IERR( 1 ).GT.0 ) THEN 00236 IF( IAM.EQ.0 ) 00237 $ WRITE( NOUT, FMT = 9997 ) 'size' 00238 KSKIP = KSKIP + 1 00239 GO TO 40 00240 END IF 00241 * 00242 * 00243 DO 45 BW_NUM = 1, NBW 00244 * 00245 IERR( 1 ) = 0 00246 * 00247 BWL = 1 00248 IF( BWL.LT.1 ) THEN 00249 IF( IAM.EQ.0 ) 00250 $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL 00251 IERR( 1 ) = 1 00252 END IF 00253 * 00254 BWU = 1 00255 IF( BWU.LT.1 ) THEN 00256 IF( IAM.EQ.0 ) 00257 $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU 00258 IERR( 1 ) = 1 00259 END IF 00260 * 00261 IF( BWL.GT.N-1 ) THEN 00262 IF( IAM.EQ.0 ) THEN 00263 IERR( 1 ) = 1 00264 ENDIF 00265 END IF 00266 * 00267 IF( BWU.GT.N-1 ) THEN 00268 IF( IAM.EQ.0 ) THEN 00269 IERR( 1 ) = 1 00270 ENDIF 00271 END IF 00272 * 00273 * Check all processes for an error 00274 * 00275 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00276 $ -1, 0 ) 00277 * 00278 IF( IERR( 1 ).GT.0 ) THEN 00279 KSKIP = KSKIP + 1 00280 GO TO 45 00281 END IF 00282 * 00283 DO 30 K = 1, NNB 00284 * 00285 IERR( 1 ) = 0 00286 * 00287 NB = NBVAL( K ) 00288 IF( NB.LT.0 ) THEN 00289 NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) 00290 $ + INT_ONE 00291 NB = MAX( NB, 2*INT_ONE ) 00292 NB = MIN( N, NB ) 00293 END IF 00294 * 00295 * Make sure NB is legal 00296 * 00297 IERR( 1 ) = 0 00298 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN 00299 IERR( 1 ) = 1 00300 END IF 00301 * 00302 * Check all processes for an error 00303 * 00304 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00305 $ -1, 0 ) 00306 * 00307 IF( IERR( 1 ).GT.0 ) THEN 00308 KSKIP = KSKIP + 1 00309 GO TO 30 00310 END IF 00311 * 00312 * Padding constants 00313 * 00314 NP = NUMROC( (3), (3), 00315 $ MYROW, 0, NPROW ) 00316 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 00317 * 00318 IF( CHECK ) THEN 00319 IPREPAD = ((3)+10) 00320 IMIDPAD = 10 00321 IPOSTPAD = ((3)+10) 00322 ELSE 00323 IPREPAD = 0 00324 IMIDPAD = 0 00325 IPOSTPAD = 0 00326 END IF 00327 * 00328 * Initialize the array descriptor for the matrix A 00329 * 00330 CALL DESCINIT( DESCA2D, N, (3), 00331 $ NB, 1, 0, 0, 00332 $ ICTXTB, NB+10, IERR( 1 ) ) 00333 * 00334 * Convert this to 1D descriptor 00335 * 00336 DESCA( 1 ) = 501 00337 DESCA( 3 ) = N 00338 DESCA( 4 ) = NB 00339 DESCA( 5 ) = 0 00340 DESCA( 2 ) = ICTXT 00341 DESCA( 6 ) = ((3)+10) 00342 DESCA( 7 ) = 0 00343 * 00344 IERR_TEMP = IERR( 1 ) 00345 IERR( 1 ) = 0 00346 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) 00347 * 00348 * Check all processes for an error 00349 * 00350 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00351 * 00352 IF( IERR( 1 ).LT.0 ) THEN 00353 IF( IAM.EQ.0 ) 00354 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 00355 KSKIP = KSKIP + 1 00356 GO TO 30 00357 END IF 00358 * 00359 * Assign pointers into MEM for SCALAPACK arrays, A is 00360 * allocated starting at position MEM( IPREPAD+1 ) 00361 * 00362 FREE_PTR = 1 00363 IPB = 0 00364 * 00365 * Save room for prepadding 00366 FREE_PTR = FREE_PTR + IPREPAD 00367 * 00368 IPA = FREE_PTR 00369 FREE_PTR = FREE_PTR + (NB+10)*(3) 00370 $ + IPOSTPAD 00371 * 00372 * Add memory for fillin 00373 * Fillin space needs to store: 00374 * Fillin spike: 00375 * Contribution to previous proc's diagonal block of 00376 * reduced system: 00377 * Off-diagonal block of reduced system: 00378 * Diagonal block of reduced system: 00379 * 00380 FILLIN_SIZE = 00381 $ (12*NPCOL+3*NB) 00382 * 00383 * Claim memory for fillin 00384 * 00385 FREE_PTR = FREE_PTR + IPREPAD 00386 IP_FILLIN = FREE_PTR 00387 FREE_PTR = FREE_PTR + FILLIN_SIZE 00388 * 00389 * Workspace needed by computational routines: 00390 * 00391 IPW_SIZE = 0 00392 * 00393 * factorization: 00394 * 00395 IPW_SIZE = 8*NPCOL 00396 * 00397 * Claim memory for IPW 00398 * 00399 IPW = FREE_PTR 00400 FREE_PTR = FREE_PTR + IPW_SIZE 00401 * 00402 * Check for adequate memory for problem size 00403 * 00404 IERR( 1 ) = 0 00405 IF( FREE_PTR.GT.MEMSIZ ) THEN 00406 IF( IAM.EQ.0 ) 00407 $ WRITE( NOUT, FMT = 9996 ) 00408 $ 'divide and conquer factorization', 00409 $ (FREE_PTR )*CPLXSZ 00410 IERR( 1 ) = 1 00411 END IF 00412 * 00413 * Check all processes for an error 00414 * 00415 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 00416 $ 1, -1, 0 ) 00417 * 00418 IF( IERR( 1 ).GT.0 ) THEN 00419 IF( IAM.EQ.0 ) 00420 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00421 KSKIP = KSKIP + 1 00422 GO TO 30 00423 END IF 00424 * 00425 * Worksize needed for LAPRNT 00426 WORKSIZ = MAX( ((3)+10), NB ) 00427 * 00428 IF( CHECK ) THEN 00429 * 00430 * Calculate the amount of workspace required by 00431 * the checking routines. 00432 * 00433 * PCLANGE 00434 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) 00435 * 00436 * PCDTLASCHK 00437 WORKSIZ = MAX( WORKSIZ, 00438 $ MAX(5,NB)+2*NB ) 00439 END IF 00440 * 00441 FREE_PTR = FREE_PTR + IPREPAD 00442 IP_DRIVER_W = FREE_PTR 00443 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD 00444 * 00445 * 00446 * Check for adequate memory for problem size 00447 * 00448 IERR( 1 ) = 0 00449 IF( FREE_PTR.GT.MEMSIZ ) THEN 00450 IF( IAM.EQ.0 ) 00451 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 00452 $ ( FREE_PTR )*CPLXSZ 00453 IERR( 1 ) = 1 00454 END IF 00455 * 00456 * Check all processes for an error 00457 * 00458 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 00459 $ 1, -1, 0 ) 00460 * 00461 IF( IERR( 1 ).GT.0 ) THEN 00462 IF( IAM.EQ.0 ) 00463 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00464 KSKIP = KSKIP + 1 00465 GO TO 30 00466 END IF 00467 * 00468 CALL PCBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB, 00469 $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, 00470 $ MYCOL, NPROW, NPCOL ) 00471 CALL PCFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), 00472 $ NB+10, IPREPAD, IPOSTPAD, 00473 $ PADVAL ) 00474 * 00475 CALL PCFILLPAD( ICTXT, WORKSIZ, 1, 00476 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 00477 $ IPREPAD, IPOSTPAD, PADVAL ) 00478 * 00479 * Calculate norm of A for residual error-checking 00480 * 00481 IF( CHECK ) THEN 00482 * 00483 ANORM = PCLANGE( 'I', N, 00484 $ (3), MEM( IPA ), 1, 1, 00485 $ DESCA2D, MEM( IP_DRIVER_W ) ) 00486 CALL PCCHEKPAD( ICTXT, 'PCLANGE', NQ, NP, 00487 $ MEM( IPA-IPREPAD ), NB+10, 00488 $ IPREPAD, IPOSTPAD, PADVAL ) 00489 CALL PCCHEKPAD( ICTXT, 'PCLANGE', 00490 $ WORKSIZ, 1, 00491 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 00492 $ IPREPAD, IPOSTPAD, PADVAL ) 00493 END IF 00494 * 00495 * 00496 CALL SLBOOT() 00497 CALL BLACS_BARRIER( ICTXT, 'All' ) 00498 * 00499 * Perform factorization 00500 * 00501 CALL SLTIMER( 1 ) 00502 * 00503 CALL PCDTTRF( N, MEM( IPA+2*( NB+10 ) ), 00504 $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1, 00505 $ DESCA, MEM( IP_FILLIN ), FILLIN_SIZE, 00506 $ MEM( IPW ), IPW_SIZE, INFO ) 00507 * 00508 CALL SLTIMER( 1 ) 00509 * 00510 IF( INFO.NE.0 ) THEN 00511 IF( IAM.EQ.0 ) THEN 00512 WRITE( NOUT, FMT = * ) 'PCDTTRF INFO=', INFO 00513 ENDIF 00514 KFAIL = KFAIL + 1 00515 GO TO 30 00516 END IF 00517 * 00518 IF( CHECK ) THEN 00519 * 00520 * Check for memory overwrite in factorization 00521 * 00522 CALL PCCHEKPAD( ICTXT, 'PCDTTRF', NQ, 00523 $ NP, MEM( IPA-IPREPAD ), NB+10, 00524 $ IPREPAD, IPOSTPAD, PADVAL ) 00525 END IF 00526 * 00527 * 00528 * Loop over the different values for NRHS 00529 * 00530 DO 20 HH = 1, NNR 00531 * 00532 IERR( 1 ) = 0 00533 * 00534 NRHS = NRVAL( HH ) 00535 * 00536 * Initialize Array Descriptor for rhs 00537 * 00538 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, 00539 $ ICTXTB, NB+10, IERR( 1 ) ) 00540 * 00541 * Convert this to 1D descriptor 00542 * 00543 DESCB( 1 ) = 502 00544 DESCB( 3 ) = N 00545 DESCB( 4 ) = NB 00546 DESCB( 5 ) = 0 00547 DESCB( 2 ) = ICTXT 00548 DESCB( 6 ) = DESCB2D( LLD_ ) 00549 DESCB( 7 ) = 0 00550 * 00551 * reset free_ptr to reuse space for right hand sides 00552 * 00553 IF( IPB .GT. 0 ) THEN 00554 FREE_PTR = IPB 00555 ENDIF 00556 * 00557 FREE_PTR = FREE_PTR + IPREPAD 00558 IPB = FREE_PTR 00559 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) 00560 $ + IPOSTPAD 00561 * 00562 * Allocate workspace for workspace in TRS routine: 00563 * 00564 IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS 00565 * 00566 IPW_SOLVE = FREE_PTR 00567 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE 00568 * 00569 IERR( 1 ) = 0 00570 IF( FREE_PTR.GT.MEMSIZ ) THEN 00571 IF( IAM.EQ.0 ) 00572 $ WRITE( NOUT, FMT = 9996 )'solve', 00573 $ ( FREE_PTR )*CPLXSZ 00574 IERR( 1 ) = 1 00575 END IF 00576 * 00577 * Check all processes for an error 00578 * 00579 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, 00580 $ IERR, 1, -1, 0 ) 00581 * 00582 IF( IERR( 1 ).GT.0 ) THEN 00583 IF( IAM.EQ.0 ) 00584 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00585 KSKIP = KSKIP + 1 00586 GO TO 15 00587 END IF 00588 * 00589 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) 00590 * 00591 * Generate RHS 00592 * 00593 CALL PCMATGEN(ICTXTB, 'No', 'No', 00594 $ DESCB2D( M_ ), DESCB2D( N_ ), 00595 $ DESCB2D( MB_ ), DESCB2D( NB_ ), 00596 $ MEM( IPB ), 00597 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), 00598 $ DESCB2D( CSRC_ ), 00599 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, 00600 $ MYROW, NPCOL, NPROW ) 00601 * 00602 IF( CHECK ) THEN 00603 CALL PCFILLPAD( ICTXTB, NB, NRHS, 00604 $ MEM( IPB-IPREPAD ), 00605 $ DESCB2D( LLD_ ), 00606 $ IPREPAD, IPOSTPAD, 00607 $ PADVAL ) 00608 CALL PCFILLPAD( ICTXT, WORKSIZ, 1, 00609 $ MEM( IP_DRIVER_W-IPREPAD ), 00610 $ WORKSIZ, IPREPAD, 00611 $ IPOSTPAD, PADVAL ) 00612 END IF 00613 * 00614 * 00615 CALL BLACS_BARRIER( ICTXT, 'All') 00616 CALL SLTIMER( 2 ) 00617 * 00618 * Solve linear system via factorization 00619 * 00620 CALL PCDTTRS( TRANS, N, NRHS, 00621 $ MEM( IPA+2*( NB+10 ) ), 00622 $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 00623 $ 1, DESCA, MEM( IPB ), 1, DESCB, 00624 $ MEM( IP_FILLIN ), FILLIN_SIZE, 00625 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, 00626 $ INFO ) 00627 * 00628 CALL SLTIMER( 2 ) 00629 * 00630 IF( INFO.NE.0 ) THEN 00631 IF( IAM.EQ.0 ) 00632 $ WRITE( NOUT, FMT = * ) 'PCDTTRS INFO=', INFO 00633 KFAIL = KFAIL + 1 00634 PASSED = 'FAILED' 00635 GO TO 20 00636 END IF 00637 * 00638 IF( CHECK ) THEN 00639 * 00640 * check for memory overwrite 00641 * 00642 CALL PCCHEKPAD( ICTXT, 'PCDTTRS-work', 00643 $ WORKSIZ, 1, 00644 $ MEM( IP_DRIVER_W-IPREPAD ), 00645 $ WORKSIZ, IPREPAD, 00646 $ IPOSTPAD, PADVAL ) 00647 * 00648 * check the solution to rhs 00649 * 00650 SRESID = ZERO 00651 * 00652 * Reset descriptor describing A to 1-by-P grid for 00653 * use in banded utility routines 00654 * 00655 CALL DESCINIT( DESCA2D, (3), N, 00656 $ (3), NB, 0, 0, 00657 $ ICTXT, (3), IERR( 1 ) ) 00658 CALL PCDTLASCHK( 'N', 'D', TRANS, 00659 $ N, BWL, BWU, NRHS, 00660 $ MEM( IPB ), 1, 1, DESCB2D, 00661 $ IASEED, MEM( IPA ), 1, 1, DESCA2D, 00662 $ IBSEED, ANORM, SRESID, 00663 $ MEM( IP_DRIVER_W ), WORKSIZ ) 00664 * 00665 IF( IAM.EQ.0 ) THEN 00666 IF( SRESID.GT.THRESH ) 00667 $ WRITE( NOUT, FMT = 9985 ) SRESID 00668 END IF 00669 * 00670 * The second test is a NaN trap 00671 * 00672 IF( ( SRESID.LE.THRESH ).AND. 00673 $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN 00674 KPASS = KPASS + 1 00675 PASSED = 'PASSED' 00676 ELSE 00677 KFAIL = KFAIL + 1 00678 PASSED = 'FAILED' 00679 END IF 00680 * 00681 END IF 00682 * 00683 15 CONTINUE 00684 * Skipped tests jump to here to print out "SKIPPED" 00685 * 00686 * Gather maximum of all CPU and WALL clock timings 00687 * 00688 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 00689 $ WTIME ) 00690 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 00691 $ CTIME ) 00692 * 00693 * Print results 00694 * 00695 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 00696 * 00697 NOPS = 0 00698 NOPS2 = 0 00699 * 00700 N_FIRST = NB 00701 NPROCS_REAL = ( N-1 )/NB + 1 00702 N_LAST = MOD( N-1, NB ) + 1 00703 * 00704 * 2 N bwl INT_ONE + N (bwl) flops 00705 * for LU factorization 00706 * 00707 NOPS = 2*(DBLE(N)*DBLE(BWL)* 00708 $ DBLE(INT_ONE)) + 00709 $ (DBLE(N)*DBLE(BWL)) 00710 * 00711 * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve. 00712 * 00713 NOPS = NOPS + 00714 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE)) 00715 $ *DBLE(NRHS)) 00716 * 00717 * Multiply by 4 to get complex count 00718 * 00719 NOPS = NOPS * DBLE(4) 00720 * 00721 * Second calc to represent actual hardware speed 00722 * 00723 * 2*N_FIRST bwl*bwu Flops for LU 00724 * factorization in proc 1 00725 * 00726 NOPS2 = 2*( (DBLE(N_FIRST)* 00727 $ DBLE(BWL)*DBLE(BWU))) 00728 * 00729 IF ( NPROCS_REAL .GT. 1) THEN 00730 * 8 N_LAST bwl*INT_ONE 00731 * flops for LU and spike 00732 * calc in last processor 00733 * 00734 NOPS2 = NOPS2 + 00735 $ 8*( (DBLE(N_LAST)*DBLE(BWL) 00736 $ *DBLE(INT_ONE)) ) 00737 ENDIF 00738 * 00739 IF ( NPROCS_REAL .GT. 2) THEN 00740 * 8 NB bwl*INT_ONE flops for LU and spike 00741 * calc in other processors 00742 * 00743 NOPS2 = NOPS2 + (NPROCS_REAL-2)* 00744 $ 8*( (DBLE(NB)*DBLE(BWL) 00745 $ *DBLE(INT_ONE)) ) 00746 ENDIF 00747 * 00748 * Reduced system 00749 * 00750 NOPS2 = NOPS2 + 00751 $ 2*( NPROCS_REAL-1 ) * 00752 $ ( BWL*INT_ONE*BWL/3 ) 00753 IF( NPROCS_REAL .GT. 1 ) THEN 00754 NOPS2 = NOPS2 + 00755 $ 2*( NPROCS_REAL-2 ) * 00756 $ (2*BWL*INT_ONE*BWL) 00757 ENDIF 00758 * 00759 * Solve stage 00760 * 00761 * nrhs*2 n_first* 00762 * (bwl+INT_ONE) 00763 * flops for L,U solve in proc 1. 00764 * 00765 NOPS2 = NOPS2 + 00766 $ 2* 00767 $ DBLE(N_FIRST)* 00768 $ DBLE(NRHS) * 00769 $ ( DBLE(BWL)+DBLE(INT_ONE)) 00770 * 00771 IF ( NPROCS_REAL .GT. 1 ) THEN 00772 * 00773 * 2*nrhs*2 n_last 00774 * (bwl+INT_ONE) 00775 * flops for LU solve in other procs 00776 * 00777 NOPS2 = NOPS2 + 00778 $ 4* 00779 $ (DBLE(N_LAST)*(DBLE(BWL)+ 00780 $ DBLE(INT_ONE)))*DBLE(NRHS) 00781 ENDIF 00782 * 00783 IF ( NPROCS_REAL .GT. 2 ) THEN 00784 * 00785 * 2*nrhs*2 NB 00786 * (bwl+INT_ONE) 00787 * flops for LU solve in other procs 00788 * 00789 NOPS2 = NOPS2 + 00790 $ ( NPROCS_REAL-2)*2* 00791 $ ( (DBLE(NB)*(DBLE(BWL)+ 00792 $ DBLE(INT_ONE)))*DBLE(NRHS) ) 00793 ENDIF 00794 * 00795 * Reduced system 00796 * 00797 NOPS2 = NOPS2 + 00798 $ NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE ) 00799 IF( NPROCS_REAL .GT. 1 ) THEN 00800 NOPS2 = NOPS2 + 00801 $ NRHS*( NPROCS_REAL-2 ) * 00802 $ ( 6 * BWL*INT_ONE ) 00803 ENDIF 00804 * 00805 * 00806 * Multiply by 4 to get complex count 00807 * 00808 NOPS2 = NOPS2 * DBLE(4) 00809 * 00810 * Calculate total megaflops - factorization and/or 00811 * solve -- for WALL and CPU time, and print output 00812 * 00813 * Print WALL time if machine supports it 00814 * 00815 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 00816 TMFLOPS = NOPS / 00817 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 00818 ELSE 00819 TMFLOPS = 0.0D+0 00820 END IF 00821 * 00822 IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN 00823 TMFLOPS2 = NOPS2 / 00824 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 00825 ELSE 00826 TMFLOPS2 = 0.0D+0 00827 END IF 00828 * 00829 IF( WTIME( 2 ).GE.0.0D+0 ) 00830 $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, 00831 $ N, 00832 $ BWL, BWU, 00833 $ NB, NRHS, NPROW, NPCOL, 00834 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 00835 $ TMFLOPS2, PASSED 00836 * 00837 * Print CPU time if machine supports it 00838 * 00839 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 00840 TMFLOPS = NOPS / 00841 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 00842 ELSE 00843 TMFLOPS = 0.0D+0 00844 END IF 00845 * 00846 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 00847 TMFLOPS2 = NOPS2 / 00848 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 00849 ELSE 00850 TMFLOPS2 = 0.0D+0 00851 END IF 00852 * 00853 IF( CTIME( 2 ).GE.0.0D+0 ) 00854 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, 00855 $ N, 00856 $ BWL, BWU, 00857 $ NB, NRHS, NPROW, NPCOL, 00858 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 00859 $ TMFLOPS2, PASSED 00860 * 00861 END IF 00862 20 CONTINUE 00863 * 00864 * 00865 30 CONTINUE 00866 * NNB loop 00867 * 00868 45 CONTINUE 00869 * BW[] loop 00870 * 00871 40 CONTINUE 00872 * NMAT loop 00873 * 00874 CALL BLACS_GRIDEXIT( ICTXT ) 00875 CALL BLACS_GRIDEXIT( ICTXTB ) 00876 * 00877 50 CONTINUE 00878 * NGRIDS DROPOUT 00879 60 CONTINUE 00880 * NGRIDS loop 00881 * 00882 * Print ending messages and close output file 00883 * 00884 IF( IAM.EQ.0 ) THEN 00885 KTESTS = KPASS + KFAIL + KSKIP 00886 WRITE( NOUT, FMT = * ) 00887 WRITE( NOUT, FMT = 9992 ) KTESTS 00888 IF( CHECK ) THEN 00889 WRITE( NOUT, FMT = 9991 ) KPASS 00890 WRITE( NOUT, FMT = 9989 ) KFAIL 00891 ELSE 00892 WRITE( NOUT, FMT = 9990 ) KPASS 00893 END IF 00894 WRITE( NOUT, FMT = 9988 ) KSKIP 00895 WRITE( NOUT, FMT = * ) 00896 WRITE( NOUT, FMT = * ) 00897 WRITE( NOUT, FMT = 9987 ) 00898 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 00899 $ CLOSE ( NOUT ) 00900 END IF 00901 * 00902 CALL BLACS_EXIT( 0 ) 00903 * 00904 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 00905 $ '; It should be at least 1' ) 00906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 00907 $ I4 ) 00908 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 00909 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 00910 $ I11 ) 00911 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', 00912 $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 00913 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', 00914 $ '-------- -------- -------- ------' ) 00915 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, 00916 $ 1X,I4,1X,I4,1X,F9.3, 00917 $ F9.4, F9.2, F9.2, 1X, A6 ) 00918 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 00919 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 00920 9990 FORMAT( I5, ' tests completed without checking.' ) 00921 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 00922 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 00923 9987 FORMAT( 'END OF TESTS.' ) 00924 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 00925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 00926 * 00927 STOP 00928 * 00929 * End of PCDTTRS_DRIVER 00930 * 00931 END 00932 *