|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 PROGRAM PDGBDRIVER 00002 * 00003 * 00004 * -- ScaLAPACK routine (version 1.7) -- 00005 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00006 * and University of California, Berkeley. 00007 * November 15, 1997 00008 * 00009 * Purpose 00010 * ======= 00011 * 00012 * PDGBDRIVER is a test program for the 00013 * ScaLAPACK Band Cholesky routines corresponding to the options 00014 * indicated by DGB. This test driver performs an 00015 * A = L*U factorization 00016 * and solves a linear system with the factors for 1 or more RHS. 00017 * 00018 * The program must be driven by a short data file. 00019 * Here's an example file: 00020 *'ScaLAPACK, Version 1.2, banded linear systems input file' 00021 *'PVM.' 00022 *'' output file name (if any) 00023 *6 device out 00024 *'L' define Lower or Upper 00025 *9 number of problem sizes 00026 *1 5 17 28 37 121 200 1023 2048 3073 values of N 00027 *6 number of bandwidths 00028 *1 2 4 10 31 64 values of BW 00029 *1 number of NB's 00030 *-1 3 4 5 values of NB (-1 for automatic choice) 00031 *1 number of NRHS's (must be 1) 00032 *8 values of NRHS 00033 *1 number of NBRHS's (ignored) 00034 *1 values of NBRHS (ignored) 00035 *6 number of process grids 00036 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" 00037 *3.0 threshold 00038 * 00039 * Internal Parameters 00040 * =================== 00041 * 00042 * TOTMEM INTEGER, default = 6200000. 00043 * TOTMEM is a machine-specific parameter indicating the 00044 * maximum amount of available memory in bytes. 00045 * The user should customize TOTMEM to his platform. Remember 00046 * to leave room in memory for the operating system, the BLACS 00047 * buffer, etc. For example, on a system with 8 MB of memory 00048 * per process (e.g., one processor on an Intel iPSC/860), the 00049 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 00050 * code, BLACS buffer, etc). However, for PVM, we usually set 00051 * TOTMEM = 2000000. Some experimenting with the maximum value 00052 * of TOTMEM may be required. 00053 * 00054 * INTMEM INTEGER, default = 2048. 00055 * INTMEM is the size of the integer workspace used in this 00056 * driver as input as the IPIV vector. It represents an 00057 * upper bound on NB, the blocksize of the data 00058 * distribution. 00059 * 00060 * INTGSZ INTEGER, default = 4 bytes. 00061 * DBLESZ INTEGER, default = 8 bytes. 00062 * INTGSZ and DBLESZ indicate the length in bytes on the 00063 * given platform for an integer and a double precision real. 00064 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) 00065 * All arrays used by ScaLAPACK routines are allocated from 00066 * this array and referenced by pointers. The integer IPB, 00067 * for example, is a pointer to the starting element of MEM for 00068 * the solution vector(s) B. 00069 * 00070 * ===================================================================== 00071 * 00072 * Code Developer: Andrew J. Cleary, University of Tennessee. 00073 * Current address: Lawrence Livermore National Labs. 00074 * This version released: August, 2001. 00075 * 00076 * ===================================================================== 00077 * 00078 * .. Parameters .. 00079 INTEGER TOTMEM 00080 PARAMETER ( TOTMEM = 3000000 ) 00081 INTEGER INTMEM 00082 PARAMETER ( INTMEM = 2048 ) 00083 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 00084 $ LLD_, MB_, M_, NB_, N_, RSRC_ 00085 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 00086 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 00087 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 00088 * 00089 DOUBLE PRECISION ZERO 00090 INTEGER DBLESZ, MEMSIZ, NTESTS 00091 DOUBLE PRECISION PADVAL 00092 PARAMETER ( DBLESZ = 8, 00093 $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, 00094 $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) 00095 INTEGER INT_ONE 00096 PARAMETER ( INT_ONE = 1 ) 00097 * .. 00098 * .. Local Scalars .. 00099 LOGICAL CHECK 00100 CHARACTER TRANS 00101 CHARACTER*6 PASSED 00102 CHARACTER*80 OUTFILE 00103 INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, 00104 $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, 00105 $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, 00106 $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, 00107 $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, 00108 $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, 00109 $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, 00110 $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, 00111 $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ 00112 REAL THRESH 00113 DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, 00114 $ TMFLOPS2 00115 * .. 00116 * .. Local Arrays .. 00117 INTEGER IPIV(INTMEM) 00118 INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), 00119 $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), 00120 $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), 00121 $ NRVAL( NTESTS ), NVAL( NTESTS ), 00122 $ PVAL( NTESTS ), QVAL( NTESTS ) 00123 DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) 00124 * .. 00125 * .. External Subroutines .. 00126 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, 00127 $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, 00128 $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, 00129 $ PDCHEKPAD, PDDBLASCHK, PDFILLPAD, PDGBINFO, 00130 $ PDGBTRF, PDGBTRS, PDMATGEN, SLBOOT, SLCOMBINE, 00131 $ SLTIMER 00132 * .. 00133 * .. External Functions .. 00134 INTEGER NUMROC 00135 LOGICAL LSAME 00136 DOUBLE PRECISION PDLANGE 00137 EXTERNAL LSAME, NUMROC, PDLANGE 00138 * .. 00139 * .. Intrinsic Functions .. 00140 INTRINSIC DBLE, MAX, MIN, MOD 00141 * .. 00142 * .. Data Statements .. 00143 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 00144 * .. 00145 * 00146 * 00147 * 00148 * .. Executable Statements .. 00149 * 00150 * Get starting information 00151 * 00152 CALL BLACS_PINFO( IAM, NPROCS ) 00153 IASEED = 100 00154 IBSEED = 200 00155 * 00156 CALL PDGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, 00157 $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, 00158 $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, 00159 $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) 00160 * 00161 CHECK = ( THRESH.GE.0.0D+0 ) 00162 * 00163 * Print headings 00164 * 00165 IF( IAM.EQ.0 ) THEN 00166 WRITE( NOUT, FMT = * ) 00167 WRITE( NOUT, FMT = 9995 ) 00168 WRITE( NOUT, FMT = 9994 ) 00169 WRITE( NOUT, FMT = * ) 00170 END IF 00171 * 00172 * Loop over different process grids 00173 * 00174 DO 60 I = 1, NGRIDS 00175 * 00176 NPROW = PVAL( I ) 00177 NPCOL = QVAL( I ) 00178 * 00179 * Make sure grid information is correct 00180 * 00181 IERR( 1 ) = 0 00182 IF( NPROW.LT.1 ) THEN 00183 IF( IAM.EQ.0 ) 00184 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 00185 IERR( 1 ) = 1 00186 ELSE IF( NPCOL.LT.1 ) THEN 00187 IF( IAM.EQ.0 ) 00188 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 00189 IERR( 1 ) = 1 00190 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 00191 IF( IAM.EQ.0 ) 00192 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 00193 IERR( 1 ) = 1 00194 END IF 00195 * 00196 IF( IERR( 1 ).GT.0 ) THEN 00197 IF( IAM.EQ.0 ) 00198 $ WRITE( NOUT, FMT = 9997 ) 'grid' 00199 KSKIP = KSKIP + 1 00200 GO TO 50 00201 END IF 00202 * 00203 * Define process grid 00204 * 00205 CALL BLACS_GET( -1, 0, ICTXT ) 00206 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 00207 * 00208 * 00209 * Define transpose process grid 00210 * 00211 CALL BLACS_GET( -1, 0, ICTXTB ) 00212 CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) 00213 * 00214 * Go to bottom of process grid loop if this case doesn't use my 00215 * process 00216 * 00217 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 00218 * 00219 IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN 00220 GO TO 50 00221 ENDIF 00222 * 00223 DO 40 J = 1, NMAT 00224 * 00225 IERR( 1 ) = 0 00226 * 00227 N = NVAL( J ) 00228 * 00229 * Make sure matrix information is correct 00230 * 00231 IF( N.LT.1 ) THEN 00232 IF( IAM.EQ.0 ) 00233 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 00234 IERR( 1 ) = 1 00235 END IF 00236 * 00237 * Check all processes for an error 00238 * 00239 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00240 $ -1, 0 ) 00241 * 00242 IF( IERR( 1 ).GT.0 ) THEN 00243 IF( IAM.EQ.0 ) 00244 $ WRITE( NOUT, FMT = 9997 ) 'size' 00245 KSKIP = KSKIP + 1 00246 GO TO 40 00247 END IF 00248 * 00249 * 00250 DO 45 BW_NUM = 1, NBW 00251 * 00252 IERR( 1 ) = 0 00253 * 00254 BWL = BWLVAL( BW_NUM ) 00255 IF( BWL.LT.1 ) THEN 00256 IF( IAM.EQ.0 ) 00257 $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL 00258 IERR( 1 ) = 1 00259 END IF 00260 * 00261 BWU = BWUVAL( BW_NUM ) 00262 IF( BWU.LT.1 ) THEN 00263 IF( IAM.EQ.0 ) 00264 $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU 00265 IERR( 1 ) = 1 00266 END IF 00267 * 00268 IF( BWL.GT.N-1 ) THEN 00269 IF( IAM.EQ.0 ) THEN 00270 IERR( 1 ) = 1 00271 ENDIF 00272 END IF 00273 * 00274 IF( BWU.GT.N-1 ) THEN 00275 IF( IAM.EQ.0 ) THEN 00276 IERR( 1 ) = 1 00277 ENDIF 00278 END IF 00279 * 00280 * Check all processes for an error 00281 * 00282 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00283 $ -1, 0 ) 00284 * 00285 IF( IERR( 1 ).GT.0 ) THEN 00286 KSKIP = KSKIP + 1 00287 GO TO 45 00288 END IF 00289 * 00290 DO 30 K = 1, NNB 00291 * 00292 IERR( 1 ) = 0 00293 * 00294 NB = NBVAL( K ) 00295 IF( NB.LT.0 ) THEN 00296 NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) 00297 $ + (BWL+BWU) 00298 NB = MAX( NB, 2*(BWL+BWU) ) 00299 NB = MIN( N, NB ) 00300 END IF 00301 * 00302 * Make sure NB is legal 00303 * 00304 IERR( 1 ) = 0 00305 * 00306 IF( NB.GT.INTMEM ) THEN 00307 IERR( 1 ) = 1 00308 IF( IAM.EQ.0 ) THEN 00309 WRITE( NOUT,* )'You have chosen an ' 00310 $ ,'NB > INTMEM in the driver.' 00311 WRITE(NOUT, *)'Please edit the driver ' 00312 $ ,'and increase the value of INTMEM' 00313 ENDIF 00314 END IF 00315 * 00316 * Check all processes for an error 00317 * 00318 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 00319 $ -1, 0 ) 00320 * 00321 IF( IERR( 1 ).GT.0 ) THEN 00322 KSKIP = KSKIP + 1 00323 GO TO 30 00324 END IF 00325 * 00326 * Padding constants 00327 * 00328 NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), 00329 $ MYROW, 0, NPROW ) 00330 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 00331 * 00332 IF( CHECK ) THEN 00333 IPREPAD = ((2*BWL+2*BWU+1)+10) 00334 IMIDPAD = 10 00335 IPOSTPAD = ((2*BWL+2*BWU+1)+10) 00336 ELSE 00337 IPREPAD = 0 00338 IMIDPAD = 0 00339 IPOSTPAD = 0 00340 END IF 00341 * 00342 * Initialize the array descriptor for the matrix A 00343 * 00344 CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, 00345 $ (2*BWL+2*BWU+1), NB, 0, 0, 00346 $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) 00347 * 00348 * Convert this to 1D descriptor 00349 * 00350 DESCA( 1 ) = 501 00351 DESCA( 3 ) = N 00352 DESCA( 4 ) = NB 00353 DESCA( 5 ) = 0 00354 DESCA( 2 ) = ICTXT 00355 DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) 00356 DESCA( 7 ) = 0 00357 * 00358 IERR_TEMP = IERR( 1 ) 00359 IERR( 1 ) = 0 00360 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) 00361 * 00362 * Check all processes for an error 00363 * 00364 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 00365 * 00366 IF( IERR( 1 ).LT.0 ) THEN 00367 IF( IAM.EQ.0 ) 00368 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 00369 KSKIP = KSKIP + 1 00370 GO TO 30 00371 END IF 00372 * 00373 * Assign pointers into MEM for SCALAPACK arrays, A is 00374 * allocated starting at position MEM( IPREPAD+1 ) 00375 * 00376 FREE_PTR = 1 00377 IPB = 0 00378 * 00379 * Save room for prepadding 00380 FREE_PTR = FREE_PTR + IPREPAD 00381 * 00382 IPA = FREE_PTR 00383 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* 00384 $ DESCA2D( NB_ ) 00385 $ + IPOSTPAD 00386 * 00387 * Add memory for fillin 00388 * Fillin space needs to store: 00389 * Fillin spike: 00390 * Contribution to previous proc's diagonal block of 00391 * reduced system: 00392 * Off-diagonal block of reduced system: 00393 * Diagonal block of reduced system: 00394 * 00395 FILLIN_SIZE = 00396 $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) 00397 * 00398 * Claim memory for fillin 00399 * 00400 FREE_PTR = FREE_PTR + IPREPAD 00401 IP_FILLIN = FREE_PTR 00402 FREE_PTR = FREE_PTR + FILLIN_SIZE 00403 * 00404 * Workspace needed by computational routines: 00405 * 00406 IPW_SIZE = 0 00407 * 00408 * factorization: 00409 * 00410 IPW_SIZE = 1 00411 * 00412 * Claim memory for IPW 00413 * 00414 IPW = FREE_PTR 00415 FREE_PTR = FREE_PTR + IPW_SIZE 00416 * 00417 * Check for adequate memory for problem size 00418 * 00419 IERR( 1 ) = 0 00420 IF( FREE_PTR.GT.MEMSIZ ) THEN 00421 IF( IAM.EQ.0 ) 00422 $ WRITE( NOUT, FMT = 9996 ) 00423 $ 'divide and conquer factorization', 00424 $ (FREE_PTR )*DBLESZ 00425 IERR( 1 ) = 1 00426 END IF 00427 * 00428 * Check all processes for an error 00429 * 00430 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 00431 $ 1, -1, 0 ) 00432 * 00433 IF( IERR( 1 ).GT.0 ) THEN 00434 IF( IAM.EQ.0 ) 00435 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00436 KSKIP = KSKIP + 1 00437 GO TO 30 00438 END IF 00439 * 00440 * Worksize needed for LAPRNT 00441 WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) 00442 * 00443 IF( CHECK ) THEN 00444 * 00445 * Calculate the amount of workspace required by 00446 * the checking routines. 00447 * 00448 * PDLANGE 00449 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) 00450 * 00451 * PDDBLASCHK 00452 WORKSIZ = MAX( WORKSIZ, 00453 $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) 00454 END IF 00455 * 00456 FREE_PTR = FREE_PTR + IPREPAD 00457 IP_DRIVER_W = FREE_PTR 00458 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD 00459 * 00460 * 00461 * Check for adequate memory for problem size 00462 * 00463 IERR( 1 ) = 0 00464 IF( FREE_PTR.GT.MEMSIZ ) THEN 00465 IF( IAM.EQ.0 ) 00466 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 00467 $ ( FREE_PTR )*DBLESZ 00468 IERR( 1 ) = 1 00469 END IF 00470 * 00471 * Check all processes for an error 00472 * 00473 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 00474 $ 1, -1, 0 ) 00475 * 00476 IF( IERR( 1 ).GT.0 ) THEN 00477 IF( IAM.EQ.0 ) 00478 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00479 KSKIP = KSKIP + 1 00480 GO TO 30 00481 END IF 00482 * 00483 CALL PDBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, 00484 $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), 00485 $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, 00486 $ MYROW, MYCOL, NPROW, NPCOL ) 00487 * 00488 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), 00489 $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, 00490 $ PADVAL ) 00491 * 00492 CALL PDFILLPAD( ICTXT, WORKSIZ, 1, 00493 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 00494 $ IPREPAD, IPOSTPAD, PADVAL ) 00495 * 00496 * Calculate norm of A for residual error-checking 00497 * 00498 IF( CHECK ) THEN 00499 * 00500 ANORM = PDLANGE( '1', (2*BWL+2*BWU+1), 00501 $ N, MEM( IPA ), 1, 1, 00502 $ DESCA2D, MEM( IP_DRIVER_W ) ) 00503 CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, 00504 $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), 00505 $ IPREPAD, IPOSTPAD, PADVAL ) 00506 CALL PDCHEKPAD( ICTXT, 'PDLANGE', 00507 $ WORKSIZ, 1, 00508 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 00509 $ IPREPAD, IPOSTPAD, PADVAL ) 00510 END IF 00511 * 00512 * 00513 CALL SLBOOT() 00514 CALL BLACS_BARRIER( ICTXT, 'All' ) 00515 * 00516 * Perform factorization 00517 * 00518 CALL SLTIMER( 1 ) 00519 * 00520 CALL PDGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, 00521 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), 00522 $ IPW_SIZE, INFO ) 00523 * 00524 CALL SLTIMER( 1 ) 00525 * 00526 IF( INFO.NE.0 ) THEN 00527 IF( IAM.EQ.0 ) THEN 00528 WRITE( NOUT, FMT = * ) 'PDGBTRF INFO=', INFO 00529 ENDIF 00530 KFAIL = KFAIL + 1 00531 GO TO 30 00532 END IF 00533 * 00534 IF( CHECK ) THEN 00535 * 00536 * Check for memory overwrite in factorization 00537 * 00538 CALL PDCHEKPAD( ICTXT, 'PDGBTRF', NP, 00539 $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), 00540 $ IPREPAD, IPOSTPAD, PADVAL ) 00541 END IF 00542 * 00543 * 00544 * Loop over the different values for NRHS 00545 * 00546 DO 20 HH = 1, NNR 00547 * 00548 IERR( 1 ) = 0 00549 * 00550 NRHS = NRVAL( HH ) 00551 * 00552 * Initialize Array Descriptor for rhs 00553 * 00554 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, 00555 $ ICTXTB, NB+10, IERR( 1 ) ) 00556 * 00557 * Convert this to 1D descriptor 00558 * 00559 DESCB( 1 ) = 502 00560 DESCB( 3 ) = N 00561 DESCB( 4 ) = NB 00562 DESCB( 5 ) = 0 00563 DESCB( 2 ) = ICTXT 00564 DESCB( 6 ) = DESCB2D( LLD_ ) 00565 DESCB( 7 ) = 0 00566 * 00567 * reset free_ptr to reuse space for right hand sides 00568 * 00569 IF( IPB .GT. 0 ) THEN 00570 FREE_PTR = IPB 00571 ENDIF 00572 * 00573 FREE_PTR = FREE_PTR + IPREPAD 00574 IPB = FREE_PTR 00575 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) 00576 $ + IPOSTPAD 00577 * 00578 * Allocate workspace for workspace in TRS routine: 00579 * 00580 IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) 00581 * 00582 IPW_SOLVE = FREE_PTR 00583 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE 00584 * 00585 IERR( 1 ) = 0 00586 IF( FREE_PTR.GT.MEMSIZ ) THEN 00587 IF( IAM.EQ.0 ) 00588 $ WRITE( NOUT, FMT = 9996 )'solve', 00589 $ ( FREE_PTR )*DBLESZ 00590 IERR( 1 ) = 1 00591 END IF 00592 * 00593 * Check all processes for an error 00594 * 00595 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, 00596 $ IERR, 1, -1, 0 ) 00597 * 00598 IF( IERR( 1 ).GT.0 ) THEN 00599 IF( IAM.EQ.0 ) 00600 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 00601 KSKIP = KSKIP + 1 00602 GO TO 15 00603 END IF 00604 * 00605 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) 00606 * 00607 * Generate RHS 00608 * 00609 CALL PDMATGEN(ICTXTB, 'No', 'No', 00610 $ DESCB2D( M_ ), DESCB2D( N_ ), 00611 $ DESCB2D( MB_ ), DESCB2D( NB_ ), 00612 $ MEM( IPB ), 00613 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), 00614 $ DESCB2D( CSRC_ ), 00615 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, 00616 $ MYROW, NPCOL, NPROW ) 00617 * 00618 IF( CHECK ) THEN 00619 CALL PDFILLPAD( ICTXTB, NB, NRHS, 00620 $ MEM( IPB-IPREPAD ), 00621 $ DESCB2D( LLD_ ), 00622 $ IPREPAD, IPOSTPAD, 00623 $ PADVAL ) 00624 CALL PDFILLPAD( ICTXT, WORKSIZ, 1, 00625 $ MEM( IP_DRIVER_W-IPREPAD ), 00626 $ WORKSIZ, IPREPAD, 00627 $ IPOSTPAD, PADVAL ) 00628 END IF 00629 * 00630 * 00631 CALL BLACS_BARRIER( ICTXT, 'All') 00632 CALL SLTIMER( 2 ) 00633 * 00634 * Solve linear system via factorization 00635 * 00636 CALL PDGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), 00637 $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, 00638 $ MEM( IP_FILLIN ), FILLIN_SIZE, 00639 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, 00640 $ INFO ) 00641 * 00642 CALL SLTIMER( 2 ) 00643 * 00644 IF( INFO.NE.0 ) THEN 00645 IF( IAM.EQ.0 ) 00646 $ WRITE( NOUT, FMT = * ) 'PDGBTRS INFO=', INFO 00647 KFAIL = KFAIL + 1 00648 PASSED = 'FAILED' 00649 GO TO 20 00650 END IF 00651 * 00652 IF( CHECK ) THEN 00653 * 00654 * check for memory overwrite 00655 * 00656 CALL PDCHEKPAD( ICTXT, 'PDGBTRS-work', 00657 $ WORKSIZ, 1, 00658 $ MEM( IP_DRIVER_W-IPREPAD ), 00659 $ WORKSIZ, IPREPAD, 00660 $ IPOSTPAD, PADVAL ) 00661 * 00662 * check the solution to rhs 00663 * 00664 SRESID = ZERO 00665 * 00666 CALL PDDBLASCHK( 'N', 'N', TRANS, 00667 $ N, BWL, BWU, NRHS, 00668 $ MEM( IPB ), 1, 1, DESCB2D, 00669 $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, 00670 $ IBSEED, ANORM, SRESID, 00671 $ MEM( IP_DRIVER_W ), WORKSIZ ) 00672 * 00673 IF( IAM.EQ.0 ) THEN 00674 IF( SRESID.GT.THRESH ) 00675 $ WRITE( NOUT, FMT = 9985 ) SRESID 00676 END IF 00677 * 00678 * The second test is a NaN trap 00679 * 00680 IF( ( SRESID.LE.THRESH ).AND. 00681 $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN 00682 KPASS = KPASS + 1 00683 PASSED = 'PASSED' 00684 ELSE 00685 KFAIL = KFAIL + 1 00686 PASSED = 'FAILED' 00687 END IF 00688 * 00689 END IF 00690 * 00691 15 CONTINUE 00692 * Skipped tests jump to here to print out "SKIPPED" 00693 * 00694 * Gather maximum of all CPU and WALL clock timings 00695 * 00696 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 00697 $ WTIME ) 00698 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 00699 $ CTIME ) 00700 * 00701 * Print results 00702 * 00703 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 00704 * 00705 NOPS = 0 00706 NOPS2 = 0 00707 * 00708 N_FIRST = NB 00709 NPROCS_REAL = ( N-1 )/NB + 1 00710 N_LAST = MOD( N-1, NB ) + 1 00711 * 00712 * 2 N bwl (bwl+bwu) + N (bwl) flops 00713 * for LU factorization 00714 * 00715 NOPS = 2*(DBLE(N)*DBLE(BWL)* 00716 $ DBLE((BWL+BWU))) + 00717 $ (DBLE(N)*DBLE(BWL)) 00718 * 00719 * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. 00720 * 00721 NOPS = NOPS + 00722 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) 00723 $ *DBLE(NRHS)) 00724 * 00725 * Second calc to represent actual hardware speed 00726 * 00727 * 2*N_FIRST (bwl+bwu)*bwu Flops for LU 00728 * factorization in proc 1 00729 * 00730 NOPS2 = 2*( (DBLE(N_FIRST)* 00731 $ DBLE((BWL+BWU))*DBLE(BWU))) 00732 * 00733 IF ( NPROCS_REAL .GT. 1) THEN 00734 * 8 N_LAST (bwl+bwu)*(bwl+bwu) 00735 * flops for LU and spike 00736 * calc in last processor 00737 * 00738 NOPS2 = NOPS2 + 00739 $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) 00740 $ *DBLE((BWL+BWU))) ) 00741 ENDIF 00742 * 00743 IF ( NPROCS_REAL .GT. 2) THEN 00744 * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike 00745 * calc in other processors 00746 * 00747 NOPS2 = NOPS2 + (NPROCS_REAL-2)* 00748 $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) 00749 $ *DBLE((BWL+BWU))) ) 00750 ENDIF 00751 * 00752 * Reduced system 00753 * 00754 NOPS2 = NOPS2 + 00755 $ 2*( NPROCS_REAL-1 ) * 00756 $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) 00757 IF( NPROCS_REAL .GT. 1 ) THEN 00758 NOPS2 = NOPS2 + 00759 $ 2*( NPROCS_REAL-2 ) * 00760 $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) 00761 ENDIF 00762 * 00763 * Solve stage 00764 * 00765 * nrhs*2 n_first* 00766 * (bwl+(bwl+bwu)) 00767 * flops for L,U solve in proc 1. 00768 * 00769 NOPS2 = NOPS2 + 00770 $ 2* 00771 $ DBLE(N_FIRST)* 00772 $ DBLE(NRHS) * 00773 $ ( DBLE(BWL)+DBLE((BWL+BWU))) 00774 * 00775 IF ( NPROCS_REAL .GT. 1 ) THEN 00776 * 00777 * 2*nrhs*2 n_last 00778 * ((bwl+bwu)+(bwl+bwu)) 00779 * flops for LU solve in other procs 00780 * 00781 NOPS2 = NOPS2 + 00782 $ 4* 00783 $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ 00784 $ DBLE((BWL+BWU))))*DBLE(NRHS) 00785 ENDIF 00786 * 00787 IF ( NPROCS_REAL .GT. 2 ) THEN 00788 * 00789 * 2*nrhs*2 NB 00790 * ((bwl+bwu)+(bwl+bwu)) 00791 * flops for LU solve in other procs 00792 * 00793 NOPS2 = NOPS2 + 00794 $ ( NPROCS_REAL-2)*2* 00795 $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ 00796 $ DBLE((BWL+BWU))))*DBLE(NRHS) ) 00797 ENDIF 00798 * 00799 * Reduced system 00800 * 00801 NOPS2 = NOPS2 + 00802 $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) 00803 IF( NPROCS_REAL .GT. 1 ) THEN 00804 NOPS2 = NOPS2 + 00805 $ NRHS*( NPROCS_REAL-2 ) * 00806 $ ( 6 * (BWL+BWU)*(BWL+BWU) ) 00807 ENDIF 00808 * 00809 * 00810 * Calculate total megaflops - factorization and/or 00811 * solve -- for WALL and CPU time, and print output 00812 * 00813 * Print WALL time if machine supports it 00814 * 00815 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 00816 TMFLOPS = NOPS / 00817 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 00818 ELSE 00819 TMFLOPS = 0.0D+0 00820 END IF 00821 * 00822 IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN 00823 TMFLOPS2 = NOPS2 / 00824 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 00825 ELSE 00826 TMFLOPS2 = 0.0D+0 00827 END IF 00828 * 00829 IF( WTIME( 2 ).GE.0.0D+0 ) 00830 $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, 00831 $ N, 00832 $ BWL, BWU, 00833 $ NB, NRHS, NPROW, NPCOL, 00834 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 00835 $ TMFLOPS2, PASSED 00836 * 00837 * Print CPU time if machine supports it 00838 * 00839 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 00840 TMFLOPS = NOPS / 00841 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 00842 ELSE 00843 TMFLOPS = 0.0D+0 00844 END IF 00845 * 00846 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 00847 TMFLOPS2 = NOPS2 / 00848 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 00849 ELSE 00850 TMFLOPS2 = 0.0D+0 00851 END IF 00852 * 00853 IF( CTIME( 2 ).GE.0.0D+0 ) 00854 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, 00855 $ N, 00856 $ BWL, BWU, 00857 $ NB, NRHS, NPROW, NPCOL, 00858 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 00859 $ TMFLOPS2, PASSED 00860 * 00861 END IF 00862 20 CONTINUE 00863 * 00864 * 00865 30 CONTINUE 00866 * NNB loop 00867 * 00868 45 CONTINUE 00869 * BW[] loop 00870 * 00871 40 CONTINUE 00872 * NMAT loop 00873 * 00874 CALL BLACS_GRIDEXIT( ICTXT ) 00875 CALL BLACS_GRIDEXIT( ICTXTB ) 00876 * 00877 50 CONTINUE 00878 * NGRIDS DROPOUT 00879 60 CONTINUE 00880 * NGRIDS loop 00881 * 00882 * Print ending messages and close output file 00883 * 00884 IF( IAM.EQ.0 ) THEN 00885 KTESTS = KPASS + KFAIL + KSKIP 00886 WRITE( NOUT, FMT = * ) 00887 WRITE( NOUT, FMT = 9992 ) KTESTS 00888 IF( CHECK ) THEN 00889 WRITE( NOUT, FMT = 9991 ) KPASS 00890 WRITE( NOUT, FMT = 9989 ) KFAIL 00891 ELSE 00892 WRITE( NOUT, FMT = 9990 ) KPASS 00893 END IF 00894 WRITE( NOUT, FMT = 9988 ) KSKIP 00895 WRITE( NOUT, FMT = * ) 00896 WRITE( NOUT, FMT = * ) 00897 WRITE( NOUT, FMT = 9987 ) 00898 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 00899 $ CLOSE ( NOUT ) 00900 END IF 00901 * 00902 CALL BLACS_EXIT( 0 ) 00903 * 00904 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 00905 $ '; It should be at least 1' ) 00906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 00907 $ I4 ) 00908 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 00909 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 00910 $ I11 ) 00911 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', 00912 $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 00913 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', 00914 $ '-------- -------- -------- ------' ) 00915 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, 00916 $ 1X,I4,1X,I4,1X,F9.3, 00917 $ F9.4, F9.2, F9.2, 1X, A6 ) 00918 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 00919 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 00920 9990 FORMAT( I5, ' tests completed without checking.' ) 00921 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 00922 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 00923 9987 FORMAT( 'END OF TESTS.' ) 00924 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 00925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 00926 * 00927 STOP 00928 * 00929 * End of PDGBTRS_DRIVER 00930 * 00931 END 00932 *