ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pddbdriver.f
Go to the documentation of this file.
00001       PROGRAM PDDBDRIVER
00002 *
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     November 15, 1997
00008 *
00009 *  Purpose
00010 *  =======
00011 *
00012 *  PDDBDRIVER is a test program for the
00013 *  ScaLAPACK Band Cholesky routines corresponding to the options
00014 *  indicated by DDB.  This test driver performs an
00015 *  A = L*U factorization
00016 *  and solves a linear system with the factors for 1 or more RHS.
00017 *
00018 *  The program must be driven by a short data file.
00019 *  Here's an example file:
00020 *'ScaLAPACK, Version 1.2, banded linear systems input file'
00021 *'PVM.'
00022 *''                              output file name (if any)
00023 *6                               device out
00024 *'L'                             define Lower or Upper
00025 *9                               number of problem sizes
00026 *1 5 17 28 37 121 200 1023 2048 3073     values of N
00027 *6                               number of bandwidths
00028 *1 2 4 10 31 64                values of BW
00029 *1                               number of NB's
00030 *-1 3 4 5                        values of NB (-1 for automatic choice)
00031 *1                               number of NRHS's (must be 1)
00032 *8                               values of NRHS
00033 *1                               number of NBRHS's (ignored)
00034 *1                               values of NBRHS (ignored)
00035 *6                               number of process grids
00036 *1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
00037 *3.0                             threshold
00038 *
00039 *  Internal Parameters
00040 *  ===================
00041 *
00042 *  TOTMEM   INTEGER, default = 6200000.
00043 *           TOTMEM is a machine-specific parameter indicating the
00044 *           maximum amount of available memory in bytes.
00045 *           The user should customize TOTMEM to his platform.  Remember
00046 *           to leave room in memory for the operating system, the BLACS
00047 *           buffer, etc.  For example, on a system with 8 MB of memory
00048 *           per process (e.g., one processor on an Intel iPSC/860), the
00049 *           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
00050 *           code, BLACS buffer, etc).  However, for PVM, we usually set
00051 *           TOTMEM = 2000000.  Some experimenting with the maximum value
00052 *           of TOTMEM may be required.
00053 *
00054 *  INTGSZ   INTEGER, default = 4 bytes.
00055 *  DBLESZ   INTEGER, default = 8 bytes.
00056 *           INTGSZ and DBLESZ indicate the length in bytes on the
00057 *           given platform for an integer and a double precision real.
00058 *  MEM      DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
00059 *           All arrays used by ScaLAPACK routines are allocated from
00060 *           this array and referenced by pointers.  The integer IPB,
00061 *           for example, is a pointer to the starting element of MEM for
00062 *           the solution vector(s) B.
00063 *
00064 *  =====================================================================
00065 *
00066 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00067 *    Current address: Lawrence Livermore National Labs.
00068 *  This version released: August, 2001.
00069 *
00070 *  =====================================================================
00071 *
00072 *     .. Parameters ..
00073       INTEGER            TOTMEM
00074       PARAMETER          ( TOTMEM = 3000000 )
00075       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00076      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00077       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00078      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00079      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00080 *
00081       DOUBLE PRECISION   ZERO
00082       INTEGER            DBLESZ, MEMSIZ, NTESTS
00083       DOUBLE PRECISION   PADVAL
00084       PARAMETER          ( DBLESZ = 8,
00085      $                     MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20,
00086      $                     PADVAL = -9923.0D+0, ZERO = 0.0D+0 )
00087       INTEGER            INT_ONE
00088       PARAMETER          ( INT_ONE = 1 )
00089 *     ..
00090 *     .. Local Scalars ..
00091       LOGICAL            CHECK
00092       CHARACTER          TRANS
00093       CHARACTER*6        PASSED
00094       CHARACTER*80       OUTFILE
00095       INTEGER            BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH,
00096      $                   I, IAM, IASEED, IBSEED, ICTXT, ICTXTB,
00097      $                   IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD,
00098      $                   IPREPAD, IPW, IPW_SIZE, IPW_SOLVE,
00099      $                   IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K,
00100      $                   KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE,
00101      $                   MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR,
00102      $                   NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL,
00103      $                   NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ
00104       REAL               THRESH
00105             DOUBLE PRECISION    ANORM, NOPS, NOPS2, SRESID, TMFLOPS,
00106      $                          TMFLOPS2
00107 *     ..
00108 *     .. Local Arrays ..
00109       INTEGER            BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ),
00110      $                   DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ),
00111      $                   IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ),
00112      $                   NRVAL( NTESTS ), NVAL( NTESTS ),
00113      $                   PVAL( NTESTS ), QVAL( NTESTS )
00114       DOUBLE PRECISION   CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 )
00115 *     ..
00116 *     .. External Subroutines ..
00117       EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
00118      $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
00119      $                   BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN,
00120      $                   PDCHEKPAD, PDDBINFO, PDDBLASCHK, PDDBTRF,
00121      $                   PDDBTRS, PDFILLPAD, PDMATGEN, SLBOOT,
00122      $                   SLCOMBINE, SLTIMER
00123 *     ..
00124 *     .. External Functions ..
00125       INTEGER            NUMROC
00126       LOGICAL            LSAME
00127       DOUBLE PRECISION   PDLANGE
00128       EXTERNAL           LSAME, NUMROC, PDLANGE
00129 *     ..
00130 *     .. Intrinsic Functions ..
00131       INTRINSIC          DBLE, MAX, MIN, MOD
00132 *     ..
00133 *     .. Data Statements ..
00134       DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
00135 *     ..
00136 *
00137 *
00138 *
00139 *     .. Executable Statements ..
00140 *
00141 *     Get starting information
00142 *
00143       CALL BLACS_PINFO( IAM, NPROCS )
00144       IASEED = 100
00145       IBSEED = 200
00146 *
00147       CALL PDDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW,
00148      $               BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR,
00149      $               NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL,
00150      $               NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
00151 *
00152       CHECK = ( THRESH.GE.0.0D+0 )
00153 *
00154 *     Print headings
00155 *
00156       IF( IAM.EQ.0 ) THEN
00157          WRITE( NOUT, FMT = * )
00158          WRITE( NOUT, FMT = 9995 )
00159          WRITE( NOUT, FMT = 9994 )
00160          WRITE( NOUT, FMT = * )
00161       END IF
00162 *
00163 *     Loop over different process grids
00164 *
00165       DO 60 I = 1, NGRIDS
00166 *
00167          NPROW = PVAL( I )
00168          NPCOL = QVAL( I )
00169 *
00170 *        Make sure grid information is correct
00171 *
00172          IERR( 1 ) = 0
00173          IF( NPROW.LT.1 ) THEN
00174             IF( IAM.EQ.0 )
00175      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
00176             IERR( 1 ) = 1
00177          ELSE IF( NPCOL.LT.1 ) THEN
00178             IF( IAM.EQ.0 )
00179      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
00180             IERR( 1 ) = 1
00181          ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
00182             IF( IAM.EQ.0 )
00183      $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
00184             IERR( 1 ) = 1
00185          END IF
00186 *
00187          IF( IERR( 1 ).GT.0 ) THEN
00188             IF( IAM.EQ.0 )
00189      $         WRITE( NOUT, FMT = 9997 ) 'grid'
00190             KSKIP = KSKIP + 1
00191             GO TO 50
00192          END IF
00193 *
00194 *        Define process grid
00195 *
00196          CALL BLACS_GET( -1, 0, ICTXT )
00197          CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
00198 *
00199 *
00200 *        Define transpose process grid
00201 *
00202          CALL BLACS_GET( -1, 0, ICTXTB )
00203          CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
00204 *
00205 *        Go to bottom of process grid loop if this case doesn't use my
00206 *        process
00207 *
00208          CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00209 *
00210          IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
00211             GO TO 50
00212          ENDIF
00213 *
00214          DO 40 J = 1, NMAT
00215 *
00216            IERR( 1 ) = 0
00217 *
00218            N = NVAL( J )
00219 *
00220 *          Make sure matrix information is correct
00221 *
00222            IF( N.LT.1 ) THEN
00223                IF( IAM.EQ.0 )
00224      $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
00225                IERR( 1 ) = 1
00226            END IF
00227 *
00228 *          Check all processes for an error
00229 *
00230            CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00231      $                    -1, 0 )
00232 *
00233            IF( IERR( 1 ).GT.0 ) THEN
00234                IF( IAM.EQ.0 )
00235      $            WRITE( NOUT, FMT = 9997 ) 'size'
00236                KSKIP = KSKIP + 1
00237                GO TO 40
00238            END IF
00239 *
00240 *
00241            DO 45 BW_NUM = 1, NBW
00242 *
00243              IERR( 1 ) = 0
00244 *
00245              BWL = BWLVAL( BW_NUM )
00246              IF( BWL.LT.1 ) THEN
00247                IF( IAM.EQ.0 )
00248      $            WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL
00249                IERR( 1 ) = 1
00250              END IF
00251 *
00252              BWU = BWUVAL( BW_NUM )
00253              IF( BWU.LT.1 ) THEN
00254                IF( IAM.EQ.0 )
00255      $            WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU
00256                IERR( 1 ) = 1
00257              END IF
00258 *
00259              IF( BWL.GT.N-1 ) THEN
00260                IF( IAM.EQ.0 ) THEN
00261                  IERR( 1 ) = 1
00262                ENDIF
00263              END IF
00264 *
00265              IF( BWU.GT.N-1 ) THEN
00266                IF( IAM.EQ.0 ) THEN
00267                  IERR( 1 ) = 1
00268                ENDIF
00269              END IF
00270 *
00271 *            Check all processes for an error
00272 *
00273              CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00274      $                    -1, 0 )
00275 *
00276              IF( IERR( 1 ).GT.0 ) THEN
00277                KSKIP = KSKIP + 1
00278                GO TO 45
00279              END IF
00280 *
00281              DO 30 K = 1, NNB
00282 *
00283                IERR( 1 ) = 0
00284 *
00285                NB = NBVAL( K )
00286                IF( NB.LT.0 ) THEN
00287                   NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 )
00288      $               + MAX(BWL,BWU)
00289                   NB = MAX( NB, 2*MAX(BWL,BWU) )
00290                   NB = MIN( N, NB )
00291                END IF
00292 *
00293 *              Make sure NB is legal
00294 *
00295                IERR( 1 ) = 0
00296                IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN
00297                   IERR( 1 ) = 1
00298                END IF
00299 *
00300 *              Check all processes for an error
00301 *
00302                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00303      $                       -1, 0 )
00304 *
00305                IF( IERR( 1 ).GT.0 ) THEN
00306                   KSKIP = KSKIP + 1
00307                   GO TO 30
00308                END IF
00309 *
00310 *              Padding constants
00311 *
00312                NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1),
00313      $                      MYROW, 0, NPROW )
00314                NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
00315 *
00316                IF( CHECK ) THEN
00317                   IPREPAD  = ((BWL+BWU+1)+10)
00318                   IMIDPAD  = 10
00319                   IPOSTPAD = ((BWL+BWU+1)+10)
00320                ELSE
00321                   IPREPAD  = 0
00322                   IMIDPAD  = 0
00323                   IPOSTPAD = 0
00324                END IF
00325 *
00326 *              Initialize the array descriptor for the matrix A
00327 *
00328                CALL DESCINIT( DESCA2D, (BWL+BWU+1), N,
00329      $                       (BWL+BWU+1), NB, 0, 0,
00330      $                       ICTXT,((BWL+BWU+1)+10), IERR( 1 ) )
00331 *
00332 *              Convert this to 1D descriptor
00333 *
00334                DESCA( 1 ) = 501
00335                DESCA( 3 ) = N
00336                DESCA( 4 ) = NB
00337                DESCA( 5 ) = 0
00338                DESCA( 2 ) = ICTXT
00339                DESCA( 6 ) = ((BWL+BWU+1)+10)
00340                DESCA( 7 ) = 0
00341 *
00342                IERR_TEMP = IERR( 1 )
00343                IERR( 1 ) = 0
00344                IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
00345 *
00346 *              Check all processes for an error
00347 *
00348                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
00349 *
00350                IF( IERR( 1 ).LT.0 ) THEN
00351                   IF( IAM.EQ.0 )
00352      $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
00353                   KSKIP = KSKIP + 1
00354                   GO TO 30
00355                END IF
00356 *
00357 *              Assign pointers into MEM for SCALAPACK arrays, A is
00358 *              allocated starting at position MEM( IPREPAD+1 )
00359 *
00360                FREE_PTR = 1
00361                IPB = 0
00362 *
00363 *              Save room for prepadding
00364                FREE_PTR = FREE_PTR + IPREPAD
00365 *
00366                IPA = FREE_PTR
00367                FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
00368      $                     DESCA2D( NB_ )
00369      $                     + IPOSTPAD
00370 *
00371 *              Add memory for fillin
00372 *              Fillin space needs to store:
00373 *                Fillin spike:
00374 *                Contribution to previous proc's diagonal block of
00375 *                  reduced system:
00376 *                Off-diagonal block of reduced system:
00377 *                Diagonal block of reduced system:
00378 *
00379                FILLIN_SIZE =
00380      $            NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU)
00381 *
00382 *              Claim memory for fillin
00383 *
00384                FREE_PTR = FREE_PTR + IPREPAD
00385                IP_FILLIN = FREE_PTR
00386                FREE_PTR = FREE_PTR + FILLIN_SIZE
00387 *
00388 *              Workspace needed by computational routines:
00389 *
00390                IPW_SIZE = 0
00391 *
00392 *              factorization:
00393 *
00394                IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU)
00395 *
00396 *              Claim memory for IPW
00397 *
00398                IPW = FREE_PTR
00399                FREE_PTR = FREE_PTR + IPW_SIZE
00400 *
00401 *              Check for adequate memory for problem size
00402 *
00403                IERR( 1 ) = 0
00404                IF( FREE_PTR.GT.MEMSIZ ) THEN
00405                   IF( IAM.EQ.0 )
00406      $               WRITE( NOUT, FMT = 9996 )
00407      $               'divide and conquer factorization',
00408      $               (FREE_PTR )*DBLESZ
00409                   IERR( 1 ) = 1
00410                END IF
00411 *
00412 *              Check all processes for an error
00413 *
00414                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00415      $                       1, -1, 0 )
00416 *
00417                IF( IERR( 1 ).GT.0 ) THEN
00418                   IF( IAM.EQ.0 )
00419      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00420                   KSKIP = KSKIP + 1
00421                   GO TO 30
00422                END IF
00423 *
00424 *              Worksize needed for LAPRNT
00425                WORKSIZ = MAX( ((BWL+BWU+1)+10), NB )
00426 *
00427                IF( CHECK ) THEN
00428 *
00429 *                 Calculate the amount of workspace required by
00430 *                 the checking routines.
00431 *
00432 *                 PDLANGE
00433                   WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
00434 *
00435 *                 PDDBLASCHK
00436                   WORKSIZ = MAX( WORKSIZ,
00437      $          MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
00438                END IF
00439 *
00440                FREE_PTR = FREE_PTR + IPREPAD
00441                IP_DRIVER_W = FREE_PTR
00442                FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
00443 *
00444 *
00445 *              Check for adequate memory for problem size
00446 *
00447                IERR( 1 ) = 0
00448                IF( FREE_PTR.GT.MEMSIZ ) THEN
00449                   IF( IAM.EQ.0 )
00450      $               WRITE( NOUT, FMT = 9996 ) 'factorization',
00451      $               ( FREE_PTR )*DBLESZ
00452                   IERR( 1 ) = 1
00453                END IF
00454 *
00455 *              Check all processes for an error
00456 *
00457                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00458      $                       1, -1, 0 )
00459 *
00460                IF( IERR( 1 ).GT.0 ) THEN
00461                   IF( IAM.EQ.0 )
00462      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00463                   KSKIP = KSKIP + 1
00464                   GO TO 30
00465                END IF
00466 *
00467                CALL PDBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N,
00468      $                         (BWL+BWU+1), NB, MEM( IPA ),
00469      $                         ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW,
00470      $                         MYCOL, NPROW, NPCOL )
00471 *
00472                CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
00473      $                          ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD,
00474      $                          PADVAL )
00475 *
00476                CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
00477      $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00478      $                          IPREPAD, IPOSTPAD, PADVAL )
00479 *
00480 *              Calculate norm of A for residual error-checking
00481 *
00482                IF( CHECK ) THEN
00483 *
00484                   ANORM = PDLANGE( '1', (BWL+BWU+1),
00485      $                            N, MEM( IPA ), 1, 1,
00486      $                            DESCA2D, MEM( IP_DRIVER_W ) )
00487                   CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ,
00488      $                         MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
00489      $                         IPREPAD, IPOSTPAD, PADVAL )
00490                   CALL PDCHEKPAD( ICTXT, 'PDLANGE',
00491      $                            WORKSIZ, 1,
00492      $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00493      $                            IPREPAD, IPOSTPAD, PADVAL )
00494                END IF
00495 *
00496 *
00497                CALL SLBOOT()
00498                CALL BLACS_BARRIER( ICTXT, 'All' )
00499 *
00500 *              Perform factorization
00501 *
00502                CALL SLTIMER( 1 )
00503 *
00504                CALL PDDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA,
00505      $                       MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
00506      $                       IPW_SIZE, INFO )
00507 *
00508                CALL SLTIMER( 1 )
00509 *
00510                IF( INFO.NE.0 ) THEN
00511                   IF( IAM.EQ.0 ) THEN
00512                     WRITE( NOUT, FMT = * ) 'PDDBTRF INFO=', INFO
00513                   ENDIF
00514                   KFAIL = KFAIL + 1
00515                   GO TO 30
00516                END IF
00517 *
00518                IF( CHECK ) THEN
00519 *
00520 *                 Check for memory overwrite in factorization
00521 *
00522                   CALL PDCHEKPAD( ICTXT, 'PDDBTRF', NP,
00523      $                    NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
00524      $                    IPREPAD, IPOSTPAD, PADVAL )
00525                END IF
00526 *
00527 *
00528 *              Loop over the different values for NRHS
00529 *
00530                DO 20 HH = 1, NNR
00531 *
00532                   IERR( 1 ) = 0
00533 *
00534                   NRHS = NRVAL( HH )
00535 *
00536 *                    Initialize Array Descriptor for rhs
00537 *
00538                      CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
00539      $                             ICTXTB, NB+10, IERR( 1 ) )
00540 *
00541 *                    Convert this to 1D descriptor
00542 *
00543                      DESCB( 1 ) = 502
00544                      DESCB( 3 ) = N
00545                      DESCB( 4 ) = NB
00546                      DESCB( 5 ) = 0
00547                      DESCB( 2 ) = ICTXT
00548                      DESCB( 6 ) = DESCB2D( LLD_ )
00549                      DESCB( 7 ) = 0
00550 *
00551 *                    reset free_ptr to reuse space for right hand sides
00552 *
00553                      IF( IPB .GT. 0 ) THEN
00554                        FREE_PTR = IPB
00555                      ENDIF
00556 *
00557                      FREE_PTR = FREE_PTR + IPREPAD
00558                      IPB = FREE_PTR
00559                      FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
00560      $                          + IPOSTPAD
00561 *
00562 *                    Allocate workspace for workspace in TRS routine:
00563 *
00564                      IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS)
00565 *
00566                      IPW_SOLVE = FREE_PTR
00567                      FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
00568 *
00569                      IERR( 1 ) = 0
00570                      IF( FREE_PTR.GT.MEMSIZ ) THEN
00571                         IF( IAM.EQ.0 )
00572      $                     WRITE( NOUT, FMT = 9996 )'solve',
00573      $                            ( FREE_PTR )*DBLESZ
00574                         IERR( 1 ) = 1
00575                      END IF
00576 *
00577 *                    Check all processes for an error
00578 *
00579                      CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
00580      $                             IERR, 1, -1, 0 )
00581 *
00582                      IF( IERR( 1 ).GT.0 ) THEN
00583                         IF( IAM.EQ.0 )
00584      $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00585                         KSKIP = KSKIP + 1
00586                         GO TO 15
00587                      END IF
00588 *
00589                      MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
00590 *
00591 *                    Generate RHS
00592 *
00593                      CALL PDMATGEN(ICTXTB, 'No', 'No',
00594      $                        DESCB2D( M_ ), DESCB2D( N_ ),
00595      $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
00596      $                        MEM( IPB ),
00597      $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
00598      $                        DESCB2D( CSRC_ ),
00599      $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
00600      $                        MYROW, NPCOL, NPROW )
00601 *
00602                      IF( CHECK ) THEN
00603                         CALL PDFILLPAD( ICTXTB, NB, NRHS,
00604      $                                  MEM( IPB-IPREPAD ),
00605      $                                  DESCB2D( LLD_ ),
00606      $                                  IPREPAD, IPOSTPAD,
00607      $                                  PADVAL )
00608                         CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
00609      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00610      $                                  WORKSIZ, IPREPAD,
00611      $                                  IPOSTPAD, PADVAL )
00612                      END IF
00613 *
00614 *
00615                      CALL BLACS_BARRIER( ICTXT, 'All')
00616                      CALL SLTIMER( 2 )
00617 *
00618 *                    Solve linear system via factorization
00619 *
00620                      CALL PDDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
00621      $                             1, DESCA, MEM( IPB ), 1, DESCB,
00622      $                             MEM( IP_FILLIN ), FILLIN_SIZE,
00623      $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
00624      $                             INFO )
00625 *
00626                      CALL SLTIMER( 2 )
00627 *
00628                      IF( INFO.NE.0 ) THEN
00629                        IF( IAM.EQ.0 )
00630      $  WRITE( NOUT, FMT = * ) 'PDDBTRS INFO=', INFO
00631                        KFAIL = KFAIL + 1
00632                        PASSED = 'FAILED'
00633                        GO TO 20
00634                      END IF
00635 *
00636                      IF( CHECK ) THEN
00637 *
00638 *                       check for memory overwrite
00639 *
00640                         CALL PDCHEKPAD( ICTXT, 'PDDBTRS-work',
00641      $                                  WORKSIZ, 1,
00642      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00643      $                                  WORKSIZ, IPREPAD,
00644      $                                  IPOSTPAD, PADVAL )
00645 *
00646 *                       check the solution to rhs
00647 *
00648                         SRESID = ZERO
00649 *
00650                         CALL PDDBLASCHK( 'N', 'D', TRANS,
00651      $                       N, BWL, BWU, NRHS,
00652      $                       MEM( IPB ), 1, 1, DESCB2D,
00653      $                       IASEED, MEM( IPA ), 1, 1, DESCA2D,
00654      $                       IBSEED, ANORM, SRESID,
00655      $                       MEM( IP_DRIVER_W ), WORKSIZ )
00656 *
00657                         IF( IAM.EQ.0 ) THEN
00658                            IF( SRESID.GT.THRESH )
00659      $                        WRITE( NOUT, FMT = 9985 ) SRESID
00660                         END IF
00661 *
00662 *                       The second test is a NaN trap
00663 *
00664                         IF( ( SRESID.LE.THRESH          ).AND.
00665      $                      ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN
00666                            KPASS = KPASS + 1
00667                            PASSED = 'PASSED'
00668                         ELSE
00669                            KFAIL = KFAIL + 1
00670                            PASSED = 'FAILED'
00671                         END IF
00672 *
00673                      END IF
00674 *
00675    15                CONTINUE
00676 *                    Skipped tests jump to here to print out "SKIPPED"
00677 *
00678 *                    Gather maximum of all CPU and WALL clock timings
00679 *
00680                      CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
00681      $                               WTIME )
00682                      CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
00683      $                               CTIME )
00684 *
00685 *                    Print results
00686 *
00687                      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
00688 *
00689                         NOPS = 0
00690                         NOPS2 = 0
00691 *
00692                         N_FIRST = NB
00693                         NPROCS_REAL = ( N-1 )/NB + 1
00694                         N_LAST = MOD( N-1, NB ) + 1
00695 *
00696 *                       2 N bwl bwu + N (bwl) flops
00697 *                          for LU factorization
00698 *
00699                         NOPS = 2*(DBLE(N)*DBLE(BWL)*
00700      $                         DBLE(BWU)) +
00701      $                         (DBLE(N)*DBLE(BWL))
00702 *
00703 *                       nrhs * 2 N*(bwl+bwu) flops for LU solve.
00704 *
00705                         NOPS = NOPS +
00706      $                  2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU))
00707      $                   *DBLE(NRHS))
00708 *
00709 *                       Second calc to represent actual hardware speed
00710 *
00711 *                     2*N_FIRST bwl*bwu Flops for LU
00712 *                       factorization in proc 1
00713 *
00714                       NOPS2 = 2*( (DBLE(N_FIRST)*
00715      $                  DBLE(BWL)*DBLE(BWU)))
00716 *
00717                       IF ( NPROCS_REAL .GT. 1) THEN
00718 *                       8 N_LAST bwl*bwu
00719 *                         flops for LU and spike
00720 *                         calc in last processor
00721 *
00722                         NOPS2 = NOPS2 +
00723      $                          8*( (DBLE(N_LAST)*DBLE(BWL)
00724      $                          *DBLE(BWU)) )
00725                       ENDIF
00726 *
00727                       IF ( NPROCS_REAL .GT. 2) THEN
00728 *                       8 NB bwl*bwu  flops for LU and spike
00729 *                         calc in other processors
00730 *
00731                         NOPS2 = NOPS2 + (NPROCS_REAL-2)*
00732      $                          8*( (DBLE(NB)*DBLE(BWL)
00733      $                          *DBLE(BWU)) )
00734                       ENDIF
00735 *
00736 *                     Reduced system
00737 *
00738                       NOPS2 = NOPS2 +
00739      $                  2*( NPROCS_REAL-1 ) *
00740      $                  ( BWL*BWU*BWL/3 )
00741                       IF( NPROCS_REAL .GT. 1 ) THEN
00742                         NOPS2 = NOPS2 +
00743      $                    2*( NPROCS_REAL-2 ) *
00744      $                    (2*BWL*BWU*BWL)
00745                       ENDIF
00746 *
00747 *                     Solve stage
00748 *
00749 *                     nrhs*2 n_first*
00750 *                        (bwl+bwu)
00751 *                        flops for L,U solve in proc 1.
00752 *
00753                       NOPS2 = NOPS2 +
00754      $                  2*
00755      $                  DBLE(N_FIRST)*
00756      $                  DBLE(NRHS) *
00757      $                  ( DBLE(BWL)+DBLE(BWU))
00758 *
00759                       IF ( NPROCS_REAL .GT. 1 ) THEN
00760 *
00761 *                       2*nrhs*2 n_last
00762 *                        (bwl+bwu)
00763 *                       flops for LU solve in other procs
00764 *
00765                         NOPS2 = NOPS2 +
00766      $                    4*
00767      $                    (DBLE(N_LAST)*(DBLE(BWL)+
00768      $                    DBLE(BWU)))*DBLE(NRHS)
00769                       ENDIF
00770 *
00771                       IF ( NPROCS_REAL .GT. 2 ) THEN
00772 *
00773 *                       2*nrhs*2 NB
00774 *                        (bwl+bwu)
00775 *                        flops for LU solve in other procs
00776 *
00777                         NOPS2 = NOPS2 +
00778      $                    ( NPROCS_REAL-2)*2*
00779      $                 ( (DBLE(NB)*(DBLE(BWL)+
00780      $                 DBLE(BWU)))*DBLE(NRHS) )
00781                       ENDIF
00782 *
00783 *                     Reduced system
00784 *
00785                       NOPS2 = NOPS2 +
00786      $                  NRHS*( NPROCS_REAL-1)*2*(BWL*BWU )
00787                       IF( NPROCS_REAL .GT. 1 ) THEN
00788                         NOPS2 = NOPS2 +
00789      $                   NRHS*( NPROCS_REAL-2 ) *
00790      $                   ( 6 * BWL*BWU )
00791                       ENDIF
00792 *
00793 *
00794 *                       Calculate total megaflops - factorization and/or
00795 *                       solve -- for WALL and CPU time, and print output
00796 *
00797 *                       Print WALL time if machine supports it
00798 *
00799                         IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
00800                            TMFLOPS = NOPS /
00801      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00802                         ELSE
00803                            TMFLOPS = 0.0D+0
00804                         END IF
00805 *
00806                         IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
00807                            TMFLOPS2 = NOPS2 /
00808      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00809                         ELSE
00810                            TMFLOPS2 = 0.0D+0
00811                         END IF
00812 *
00813                         IF( WTIME( 2 ).GE.0.0D+0 )
00814      $                     WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS,
00815      $                            N,
00816      $                            BWL, BWU,
00817      $                            NB, NRHS, NPROW, NPCOL,
00818      $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
00819      $                            TMFLOPS2, PASSED
00820 *
00821 *                       Print CPU time if machine supports it
00822 *
00823                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00824                            TMFLOPS = NOPS /
00825      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00826                         ELSE
00827                            TMFLOPS = 0.0D+0
00828                         END IF
00829 *
00830                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00831                            TMFLOPS2 = NOPS2 /
00832      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00833                         ELSE
00834                            TMFLOPS2 = 0.0D+0
00835                         END IF
00836 *
00837                         IF( CTIME( 2 ).GE.0.0D+0 )
00838      $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS,
00839      $                            N,
00840      $                            BWL, BWU,
00841      $                            NB, NRHS, NPROW, NPCOL,
00842      $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
00843      $                            TMFLOPS2, PASSED
00844 *
00845                      END IF
00846    20          CONTINUE
00847 *
00848 *
00849    30       CONTINUE
00850 *           NNB loop
00851 *
00852    45      CONTINUE
00853 *          BW[] loop
00854 *
00855    40   CONTINUE
00856 *       NMAT loop
00857 *
00858         CALL BLACS_GRIDEXIT( ICTXT )
00859         CALL BLACS_GRIDEXIT( ICTXTB )
00860 *
00861    50   CONTINUE
00862 *       NGRIDS DROPOUT
00863    60 CONTINUE
00864 *     NGRIDS loop
00865 *
00866 *     Print ending messages and close output file
00867 *
00868       IF( IAM.EQ.0 ) THEN
00869          KTESTS = KPASS + KFAIL + KSKIP
00870          WRITE( NOUT, FMT = * )
00871          WRITE( NOUT, FMT = 9992 ) KTESTS
00872          IF( CHECK ) THEN
00873             WRITE( NOUT, FMT = 9991 ) KPASS
00874             WRITE( NOUT, FMT = 9989 ) KFAIL
00875          ELSE
00876             WRITE( NOUT, FMT = 9990 ) KPASS
00877          END IF
00878          WRITE( NOUT, FMT = 9988 ) KSKIP
00879          WRITE( NOUT, FMT = * )
00880          WRITE( NOUT, FMT = * )
00881          WRITE( NOUT, FMT = 9987 )
00882          IF( NOUT.NE.6 .AND. NOUT.NE.0 )
00883      $      CLOSE ( NOUT )
00884       END IF
00885 *
00886       CALL BLACS_EXIT( 0 )
00887 *
00888  9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
00889      $        '; It should be at least 1' )
00890  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
00891      $        I4 )
00892  9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
00893  9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
00894      $        I11 )
00895  9995 FORMAT( 'TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time ',
00896      $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
00897  9994 FORMAT( '---- -- ------  --- ---  ---- ----- ---- ---- -------- ',
00898      $        '-------- -------- -------- ------' )
00899  9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
00900      $                                          1X,I4,1X,I4,1X,F9.3,
00901      $        F9.4,        F9.2,    F9.2, 1X, A6 )
00902  9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
00903  9991 FORMAT( I5, ' tests completed and passed residual checks.' )
00904  9990 FORMAT( I5, ' tests completed without checking.' )
00905  9989 FORMAT( I5, ' tests completed and failed residual checks.' )
00906  9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
00907  9987 FORMAT( 'END OF TESTS.' )
00908  9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
00909  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
00910 *
00911       STOP
00912 *
00913 *     End of PDDBTRS_DRIVER
00914 *
00915       END
00916 *