ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdgbdriver.f
Go to the documentation of this file.
00001       PROGRAM PDGBDRIVER
00002 *
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     November 15, 1997
00008 *
00009 *  Purpose
00010 *  =======
00011 *
00012 *  PDGBDRIVER is a test program for the
00013 *  ScaLAPACK Band Cholesky routines corresponding to the options
00014 *  indicated by DGB.  This test driver performs an
00015 *  A = L*U factorization
00016 *  and solves a linear system with the factors for 1 or more RHS.
00017 *
00018 *  The program must be driven by a short data file.
00019 *  Here's an example file:
00020 *'ScaLAPACK, Version 1.2, banded linear systems input file'
00021 *'PVM.'
00022 *''                              output file name (if any)
00023 *6                               device out
00024 *'L'                             define Lower or Upper
00025 *9                               number of problem sizes
00026 *1 5 17 28 37 121 200 1023 2048 3073     values of N
00027 *6                               number of bandwidths
00028 *1 2 4 10 31 64                values of BW
00029 *1                               number of NB's
00030 *-1 3 4 5                        values of NB (-1 for automatic choice)
00031 *1                               number of NRHS's (must be 1)
00032 *8                               values of NRHS
00033 *1                               number of NBRHS's (ignored)
00034 *1                               values of NBRHS (ignored)
00035 *6                               number of process grids
00036 *1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
00037 *3.0                             threshold
00038 *
00039 *  Internal Parameters
00040 *  ===================
00041 *
00042 *  TOTMEM   INTEGER, default = 6200000.
00043 *           TOTMEM is a machine-specific parameter indicating the
00044 *           maximum amount of available memory in bytes.
00045 *           The user should customize TOTMEM to his platform.  Remember
00046 *           to leave room in memory for the operating system, the BLACS
00047 *           buffer, etc.  For example, on a system with 8 MB of memory
00048 *           per process (e.g., one processor on an Intel iPSC/860), the
00049 *           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
00050 *           code, BLACS buffer, etc).  However, for PVM, we usually set
00051 *           TOTMEM = 2000000.  Some experimenting with the maximum value
00052 *           of TOTMEM may be required.
00053 *
00054 *  INTMEM   INTEGER, default = 2048.
00055 *           INTMEM is the size of the integer workspace used in this
00056 *           driver as input as the IPIV vector. It represents an
00057 *           upper bound on NB, the blocksize of the data
00058 *           distribution.
00059 *
00060 *  INTGSZ   INTEGER, default = 4 bytes.
00061 *  DBLESZ   INTEGER, default = 8 bytes.
00062 *           INTGSZ and DBLESZ indicate the length in bytes on the
00063 *           given platform for an integer and a double precision real.
00064 *  MEM      DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
00065 *           All arrays used by ScaLAPACK routines are allocated from
00066 *           this array and referenced by pointers.  The integer IPB,
00067 *           for example, is a pointer to the starting element of MEM for
00068 *           the solution vector(s) B.
00069 *
00070 *  =====================================================================
00071 *
00072 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00073 *    Current address: Lawrence Livermore National Labs.
00074 *  This version released: August, 2001.
00075 *
00076 *  =====================================================================
00077 *
00078 *     .. Parameters ..
00079       INTEGER            TOTMEM
00080       PARAMETER          ( TOTMEM = 3000000 )
00081       INTEGER            INTMEM
00082       PARAMETER          ( INTMEM = 2048 )
00083       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00084      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00085       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00086      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00087      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00088 *
00089       DOUBLE PRECISION   ZERO
00090       INTEGER            DBLESZ, MEMSIZ, NTESTS
00091       DOUBLE PRECISION   PADVAL
00092       PARAMETER          ( DBLESZ = 8,
00093      $                     MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20,
00094      $                     PADVAL = -9923.0D+0, ZERO = 0.0D+0 )
00095       INTEGER            INT_ONE
00096       PARAMETER          ( INT_ONE = 1 )
00097 *     ..
00098 *     .. Local Scalars ..
00099       LOGICAL            CHECK
00100       CHARACTER          TRANS
00101       CHARACTER*6        PASSED
00102       CHARACTER*80       OUTFILE
00103       INTEGER            BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH,
00104      $                   I, IAM, IASEED, IBSEED, ICTXT, ICTXTB,
00105      $                   IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD,
00106      $                   IPREPAD, IPW, IPW_SIZE, IPW_SOLVE,
00107      $                   IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K,
00108      $                   KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE,
00109      $                   MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR,
00110      $                   NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL,
00111      $                   NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ
00112       REAL               THRESH
00113             DOUBLE PRECISION    ANORM, NOPS, NOPS2, SRESID, TMFLOPS,
00114      $                          TMFLOPS2
00115 *     ..
00116 *     .. Local Arrays ..
00117       INTEGER    IPIV(INTMEM)
00118       INTEGER            BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ),
00119      $                   DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ),
00120      $                   IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ),
00121      $                   NRVAL( NTESTS ), NVAL( NTESTS ),
00122      $                   PVAL( NTESTS ), QVAL( NTESTS )
00123       DOUBLE PRECISION   CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 )
00124 *     ..
00125 *     .. External Subroutines ..
00126       EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
00127      $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
00128      $                   BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN,
00129      $                   PDCHEKPAD, PDDBLASCHK, PDFILLPAD, PDGBINFO,
00130      $                   PDGBTRF, PDGBTRS, PDMATGEN, SLBOOT, SLCOMBINE,
00131      $                   SLTIMER
00132 *     ..
00133 *     .. External Functions ..
00134       INTEGER            NUMROC
00135       LOGICAL            LSAME
00136       DOUBLE PRECISION   PDLANGE
00137       EXTERNAL           LSAME, NUMROC, PDLANGE
00138 *     ..
00139 *     .. Intrinsic Functions ..
00140       INTRINSIC          DBLE, MAX, MIN, MOD
00141 *     ..
00142 *     .. Data Statements ..
00143       DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
00144 *     ..
00145 *
00146 *
00147 *
00148 *     .. Executable Statements ..
00149 *
00150 *     Get starting information
00151 *
00152       CALL BLACS_PINFO( IAM, NPROCS )
00153       IASEED = 100
00154       IBSEED = 200
00155 *
00156       CALL PDGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW,
00157      $               BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR,
00158      $               NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL,
00159      $               NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
00160 *
00161       CHECK = ( THRESH.GE.0.0D+0 )
00162 *
00163 *     Print headings
00164 *
00165       IF( IAM.EQ.0 ) THEN
00166          WRITE( NOUT, FMT = * )
00167          WRITE( NOUT, FMT = 9995 )
00168          WRITE( NOUT, FMT = 9994 )
00169          WRITE( NOUT, FMT = * )
00170       END IF
00171 *
00172 *     Loop over different process grids
00173 *
00174       DO 60 I = 1, NGRIDS
00175 *
00176          NPROW = PVAL( I )
00177          NPCOL = QVAL( I )
00178 *
00179 *        Make sure grid information is correct
00180 *
00181          IERR( 1 ) = 0
00182          IF( NPROW.LT.1 ) THEN
00183             IF( IAM.EQ.0 )
00184      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
00185             IERR( 1 ) = 1
00186          ELSE IF( NPCOL.LT.1 ) THEN
00187             IF( IAM.EQ.0 )
00188      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
00189             IERR( 1 ) = 1
00190          ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
00191             IF( IAM.EQ.0 )
00192      $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
00193             IERR( 1 ) = 1
00194          END IF
00195 *
00196          IF( IERR( 1 ).GT.0 ) THEN
00197             IF( IAM.EQ.0 )
00198      $         WRITE( NOUT, FMT = 9997 ) 'grid'
00199             KSKIP = KSKIP + 1
00200             GO TO 50
00201          END IF
00202 *
00203 *        Define process grid
00204 *
00205          CALL BLACS_GET( -1, 0, ICTXT )
00206          CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
00207 *
00208 *
00209 *        Define transpose process grid
00210 *
00211          CALL BLACS_GET( -1, 0, ICTXTB )
00212          CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
00213 *
00214 *        Go to bottom of process grid loop if this case doesn't use my
00215 *        process
00216 *
00217          CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00218 *
00219          IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
00220             GO TO 50
00221          ENDIF
00222 *
00223          DO 40 J = 1, NMAT
00224 *
00225            IERR( 1 ) = 0
00226 *
00227            N = NVAL( J )
00228 *
00229 *          Make sure matrix information is correct
00230 *
00231            IF( N.LT.1 ) THEN
00232                IF( IAM.EQ.0 )
00233      $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
00234                IERR( 1 ) = 1
00235            END IF
00236 *
00237 *          Check all processes for an error
00238 *
00239            CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00240      $                    -1, 0 )
00241 *
00242            IF( IERR( 1 ).GT.0 ) THEN
00243                IF( IAM.EQ.0 )
00244      $            WRITE( NOUT, FMT = 9997 ) 'size'
00245                KSKIP = KSKIP + 1
00246                GO TO 40
00247            END IF
00248 *
00249 *
00250            DO 45 BW_NUM = 1, NBW
00251 *
00252              IERR( 1 ) = 0
00253 *
00254              BWL = BWLVAL( BW_NUM )
00255              IF( BWL.LT.1 ) THEN
00256                IF( IAM.EQ.0 )
00257      $            WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL
00258                IERR( 1 ) = 1
00259              END IF
00260 *
00261              BWU = BWUVAL( BW_NUM )
00262              IF( BWU.LT.1 ) THEN
00263                IF( IAM.EQ.0 )
00264      $            WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU
00265                IERR( 1 ) = 1
00266              END IF
00267 *
00268              IF( BWL.GT.N-1 ) THEN
00269                IF( IAM.EQ.0 ) THEN
00270                  IERR( 1 ) = 1
00271                ENDIF
00272              END IF
00273 *
00274              IF( BWU.GT.N-1 ) THEN
00275                IF( IAM.EQ.0 ) THEN
00276                  IERR( 1 ) = 1
00277                ENDIF
00278              END IF
00279 *
00280 *            Check all processes for an error
00281 *
00282              CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00283      $                    -1, 0 )
00284 *
00285              IF( IERR( 1 ).GT.0 ) THEN
00286                KSKIP = KSKIP + 1
00287                GO TO 45
00288              END IF
00289 *
00290              DO 30 K = 1, NNB
00291 *
00292                IERR( 1 ) = 0
00293 *
00294                NB = NBVAL( K )
00295                IF( NB.LT.0 ) THEN
00296                   NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 )
00297      $               + (BWL+BWU)
00298                   NB = MAX( NB, 2*(BWL+BWU) )
00299                   NB = MIN( N, NB )
00300                END IF
00301 *
00302 *              Make sure NB is legal
00303 *
00304                IERR( 1 ) = 0
00305 *
00306                IF( NB.GT.INTMEM ) THEN
00307                   IERR( 1 ) = 1
00308                   IF( IAM.EQ.0 ) THEN
00309                      WRITE( NOUT,* )'You have chosen an '
00310      $            ,'NB > INTMEM in the driver.'
00311                      WRITE(NOUT, *)'Please edit the driver '
00312      $            ,'and increase the value of INTMEM'
00313                   ENDIF
00314                END IF
00315 *
00316 *              Check all processes for an error
00317 *
00318                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00319      $                       -1, 0 )
00320 *
00321                IF( IERR( 1 ).GT.0 ) THEN
00322                   KSKIP = KSKIP + 1
00323                   GO TO 30
00324                END IF
00325 *
00326 *              Padding constants
00327 *
00328                NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1),
00329      $                      MYROW, 0, NPROW )
00330                NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
00331 *
00332                IF( CHECK ) THEN
00333                   IPREPAD  = ((2*BWL+2*BWU+1)+10)
00334                   IMIDPAD  = 10
00335                   IPOSTPAD = ((2*BWL+2*BWU+1)+10)
00336                ELSE
00337                   IPREPAD  = 0
00338                   IMIDPAD  = 0
00339                   IPOSTPAD = 0
00340                END IF
00341 *
00342 *              Initialize the array descriptor for the matrix A
00343 *
00344                CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N,
00345      $                       (2*BWL+2*BWU+1), NB, 0, 0,
00346      $                       ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) )
00347 *
00348 *              Convert this to 1D descriptor
00349 *
00350                DESCA( 1 ) = 501
00351                DESCA( 3 ) = N
00352                DESCA( 4 ) = NB
00353                DESCA( 5 ) = 0
00354                DESCA( 2 ) = ICTXT
00355                DESCA( 6 ) = ((2*BWL+2*BWU+1)+10)
00356                DESCA( 7 ) = 0
00357 *
00358                IERR_TEMP = IERR( 1 )
00359                IERR( 1 ) = 0
00360                IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
00361 *
00362 *              Check all processes for an error
00363 *
00364                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
00365 *
00366                IF( IERR( 1 ).LT.0 ) THEN
00367                   IF( IAM.EQ.0 )
00368      $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
00369                   KSKIP = KSKIP + 1
00370                   GO TO 30
00371                END IF
00372 *
00373 *              Assign pointers into MEM for SCALAPACK arrays, A is
00374 *              allocated starting at position MEM( IPREPAD+1 )
00375 *
00376                FREE_PTR = 1
00377                IPB = 0
00378 *
00379 *              Save room for prepadding
00380                FREE_PTR = FREE_PTR + IPREPAD
00381 *
00382                IPA = FREE_PTR
00383                FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
00384      $                     DESCA2D( NB_ )
00385      $                     + IPOSTPAD
00386 *
00387 *              Add memory for fillin
00388 *              Fillin space needs to store:
00389 *                Fillin spike:
00390 *                Contribution to previous proc's diagonal block of
00391 *                  reduced system:
00392 *                Off-diagonal block of reduced system:
00393 *                Diagonal block of reduced system:
00394 *
00395                FILLIN_SIZE =
00396      $            (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU)
00397 *
00398 *              Claim memory for fillin
00399 *
00400                FREE_PTR = FREE_PTR + IPREPAD
00401                IP_FILLIN = FREE_PTR
00402                FREE_PTR = FREE_PTR + FILLIN_SIZE
00403 *
00404 *              Workspace needed by computational routines:
00405 *
00406                IPW_SIZE = 0
00407 *
00408 *              factorization:
00409 *
00410                IPW_SIZE = 1
00411 *
00412 *              Claim memory for IPW
00413 *
00414                IPW = FREE_PTR
00415                FREE_PTR = FREE_PTR + IPW_SIZE
00416 *
00417 *              Check for adequate memory for problem size
00418 *
00419                IERR( 1 ) = 0
00420                IF( FREE_PTR.GT.MEMSIZ ) THEN
00421                   IF( IAM.EQ.0 )
00422      $               WRITE( NOUT, FMT = 9996 )
00423      $               'divide and conquer factorization',
00424      $               (FREE_PTR )*DBLESZ
00425                   IERR( 1 ) = 1
00426                END IF
00427 *
00428 *              Check all processes for an error
00429 *
00430                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00431      $                       1, -1, 0 )
00432 *
00433                IF( IERR( 1 ).GT.0 ) THEN
00434                   IF( IAM.EQ.0 )
00435      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00436                   KSKIP = KSKIP + 1
00437                   GO TO 30
00438                END IF
00439 *
00440 *              Worksize needed for LAPRNT
00441                WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB )
00442 *
00443                IF( CHECK ) THEN
00444 *
00445 *                 Calculate the amount of workspace required by
00446 *                 the checking routines.
00447 *
00448 *                 PDLANGE
00449                   WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
00450 *
00451 *                 PDDBLASCHK
00452                   WORKSIZ = MAX( WORKSIZ,
00453      $          MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
00454                END IF
00455 *
00456                FREE_PTR = FREE_PTR + IPREPAD
00457                IP_DRIVER_W = FREE_PTR
00458                FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
00459 *
00460 *
00461 *              Check for adequate memory for problem size
00462 *
00463                IERR( 1 ) = 0
00464                IF( FREE_PTR.GT.MEMSIZ ) THEN
00465                   IF( IAM.EQ.0 )
00466      $               WRITE( NOUT, FMT = 9996 ) 'factorization',
00467      $               ( FREE_PTR )*DBLESZ
00468                   IERR( 1 ) = 1
00469                END IF
00470 *
00471 *              Check all processes for an error
00472 *
00473                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00474      $                       1, -1, 0 )
00475 *
00476                IF( IERR( 1 ).GT.0 ) THEN
00477                   IF( IAM.EQ.0 )
00478      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00479                   KSKIP = KSKIP + 1
00480                   GO TO 30
00481                END IF
00482 *
00483                CALL PDBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N,
00484      $                         (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ),
00485      $                         ((2*BWL+2*BWU+1)+10), 0, 0, IASEED,
00486      $                         MYROW, MYCOL, NPROW, NPCOL )
00487 *
00488                CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
00489      $                          ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD,
00490      $                          PADVAL )
00491 *
00492                CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
00493      $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00494      $                          IPREPAD, IPOSTPAD, PADVAL )
00495 *
00496 *              Calculate norm of A for residual error-checking
00497 *
00498                IF( CHECK ) THEN
00499 *
00500                   ANORM = PDLANGE( '1', (2*BWL+2*BWU+1),
00501      $                            N, MEM( IPA ), 1, 1,
00502      $                            DESCA2D, MEM( IP_DRIVER_W ) )
00503                   CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ,
00504      $                         MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10),
00505      $                         IPREPAD, IPOSTPAD, PADVAL )
00506                   CALL PDCHEKPAD( ICTXT, 'PDLANGE',
00507      $                            WORKSIZ, 1,
00508      $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00509      $                            IPREPAD, IPOSTPAD, PADVAL )
00510                END IF
00511 *
00512 *
00513                CALL SLBOOT()
00514                CALL BLACS_BARRIER( ICTXT, 'All' )
00515 *
00516 *              Perform factorization
00517 *
00518                CALL SLTIMER( 1 )
00519 *
00520                CALL PDGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV,
00521      $                       MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
00522      $                       IPW_SIZE, INFO )
00523 *
00524                CALL SLTIMER( 1 )
00525 *
00526                IF( INFO.NE.0 ) THEN
00527                   IF( IAM.EQ.0 ) THEN
00528                     WRITE( NOUT, FMT = * ) 'PDGBTRF INFO=', INFO
00529                   ENDIF
00530                   KFAIL = KFAIL + 1
00531                   GO TO 30
00532                END IF
00533 *
00534                IF( CHECK ) THEN
00535 *
00536 *                 Check for memory overwrite in factorization
00537 *
00538                   CALL PDCHEKPAD( ICTXT, 'PDGBTRF', NP,
00539      $                    NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10),
00540      $                    IPREPAD, IPOSTPAD, PADVAL )
00541                END IF
00542 *
00543 *
00544 *              Loop over the different values for NRHS
00545 *
00546                DO 20 HH = 1, NNR
00547 *
00548                   IERR( 1 ) = 0
00549 *
00550                   NRHS = NRVAL( HH )
00551 *
00552 *                    Initialize Array Descriptor for rhs
00553 *
00554                      CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
00555      $                             ICTXTB, NB+10, IERR( 1 ) )
00556 *
00557 *                    Convert this to 1D descriptor
00558 *
00559                      DESCB( 1 ) = 502
00560                      DESCB( 3 ) = N
00561                      DESCB( 4 ) = NB
00562                      DESCB( 5 ) = 0
00563                      DESCB( 2 ) = ICTXT
00564                      DESCB( 6 ) = DESCB2D( LLD_ )
00565                      DESCB( 7 ) = 0
00566 *
00567 *                    reset free_ptr to reuse space for right hand sides
00568 *
00569                      IF( IPB .GT. 0 ) THEN
00570                        FREE_PTR = IPB
00571                      ENDIF
00572 *
00573                      FREE_PTR = FREE_PTR + IPREPAD
00574                      IPB = FREE_PTR
00575                      FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
00576      $                          + IPOSTPAD
00577 *
00578 *                    Allocate workspace for workspace in TRS routine:
00579 *
00580                      IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU)
00581 *
00582                      IPW_SOLVE = FREE_PTR
00583                      FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
00584 *
00585                      IERR( 1 ) = 0
00586                      IF( FREE_PTR.GT.MEMSIZ ) THEN
00587                         IF( IAM.EQ.0 )
00588      $                     WRITE( NOUT, FMT = 9996 )'solve',
00589      $                            ( FREE_PTR )*DBLESZ
00590                         IERR( 1 ) = 1
00591                      END IF
00592 *
00593 *                    Check all processes for an error
00594 *
00595                      CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
00596      $                             IERR, 1, -1, 0 )
00597 *
00598                      IF( IERR( 1 ).GT.0 ) THEN
00599                         IF( IAM.EQ.0 )
00600      $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00601                         KSKIP = KSKIP + 1
00602                         GO TO 15
00603                      END IF
00604 *
00605                      MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
00606 *
00607 *                    Generate RHS
00608 *
00609                      CALL PDMATGEN(ICTXTB, 'No', 'No',
00610      $                        DESCB2D( M_ ), DESCB2D( N_ ),
00611      $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
00612      $                        MEM( IPB ),
00613      $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
00614      $                        DESCB2D( CSRC_ ),
00615      $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
00616      $                        MYROW, NPCOL, NPROW )
00617 *
00618                      IF( CHECK ) THEN
00619                         CALL PDFILLPAD( ICTXTB, NB, NRHS,
00620      $                                  MEM( IPB-IPREPAD ),
00621      $                                  DESCB2D( LLD_ ),
00622      $                                  IPREPAD, IPOSTPAD,
00623      $                                  PADVAL )
00624                         CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
00625      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00626      $                                  WORKSIZ, IPREPAD,
00627      $                                  IPOSTPAD, PADVAL )
00628                      END IF
00629 *
00630 *
00631                      CALL BLACS_BARRIER( ICTXT, 'All')
00632                      CALL SLTIMER( 2 )
00633 *
00634 *                    Solve linear system via factorization
00635 *
00636                      CALL PDGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
00637      $                             1, DESCA, IPIV, MEM( IPB ), 1, DESCB,
00638      $                             MEM( IP_FILLIN ), FILLIN_SIZE,
00639      $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
00640      $                             INFO )
00641 *
00642                      CALL SLTIMER( 2 )
00643 *
00644                      IF( INFO.NE.0 ) THEN
00645                        IF( IAM.EQ.0 )
00646      $  WRITE( NOUT, FMT = * ) 'PDGBTRS INFO=', INFO
00647                        KFAIL = KFAIL + 1
00648                        PASSED = 'FAILED'
00649                        GO TO 20
00650                      END IF
00651 *
00652                      IF( CHECK ) THEN
00653 *
00654 *                       check for memory overwrite
00655 *
00656                         CALL PDCHEKPAD( ICTXT, 'PDGBTRS-work',
00657      $                                  WORKSIZ, 1,
00658      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00659      $                                  WORKSIZ, IPREPAD,
00660      $                                  IPOSTPAD, PADVAL )
00661 *
00662 *                       check the solution to rhs
00663 *
00664                         SRESID = ZERO
00665 *
00666                         CALL PDDBLASCHK( 'N', 'N', TRANS,
00667      $                       N, BWL, BWU, NRHS,
00668      $                       MEM( IPB ), 1, 1, DESCB2D,
00669      $                       IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D,
00670      $                       IBSEED, ANORM, SRESID,
00671      $                       MEM( IP_DRIVER_W ), WORKSIZ )
00672 *
00673                         IF( IAM.EQ.0 ) THEN
00674                            IF( SRESID.GT.THRESH )
00675      $                        WRITE( NOUT, FMT = 9985 ) SRESID
00676                         END IF
00677 *
00678 *                       The second test is a NaN trap
00679 *
00680                         IF( ( SRESID.LE.THRESH          ).AND.
00681      $                      ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN
00682                            KPASS = KPASS + 1
00683                            PASSED = 'PASSED'
00684                         ELSE
00685                            KFAIL = KFAIL + 1
00686                            PASSED = 'FAILED'
00687                         END IF
00688 *
00689                      END IF
00690 *
00691    15                CONTINUE
00692 *                    Skipped tests jump to here to print out "SKIPPED"
00693 *
00694 *                    Gather maximum of all CPU and WALL clock timings
00695 *
00696                      CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
00697      $                               WTIME )
00698                      CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
00699      $                               CTIME )
00700 *
00701 *                    Print results
00702 *
00703                      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
00704 *
00705                         NOPS = 0
00706                         NOPS2 = 0
00707 *
00708                         N_FIRST = NB
00709                         NPROCS_REAL = ( N-1 )/NB + 1
00710                         N_LAST = MOD( N-1, NB ) + 1
00711 *
00712 *                       2 N bwl (bwl+bwu) + N (bwl) flops
00713 *                          for LU factorization
00714 *
00715                         NOPS = 2*(DBLE(N)*DBLE(BWL)*
00716      $                         DBLE((BWL+BWU))) +
00717      $                         (DBLE(N)*DBLE(BWL))
00718 *
00719 *                       nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
00720 *
00721                         NOPS = NOPS +
00722      $                  2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU)))
00723      $                   *DBLE(NRHS))
00724 *
00725 *                       Second calc to represent actual hardware speed
00726 *
00727 *                     2*N_FIRST (bwl+bwu)*bwu Flops for LU
00728 *                       factorization in proc 1
00729 *
00730                       NOPS2 = 2*( (DBLE(N_FIRST)*
00731      $                  DBLE((BWL+BWU))*DBLE(BWU)))
00732 *
00733                       IF ( NPROCS_REAL .GT. 1) THEN
00734 *                       8 N_LAST (bwl+bwu)*(bwl+bwu)
00735 *                         flops for LU and spike
00736 *                         calc in last processor
00737 *
00738                         NOPS2 = NOPS2 +
00739      $                          8*( (DBLE(N_LAST)*DBLE((BWL+BWU))
00740      $                          *DBLE((BWL+BWU))) )
00741                       ENDIF
00742 *
00743                       IF ( NPROCS_REAL .GT. 2) THEN
00744 *                       8 NB (bwl+bwu)*(bwl+bwu)  flops for LU and spike
00745 *                         calc in other processors
00746 *
00747                         NOPS2 = NOPS2 + (NPROCS_REAL-2)*
00748      $                          8*( (DBLE(NB)*DBLE((BWL+BWU))
00749      $                          *DBLE((BWL+BWU))) )
00750                       ENDIF
00751 *
00752 *                     Reduced system
00753 *
00754                       NOPS2 = NOPS2 +
00755      $                  2*( NPROCS_REAL-1 ) *
00756      $                  ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 )
00757                       IF( NPROCS_REAL .GT. 1 ) THEN
00758                         NOPS2 = NOPS2 +
00759      $                    2*( NPROCS_REAL-2 ) *
00760      $                    (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU))
00761                       ENDIF
00762 *
00763 *                     Solve stage
00764 *
00765 *                     nrhs*2 n_first*
00766 *                        (bwl+(bwl+bwu))
00767 *                        flops for L,U solve in proc 1.
00768 *
00769                       NOPS2 = NOPS2 +
00770      $                  2*
00771      $                  DBLE(N_FIRST)*
00772      $                  DBLE(NRHS) *
00773      $                  ( DBLE(BWL)+DBLE((BWL+BWU)))
00774 *
00775                       IF ( NPROCS_REAL .GT. 1 ) THEN
00776 *
00777 *                       2*nrhs*2 n_last
00778 *                        ((bwl+bwu)+(bwl+bwu))
00779 *                       flops for LU solve in other procs
00780 *
00781                         NOPS2 = NOPS2 +
00782      $                    4*
00783      $                    (DBLE(N_LAST)*(DBLE((BWL+BWU))+
00784      $                    DBLE((BWL+BWU))))*DBLE(NRHS)
00785                       ENDIF
00786 *
00787                       IF ( NPROCS_REAL .GT. 2 ) THEN
00788 *
00789 *                       2*nrhs*2 NB
00790 *                        ((bwl+bwu)+(bwl+bwu))
00791 *                        flops for LU solve in other procs
00792 *
00793                         NOPS2 = NOPS2 +
00794      $                    ( NPROCS_REAL-2)*2*
00795      $                 ( (DBLE(NB)*(DBLE((BWL+BWU))+
00796      $                 DBLE((BWL+BWU))))*DBLE(NRHS) )
00797                       ENDIF
00798 *
00799 *                     Reduced system
00800 *
00801                       NOPS2 = NOPS2 +
00802      $                  NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) )
00803                       IF( NPROCS_REAL .GT. 1 ) THEN
00804                         NOPS2 = NOPS2 +
00805      $                   NRHS*( NPROCS_REAL-2 ) *
00806      $                   ( 6 * (BWL+BWU)*(BWL+BWU) )
00807                       ENDIF
00808 *
00809 *
00810 *                       Calculate total megaflops - factorization and/or
00811 *                       solve -- for WALL and CPU time, and print output
00812 *
00813 *                       Print WALL time if machine supports it
00814 *
00815                         IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
00816                            TMFLOPS = NOPS /
00817      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00818                         ELSE
00819                            TMFLOPS = 0.0D+0
00820                         END IF
00821 *
00822                         IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
00823                            TMFLOPS2 = NOPS2 /
00824      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00825                         ELSE
00826                            TMFLOPS2 = 0.0D+0
00827                         END IF
00828 *
00829                         IF( WTIME( 2 ).GE.0.0D+0 )
00830      $                     WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS,
00831      $                            N,
00832      $                            BWL, BWU,
00833      $                            NB, NRHS, NPROW, NPCOL,
00834      $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
00835      $                            TMFLOPS2, PASSED
00836 *
00837 *                       Print CPU time if machine supports it
00838 *
00839                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00840                            TMFLOPS = NOPS /
00841      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00842                         ELSE
00843                            TMFLOPS = 0.0D+0
00844                         END IF
00845 *
00846                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00847                            TMFLOPS2 = NOPS2 /
00848      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00849                         ELSE
00850                            TMFLOPS2 = 0.0D+0
00851                         END IF
00852 *
00853                         IF( CTIME( 2 ).GE.0.0D+0 )
00854      $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS,
00855      $                            N,
00856      $                            BWL, BWU,
00857      $                            NB, NRHS, NPROW, NPCOL,
00858      $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
00859      $                            TMFLOPS2, PASSED
00860 *
00861                      END IF
00862    20          CONTINUE
00863 *
00864 *
00865    30       CONTINUE
00866 *           NNB loop
00867 *
00868    45      CONTINUE
00869 *          BW[] loop
00870 *
00871    40   CONTINUE
00872 *       NMAT loop
00873 *
00874         CALL BLACS_GRIDEXIT( ICTXT )
00875         CALL BLACS_GRIDEXIT( ICTXTB )
00876 *
00877    50   CONTINUE
00878 *       NGRIDS DROPOUT
00879    60 CONTINUE
00880 *     NGRIDS loop
00881 *
00882 *     Print ending messages and close output file
00883 *
00884       IF( IAM.EQ.0 ) THEN
00885          KTESTS = KPASS + KFAIL + KSKIP
00886          WRITE( NOUT, FMT = * )
00887          WRITE( NOUT, FMT = 9992 ) KTESTS
00888          IF( CHECK ) THEN
00889             WRITE( NOUT, FMT = 9991 ) KPASS
00890             WRITE( NOUT, FMT = 9989 ) KFAIL
00891          ELSE
00892             WRITE( NOUT, FMT = 9990 ) KPASS
00893          END IF
00894          WRITE( NOUT, FMT = 9988 ) KSKIP
00895          WRITE( NOUT, FMT = * )
00896          WRITE( NOUT, FMT = * )
00897          WRITE( NOUT, FMT = 9987 )
00898          IF( NOUT.NE.6 .AND. NOUT.NE.0 )
00899      $      CLOSE ( NOUT )
00900       END IF
00901 *
00902       CALL BLACS_EXIT( 0 )
00903 *
00904  9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
00905      $        '; It should be at least 1' )
00906  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
00907      $        I4 )
00908  9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
00909  9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
00910      $        I11 )
00911  9995 FORMAT( 'TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time ',
00912      $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
00913  9994 FORMAT( '---- -- ------  --- ---  ---- ----- ---- ---- -------- ',
00914      $        '-------- -------- -------- ------' )
00915  9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
00916      $                                          1X,I4,1X,I4,1X,F9.3,
00917      $        F9.4,        F9.2,    F9.2, 1X, A6 )
00918  9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
00919  9991 FORMAT( I5, ' tests completed and passed residual checks.' )
00920  9990 FORMAT( I5, ' tests completed without checking.' )
00921  9989 FORMAT( I5, ' tests completed and failed residual checks.' )
00922  9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
00923  9987 FORMAT( 'END OF TESTS.' )
00924  9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
00925  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
00926 *
00927       STOP
00928 *
00929 *     End of PDGBTRS_DRIVER
00930 *
00931       END
00932 *