ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pcdtdriver.f
Go to the documentation of this file.
00001       PROGRAM PCDTDRIVER
00002 *
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     November 15, 1997
00008 *
00009 *  Purpose
00010 *  =======
00011 *
00012 *  PCDTDRIVER is a test program for the
00013 *  ScaLAPACK Band Cholesky routines corresponding to the options
00014 *  indicated by CDT.  This test driver performs an
00015 *  A = L*U factorization
00016 *  and solves a linear system with the factors for 1 or more RHS.
00017 *
00018 *  The program must be driven by a short data file.
00019 *  Here's an example file:
00020 *'ScaLAPACK, Version 1.2, banded linear systems input file'
00021 *'PVM.'
00022 *''                              output file name (if any)
00023 *6                               device out
00024 *'L'                             define Lower or Upper
00025 *9                               number of problem sizes
00026 *1 5 17 28 37 121 200 1023 2048 3073     values of N
00027 *6                               number of bandwidths
00028 *1 2 4 10 31 64                values of BW
00029 *1                               number of NB's
00030 *-1 3 4 5                        values of NB (-1 for automatic choice)
00031 *1                               number of NRHS's (must be 1)
00032 *8                               values of NRHS
00033 *1                               number of NBRHS's (ignored)
00034 *1                               values of NBRHS (ignored)
00035 *6                               number of process grids
00036 *1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
00037 *3.0                             threshold
00038 *
00039 *  Internal Parameters
00040 *  ===================
00041 *
00042 *  TOTMEM   INTEGER, default = 6200000.
00043 *           TOTMEM is a machine-specific parameter indicating the
00044 *           maximum amount of available memory in bytes.
00045 *           The user should customize TOTMEM to his platform.  Remember
00046 *           to leave room in memory for the operating system, the BLACS
00047 *           buffer, etc.  For example, on a system with 8 MB of memory
00048 *           per process (e.g., one processor on an Intel iPSC/860), the
00049 *           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
00050 *           code, BLACS buffer, etc).  However, for PVM, we usually set
00051 *           TOTMEM = 2000000.  Some experimenting with the maximum value
00052 *           of TOTMEM may be required.
00053 *
00054 *  INTGSZ   INTEGER, default = 4 bytes.
00055 *  CPLXSZ   INTEGER, default = 8 bytes.
00056 *           INTGSZ and CPLXSZ indicate the length in bytes on the
00057 *           given platform for an integer and a single precision
00058 *           complex.
00059 *  MEM      DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
00060 *           All arrays used by ScaLAPACK routines are allocated from
00061 *           this array and referenced by pointers.  The integer IPB,
00062 *           for example, is a pointer to the starting element of MEM for
00063 *           the solution vector(s) B.
00064 *
00065 *  =====================================================================
00066 *
00067 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00068 *    Current address: Lawrence Livermore National Labs.
00069 *  This version released: August, 2001.
00070 *
00071 *  =====================================================================
00072 *
00073 *     .. Parameters ..
00074       INTEGER            TOTMEM
00075       PARAMETER          ( TOTMEM = 3000000 )
00076       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00077      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00078       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00079      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00080      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00081 *
00082       REAL               ZERO
00083       INTEGER            CPLXSZ, MEMSIZ, NTESTS
00084       COMPLEX            PADVAL
00085       PARAMETER          ( CPLXSZ = 8,
00086      $                     MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20,
00087      $                     PADVAL = ( -9923.0E+0, -9923.0E+0 ),
00088      $                     ZERO = 0.0E+0 )
00089       INTEGER            INT_ONE
00090       PARAMETER          ( INT_ONE = 1 )
00091 *     ..
00092 *     .. Local Scalars ..
00093       LOGICAL            CHECK
00094       CHARACTER          TRANS
00095       CHARACTER*6        PASSED
00096       CHARACTER*80       OUTFILE
00097       INTEGER            BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH,
00098      $                   I, IAM, IASEED, IBSEED, ICTXT, ICTXTB,
00099      $                   IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD,
00100      $                   IPREPAD, IPW, IPW_SIZE, IPW_SOLVE,
00101      $                   IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K,
00102      $                   KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE,
00103      $                   MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR,
00104      $                   NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL,
00105      $                   NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ
00106       REAL               ANORM, SRESID, THRESH
00107       DOUBLE PRECISION   NOPS, NOPS2, TMFLOPS, TMFLOPS2
00108 *     ..
00109 *     .. Local Arrays ..
00110       INTEGER            BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ),
00111      $                   DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ),
00112      $                   IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ),
00113      $                   NRVAL( NTESTS ), NVAL( NTESTS ),
00114      $                   PVAL( NTESTS ), QVAL( NTESTS )
00115       DOUBLE PRECISION   CTIME( 2 ), WTIME( 2 )
00116       COMPLEX            MEM( MEMSIZ )
00117 *     ..
00118 *     .. External Subroutines ..
00119       EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
00120      $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
00121      $                   BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN,
00122      $                   PCCHEKPAD, PCDTINFO, PCDTLASCHK, PCDTTRF,
00123      $                   PCDTTRS, PCFILLPAD, PCMATGEN, SLBOOT,
00124      $                   SLCOMBINE, SLTIMER
00125 *     ..
00126 *     .. External Functions ..
00127       INTEGER            NUMROC
00128       LOGICAL            LSAME
00129       REAL               PCLANGE
00130       EXTERNAL           LSAME, NUMROC, PCLANGE
00131 *     ..
00132 *     .. Intrinsic Functions ..
00133       INTRINSIC          DBLE, MAX, MIN, MOD
00134 *     ..
00135 *     .. Data Statements ..
00136       DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
00137 *     ..
00138 *
00139 *
00140 *
00141 *     .. Executable Statements ..
00142 *
00143 *     Get starting information
00144 *
00145       CALL BLACS_PINFO( IAM, NPROCS )
00146       IASEED = 100
00147       IBSEED = 200
00148 *
00149       CALL PCDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW,
00150      $               BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR,
00151      $               NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL,
00152      $               NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
00153 *
00154       CHECK = ( THRESH.GE.0.0E+0 )
00155 *
00156 *     Print headings
00157 *
00158       IF( IAM.EQ.0 ) THEN
00159          WRITE( NOUT, FMT = * )
00160          WRITE( NOUT, FMT = 9995 )
00161          WRITE( NOUT, FMT = 9994 )
00162          WRITE( NOUT, FMT = * )
00163       END IF
00164 *
00165 *     Loop over different process grids
00166 *
00167       DO 60 I = 1, NGRIDS
00168 *
00169          NPROW = PVAL( I )
00170          NPCOL = QVAL( I )
00171 *
00172 *        Make sure grid information is correct
00173 *
00174          IERR( 1 ) = 0
00175          IF( NPROW.LT.1 ) THEN
00176             IF( IAM.EQ.0 )
00177      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
00178             IERR( 1 ) = 1
00179          ELSE IF( NPCOL.LT.1 ) THEN
00180             IF( IAM.EQ.0 )
00181      $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
00182             IERR( 1 ) = 1
00183          ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
00184             IF( IAM.EQ.0 )
00185      $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
00186             IERR( 1 ) = 1
00187          END IF
00188 *
00189          IF( IERR( 1 ).GT.0 ) THEN
00190             IF( IAM.EQ.0 )
00191      $         WRITE( NOUT, FMT = 9997 ) 'grid'
00192             KSKIP = KSKIP + 1
00193             GO TO 50
00194          END IF
00195 *
00196 *        Define process grid
00197 *
00198          CALL BLACS_GET( -1, 0, ICTXT )
00199          CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
00200 *
00201 *
00202 *        Define transpose process grid
00203 *
00204          CALL BLACS_GET( -1, 0, ICTXTB )
00205          CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
00206 *
00207 *        Go to bottom of process grid loop if this case doesn't use my
00208 *        process
00209 *
00210          CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00211 *
00212          IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
00213             GO TO 50
00214          ENDIF
00215 *
00216          DO 40 J = 1, NMAT
00217 *
00218            IERR( 1 ) = 0
00219 *
00220            N = NVAL( J )
00221 *
00222 *          Make sure matrix information is correct
00223 *
00224            IF( N.LT.1 ) THEN
00225                IF( IAM.EQ.0 )
00226      $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
00227                IERR( 1 ) = 1
00228            END IF
00229 *
00230 *          Check all processes for an error
00231 *
00232            CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00233      $                    -1, 0 )
00234 *
00235            IF( IERR( 1 ).GT.0 ) THEN
00236                IF( IAM.EQ.0 )
00237      $            WRITE( NOUT, FMT = 9997 ) 'size'
00238                KSKIP = KSKIP + 1
00239                GO TO 40
00240            END IF
00241 *
00242 *
00243            DO 45 BW_NUM = 1, NBW
00244 *
00245              IERR( 1 ) = 0
00246 *
00247              BWL = 1
00248              IF( BWL.LT.1 ) THEN
00249                IF( IAM.EQ.0 )
00250      $            WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL
00251                IERR( 1 ) = 1
00252              END IF
00253 *
00254              BWU = 1
00255              IF( BWU.LT.1 ) THEN
00256                IF( IAM.EQ.0 )
00257      $            WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU
00258                IERR( 1 ) = 1
00259              END IF
00260 *
00261              IF( BWL.GT.N-1 ) THEN
00262                IF( IAM.EQ.0 ) THEN
00263                  IERR( 1 ) = 1
00264                ENDIF
00265              END IF
00266 *
00267              IF( BWU.GT.N-1 ) THEN
00268                IF( IAM.EQ.0 ) THEN
00269                  IERR( 1 ) = 1
00270                ENDIF
00271              END IF
00272 *
00273 *            Check all processes for an error
00274 *
00275              CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00276      $                    -1, 0 )
00277 *
00278              IF( IERR( 1 ).GT.0 ) THEN
00279                KSKIP = KSKIP + 1
00280                GO TO 45
00281              END IF
00282 *
00283              DO 30 K = 1, NNB
00284 *
00285                IERR( 1 ) = 0
00286 *
00287                NB = NBVAL( K )
00288                IF( NB.LT.0 ) THEN
00289                   NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 )
00290      $               + INT_ONE
00291                   NB = MAX( NB, 2*INT_ONE )
00292                   NB = MIN( N, NB )
00293                END IF
00294 *
00295 *              Make sure NB is legal
00296 *
00297                IERR( 1 ) = 0
00298                IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN
00299                   IERR( 1 ) = 1
00300                END IF
00301 *
00302 *              Check all processes for an error
00303 *
00304                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
00305      $                       -1, 0 )
00306 *
00307                IF( IERR( 1 ).GT.0 ) THEN
00308                   KSKIP = KSKIP + 1
00309                   GO TO 30
00310                END IF
00311 *
00312 *              Padding constants
00313 *
00314                NP = NUMROC( (3), (3),
00315      $                      MYROW, 0, NPROW )
00316                NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
00317 *
00318                IF( CHECK ) THEN
00319                   IPREPAD  = ((3)+10)
00320                   IMIDPAD  = 10
00321                   IPOSTPAD = ((3)+10)
00322                ELSE
00323                   IPREPAD  = 0
00324                   IMIDPAD  = 0
00325                   IPOSTPAD = 0
00326                END IF
00327 *
00328 *              Initialize the array descriptor for the matrix A
00329 *
00330                CALL DESCINIT( DESCA2D, N, (3),
00331      $                       NB, 1, 0, 0,
00332      $                       ICTXTB, NB+10, IERR( 1 ) )
00333 *
00334 *              Convert this to 1D descriptor
00335 *
00336                DESCA( 1 ) = 501
00337                DESCA( 3 ) = N
00338                DESCA( 4 ) = NB
00339                DESCA( 5 ) = 0
00340                DESCA( 2 ) = ICTXT
00341                DESCA( 6 ) = ((3)+10)
00342                DESCA( 7 ) = 0
00343 *
00344                IERR_TEMP = IERR( 1 )
00345                IERR( 1 ) = 0
00346                IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
00347 *
00348 *              Check all processes for an error
00349 *
00350                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
00351 *
00352                IF( IERR( 1 ).LT.0 ) THEN
00353                   IF( IAM.EQ.0 )
00354      $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
00355                   KSKIP = KSKIP + 1
00356                   GO TO 30
00357                END IF
00358 *
00359 *              Assign pointers into MEM for SCALAPACK arrays, A is
00360 *              allocated starting at position MEM( IPREPAD+1 )
00361 *
00362                FREE_PTR = 1
00363                IPB = 0
00364 *
00365 *              Save room for prepadding
00366                FREE_PTR = FREE_PTR + IPREPAD
00367 *
00368                IPA = FREE_PTR
00369                FREE_PTR = FREE_PTR + (NB+10)*(3)
00370      $                     + IPOSTPAD
00371 *
00372 *              Add memory for fillin
00373 *              Fillin space needs to store:
00374 *                Fillin spike:
00375 *                Contribution to previous proc's diagonal block of
00376 *                  reduced system:
00377 *                Off-diagonal block of reduced system:
00378 *                Diagonal block of reduced system:
00379 *
00380                FILLIN_SIZE =
00381      $            (12*NPCOL+3*NB)
00382 *
00383 *              Claim memory for fillin
00384 *
00385                FREE_PTR = FREE_PTR + IPREPAD
00386                IP_FILLIN = FREE_PTR
00387                FREE_PTR = FREE_PTR + FILLIN_SIZE
00388 *
00389 *              Workspace needed by computational routines:
00390 *
00391                IPW_SIZE = 0
00392 *
00393 *              factorization:
00394 *
00395                IPW_SIZE = 8*NPCOL
00396 *
00397 *              Claim memory for IPW
00398 *
00399                IPW = FREE_PTR
00400                FREE_PTR = FREE_PTR + IPW_SIZE
00401 *
00402 *              Check for adequate memory for problem size
00403 *
00404                IERR( 1 ) = 0
00405                IF( FREE_PTR.GT.MEMSIZ ) THEN
00406                   IF( IAM.EQ.0 )
00407      $               WRITE( NOUT, FMT = 9996 )
00408      $               'divide and conquer factorization',
00409      $               (FREE_PTR )*CPLXSZ
00410                   IERR( 1 ) = 1
00411                END IF
00412 *
00413 *              Check all processes for an error
00414 *
00415                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00416      $                       1, -1, 0 )
00417 *
00418                IF( IERR( 1 ).GT.0 ) THEN
00419                   IF( IAM.EQ.0 )
00420      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00421                   KSKIP = KSKIP + 1
00422                   GO TO 30
00423                END IF
00424 *
00425 *              Worksize needed for LAPRNT
00426                WORKSIZ = MAX( ((3)+10), NB )
00427 *
00428                IF( CHECK ) THEN
00429 *
00430 *                 Calculate the amount of workspace required by
00431 *                 the checking routines.
00432 *
00433 *                 PCLANGE
00434                   WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
00435 *
00436 *                 PCDTLASCHK
00437                   WORKSIZ = MAX( WORKSIZ,
00438      $          MAX(5,NB)+2*NB )
00439                END IF
00440 *
00441                FREE_PTR = FREE_PTR + IPREPAD
00442                IP_DRIVER_W = FREE_PTR
00443                FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
00444 *
00445 *
00446 *              Check for adequate memory for problem size
00447 *
00448                IERR( 1 ) = 0
00449                IF( FREE_PTR.GT.MEMSIZ ) THEN
00450                   IF( IAM.EQ.0 )
00451      $               WRITE( NOUT, FMT = 9996 ) 'factorization',
00452      $               ( FREE_PTR )*CPLXSZ
00453                   IERR( 1 ) = 1
00454                END IF
00455 *
00456 *              Check all processes for an error
00457 *
00458                CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
00459      $                       1, -1, 0 )
00460 *
00461                IF( IERR( 1 ).GT.0 ) THEN
00462                   IF( IAM.EQ.0 )
00463      $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00464                   KSKIP = KSKIP + 1
00465                   GO TO 30
00466                END IF
00467 *
00468                CALL PCBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB,
00469      $                         MEM( IPA ), NB+10, 0, 0, IASEED, MYROW,
00470      $                         MYCOL, NPROW, NPCOL )
00471                CALL PCFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ),
00472      $                          NB+10, IPREPAD, IPOSTPAD,
00473      $                          PADVAL )
00474 *
00475                CALL PCFILLPAD( ICTXT, WORKSIZ, 1,
00476      $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00477      $                          IPREPAD, IPOSTPAD, PADVAL )
00478 *
00479 *              Calculate norm of A for residual error-checking
00480 *
00481                IF( CHECK ) THEN
00482 *
00483                   ANORM = PCLANGE( 'I', N,
00484      $                            (3), MEM( IPA ), 1, 1,
00485      $                            DESCA2D, MEM( IP_DRIVER_W ) )
00486                   CALL PCCHEKPAD( ICTXT, 'PCLANGE', NQ, NP,
00487      $                         MEM( IPA-IPREPAD ), NB+10,
00488      $                         IPREPAD, IPOSTPAD, PADVAL )
00489                   CALL PCCHEKPAD( ICTXT, 'PCLANGE',
00490      $                            WORKSIZ, 1,
00491      $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
00492      $                            IPREPAD, IPOSTPAD, PADVAL )
00493                END IF
00494 *
00495 *
00496                CALL SLBOOT()
00497                CALL BLACS_BARRIER( ICTXT, 'All' )
00498 *
00499 *              Perform factorization
00500 *
00501                CALL SLTIMER( 1 )
00502 *
00503                CALL PCDTTRF( N, MEM( IPA+2*( NB+10 ) ),
00504      $                       MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1,
00505      $                       DESCA, MEM( IP_FILLIN ), FILLIN_SIZE,
00506      $                       MEM( IPW ), IPW_SIZE, INFO )
00507 *
00508                CALL SLTIMER( 1 )
00509 *
00510                IF( INFO.NE.0 ) THEN
00511                   IF( IAM.EQ.0 ) THEN
00512                     WRITE( NOUT, FMT = * ) 'PCDTTRF INFO=', INFO
00513                   ENDIF
00514                   KFAIL = KFAIL + 1
00515                   GO TO 30
00516                END IF
00517 *
00518                IF( CHECK ) THEN
00519 *
00520 *                 Check for memory overwrite in factorization
00521 *
00522                   CALL PCCHEKPAD( ICTXT, 'PCDTTRF', NQ,
00523      $                         NP, MEM( IPA-IPREPAD ), NB+10,
00524      $                         IPREPAD, IPOSTPAD, PADVAL )
00525                END IF
00526 *
00527 *
00528 *              Loop over the different values for NRHS
00529 *
00530                DO 20 HH = 1, NNR
00531 *
00532                   IERR( 1 ) = 0
00533 *
00534                   NRHS = NRVAL( HH )
00535 *
00536 *                    Initialize Array Descriptor for rhs
00537 *
00538                      CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
00539      $                             ICTXTB, NB+10, IERR( 1 ) )
00540 *
00541 *                    Convert this to 1D descriptor
00542 *
00543                      DESCB( 1 ) = 502
00544                      DESCB( 3 ) = N
00545                      DESCB( 4 ) = NB
00546                      DESCB( 5 ) = 0
00547                      DESCB( 2 ) = ICTXT
00548                      DESCB( 6 ) = DESCB2D( LLD_ )
00549                      DESCB( 7 ) = 0
00550 *
00551 *                    reset free_ptr to reuse space for right hand sides
00552 *
00553                      IF( IPB .GT. 0 ) THEN
00554                        FREE_PTR = IPB
00555                      ENDIF
00556 *
00557                      FREE_PTR = FREE_PTR + IPREPAD
00558                      IPB = FREE_PTR
00559                      FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
00560      $                          + IPOSTPAD
00561 *
00562 *                    Allocate workspace for workspace in TRS routine:
00563 *
00564                      IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS
00565 *
00566                      IPW_SOLVE = FREE_PTR
00567                      FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
00568 *
00569                      IERR( 1 ) = 0
00570                      IF( FREE_PTR.GT.MEMSIZ ) THEN
00571                         IF( IAM.EQ.0 )
00572      $                     WRITE( NOUT, FMT = 9996 )'solve',
00573      $                            ( FREE_PTR )*CPLXSZ
00574                         IERR( 1 ) = 1
00575                      END IF
00576 *
00577 *                    Check all processes for an error
00578 *
00579                      CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
00580      $                             IERR, 1, -1, 0 )
00581 *
00582                      IF( IERR( 1 ).GT.0 ) THEN
00583                         IF( IAM.EQ.0 )
00584      $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
00585                         KSKIP = KSKIP + 1
00586                         GO TO 15
00587                      END IF
00588 *
00589                      MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
00590 *
00591 *                    Generate RHS
00592 *
00593                      CALL PCMATGEN(ICTXTB, 'No', 'No',
00594      $                        DESCB2D( M_ ), DESCB2D( N_ ),
00595      $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
00596      $                        MEM( IPB ),
00597      $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
00598      $                        DESCB2D( CSRC_ ),
00599      $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
00600      $                        MYROW, NPCOL, NPROW )
00601 *
00602                      IF( CHECK ) THEN
00603                         CALL PCFILLPAD( ICTXTB, NB, NRHS,
00604      $                                  MEM( IPB-IPREPAD ),
00605      $                                  DESCB2D( LLD_ ),
00606      $                                  IPREPAD, IPOSTPAD,
00607      $                                  PADVAL )
00608                         CALL PCFILLPAD( ICTXT, WORKSIZ, 1,
00609      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00610      $                                  WORKSIZ, IPREPAD,
00611      $                                  IPOSTPAD, PADVAL )
00612                      END IF
00613 *
00614 *
00615                      CALL BLACS_BARRIER( ICTXT, 'All')
00616                      CALL SLTIMER( 2 )
00617 *
00618 *                    Solve linear system via factorization
00619 *
00620                      CALL PCDTTRS( TRANS, N, NRHS,
00621      $                             MEM( IPA+2*( NB+10 ) ),
00622      $                             MEM( IPA+1*( NB+10 ) ), MEM( IPA ),
00623      $                             1, DESCA, MEM( IPB ), 1, DESCB,
00624      $                             MEM( IP_FILLIN ), FILLIN_SIZE,
00625      $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
00626      $                             INFO )
00627 *
00628                      CALL SLTIMER( 2 )
00629 *
00630                      IF( INFO.NE.0 ) THEN
00631                        IF( IAM.EQ.0 )
00632      $  WRITE( NOUT, FMT = * ) 'PCDTTRS INFO=', INFO
00633                        KFAIL = KFAIL + 1
00634                        PASSED = 'FAILED'
00635                        GO TO 20
00636                      END IF
00637 *
00638                      IF( CHECK ) THEN
00639 *
00640 *                       check for memory overwrite
00641 *
00642                         CALL PCCHEKPAD( ICTXT, 'PCDTTRS-work',
00643      $                                  WORKSIZ, 1,
00644      $                                  MEM( IP_DRIVER_W-IPREPAD ),
00645      $                                  WORKSIZ, IPREPAD,
00646      $                                  IPOSTPAD, PADVAL )
00647 *
00648 *                       check the solution to rhs
00649 *
00650                         SRESID = ZERO
00651 *
00652 *                       Reset descriptor describing A to 1-by-P grid for
00653 *                          use in banded utility routines
00654 *
00655                         CALL DESCINIT( DESCA2D, (3), N,
00656      $                       (3), NB, 0, 0,
00657      $                       ICTXT, (3), IERR( 1 ) )
00658                         CALL PCDTLASCHK( 'N', 'D', TRANS,
00659      $                       N, BWL, BWU, NRHS,
00660      $                       MEM( IPB ), 1, 1, DESCB2D,
00661      $                       IASEED, MEM( IPA ), 1, 1, DESCA2D,
00662      $                       IBSEED, ANORM, SRESID,
00663      $                       MEM( IP_DRIVER_W ), WORKSIZ )
00664 *
00665                         IF( IAM.EQ.0 ) THEN
00666                            IF( SRESID.GT.THRESH )
00667      $                        WRITE( NOUT, FMT = 9985 ) SRESID
00668                         END IF
00669 *
00670 *                       The second test is a NaN trap
00671 *
00672                         IF( ( SRESID.LE.THRESH          ).AND.
00673      $                      ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN
00674                            KPASS = KPASS + 1
00675                            PASSED = 'PASSED'
00676                         ELSE
00677                            KFAIL = KFAIL + 1
00678                            PASSED = 'FAILED'
00679                         END IF
00680 *
00681                      END IF
00682 *
00683    15                CONTINUE
00684 *                    Skipped tests jump to here to print out "SKIPPED"
00685 *
00686 *                    Gather maximum of all CPU and WALL clock timings
00687 *
00688                      CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
00689      $                               WTIME )
00690                      CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
00691      $                               CTIME )
00692 *
00693 *                    Print results
00694 *
00695                      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
00696 *
00697                         NOPS = 0
00698                         NOPS2 = 0
00699 *
00700                         N_FIRST = NB
00701                         NPROCS_REAL = ( N-1 )/NB + 1
00702                         N_LAST = MOD( N-1, NB ) + 1
00703 *
00704 *                       2 N bwl INT_ONE + N (bwl) flops
00705 *                          for LU factorization
00706 *
00707                         NOPS = 2*(DBLE(N)*DBLE(BWL)*
00708      $                         DBLE(INT_ONE)) +
00709      $                         (DBLE(N)*DBLE(BWL))
00710 *
00711 *                       nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
00712 *
00713                         NOPS = NOPS +
00714      $                  2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE))
00715      $                   *DBLE(NRHS))
00716 *
00717 *                       Multiply by 4 to get complex count
00718 *
00719                         NOPS = NOPS * DBLE(4)
00720 *
00721 *                       Second calc to represent actual hardware speed
00722 *
00723 *                     2*N_FIRST bwl*bwu Flops for LU
00724 *                       factorization in proc 1
00725 *
00726                       NOPS2 = 2*( (DBLE(N_FIRST)*
00727      $                  DBLE(BWL)*DBLE(BWU)))
00728 *
00729                       IF ( NPROCS_REAL .GT. 1) THEN
00730 *                       8 N_LAST bwl*INT_ONE
00731 *                         flops for LU and spike
00732 *                         calc in last processor
00733 *
00734                         NOPS2 = NOPS2 +
00735      $                          8*( (DBLE(N_LAST)*DBLE(BWL)
00736      $                          *DBLE(INT_ONE)) )
00737                       ENDIF
00738 *
00739                       IF ( NPROCS_REAL .GT. 2) THEN
00740 *                       8 NB bwl*INT_ONE  flops for LU and spike
00741 *                         calc in other processors
00742 *
00743                         NOPS2 = NOPS2 + (NPROCS_REAL-2)*
00744      $                          8*( (DBLE(NB)*DBLE(BWL)
00745      $                          *DBLE(INT_ONE)) )
00746                       ENDIF
00747 *
00748 *                     Reduced system
00749 *
00750                       NOPS2 = NOPS2 +
00751      $                  2*( NPROCS_REAL-1 ) *
00752      $                  ( BWL*INT_ONE*BWL/3 )
00753                       IF( NPROCS_REAL .GT. 1 ) THEN
00754                         NOPS2 = NOPS2 +
00755      $                    2*( NPROCS_REAL-2 ) *
00756      $                    (2*BWL*INT_ONE*BWL)
00757                       ENDIF
00758 *
00759 *                     Solve stage
00760 *
00761 *                     nrhs*2 n_first*
00762 *                        (bwl+INT_ONE)
00763 *                        flops for L,U solve in proc 1.
00764 *
00765                       NOPS2 = NOPS2 +
00766      $                  2*
00767      $                  DBLE(N_FIRST)*
00768      $                  DBLE(NRHS) *
00769      $                  ( DBLE(BWL)+DBLE(INT_ONE))
00770 *
00771                       IF ( NPROCS_REAL .GT. 1 ) THEN
00772 *
00773 *                       2*nrhs*2 n_last
00774 *                        (bwl+INT_ONE)
00775 *                       flops for LU solve in other procs
00776 *
00777                         NOPS2 = NOPS2 +
00778      $                    4*
00779      $                    (DBLE(N_LAST)*(DBLE(BWL)+
00780      $                    DBLE(INT_ONE)))*DBLE(NRHS)
00781                       ENDIF
00782 *
00783                       IF ( NPROCS_REAL .GT. 2 ) THEN
00784 *
00785 *                       2*nrhs*2 NB
00786 *                        (bwl+INT_ONE)
00787 *                        flops for LU solve in other procs
00788 *
00789                         NOPS2 = NOPS2 +
00790      $                    ( NPROCS_REAL-2)*2*
00791      $                 ( (DBLE(NB)*(DBLE(BWL)+
00792      $                 DBLE(INT_ONE)))*DBLE(NRHS) )
00793                       ENDIF
00794 *
00795 *                     Reduced system
00796 *
00797                       NOPS2 = NOPS2 +
00798      $                  NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE )
00799                       IF( NPROCS_REAL .GT. 1 ) THEN
00800                         NOPS2 = NOPS2 +
00801      $                   NRHS*( NPROCS_REAL-2 ) *
00802      $                   ( 6 * BWL*INT_ONE )
00803                       ENDIF
00804 *
00805 *
00806 *                     Multiply by 4 to get complex count
00807 *
00808                       NOPS2 = NOPS2 * DBLE(4)
00809 *
00810 *                       Calculate total megaflops - factorization and/or
00811 *                       solve -- for WALL and CPU time, and print output
00812 *
00813 *                       Print WALL time if machine supports it
00814 *
00815                         IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
00816                            TMFLOPS = NOPS /
00817      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00818                         ELSE
00819                            TMFLOPS = 0.0D+0
00820                         END IF
00821 *
00822                         IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
00823                            TMFLOPS2 = NOPS2 /
00824      $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
00825                         ELSE
00826                            TMFLOPS2 = 0.0D+0
00827                         END IF
00828 *
00829                         IF( WTIME( 2 ).GE.0.0D+0 )
00830      $                     WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS,
00831      $                            N,
00832      $                            BWL, BWU,
00833      $                            NB, NRHS, NPROW, NPCOL,
00834      $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
00835      $                            TMFLOPS2, PASSED
00836 *
00837 *                       Print CPU time if machine supports it
00838 *
00839                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00840                            TMFLOPS = NOPS /
00841      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00842                         ELSE
00843                            TMFLOPS = 0.0D+0
00844                         END IF
00845 *
00846                         IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
00847                            TMFLOPS2 = NOPS2 /
00848      $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
00849                         ELSE
00850                            TMFLOPS2 = 0.0D+0
00851                         END IF
00852 *
00853                         IF( CTIME( 2 ).GE.0.0D+0 )
00854      $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS,
00855      $                            N,
00856      $                            BWL, BWU,
00857      $                            NB, NRHS, NPROW, NPCOL,
00858      $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
00859      $                            TMFLOPS2, PASSED
00860 *
00861                      END IF
00862    20          CONTINUE
00863 *
00864 *
00865    30       CONTINUE
00866 *           NNB loop
00867 *
00868    45      CONTINUE
00869 *          BW[] loop
00870 *
00871    40   CONTINUE
00872 *       NMAT loop
00873 *
00874         CALL BLACS_GRIDEXIT( ICTXT )
00875         CALL BLACS_GRIDEXIT( ICTXTB )
00876 *
00877    50   CONTINUE
00878 *       NGRIDS DROPOUT
00879    60 CONTINUE
00880 *     NGRIDS loop
00881 *
00882 *     Print ending messages and close output file
00883 *
00884       IF( IAM.EQ.0 ) THEN
00885          KTESTS = KPASS + KFAIL + KSKIP
00886          WRITE( NOUT, FMT = * )
00887          WRITE( NOUT, FMT = 9992 ) KTESTS
00888          IF( CHECK ) THEN
00889             WRITE( NOUT, FMT = 9991 ) KPASS
00890             WRITE( NOUT, FMT = 9989 ) KFAIL
00891          ELSE
00892             WRITE( NOUT, FMT = 9990 ) KPASS
00893          END IF
00894          WRITE( NOUT, FMT = 9988 ) KSKIP
00895          WRITE( NOUT, FMT = * )
00896          WRITE( NOUT, FMT = * )
00897          WRITE( NOUT, FMT = 9987 )
00898          IF( NOUT.NE.6 .AND. NOUT.NE.0 )
00899      $      CLOSE ( NOUT )
00900       END IF
00901 *
00902       CALL BLACS_EXIT( 0 )
00903 *
00904  9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
00905      $        '; It should be at least 1' )
00906  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
00907      $        I4 )
00908  9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
00909  9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
00910      $        I11 )
00911  9995 FORMAT( 'TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time ',
00912      $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
00913  9994 FORMAT( '---- -- ------  --- ---  ---- ----- ---- ---- -------- ',
00914      $        '-------- -------- -------- ------' )
00915  9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
00916      $                                          1X,I4,1X,I4,1X,F9.3,
00917      $        F9.4,        F9.2,    F9.2, 1X, A6 )
00918  9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
00919  9991 FORMAT( I5, ' tests completed and passed residual checks.' )
00920  9990 FORMAT( I5, ' tests completed without checking.' )
00921  9989 FORMAT( I5, ' tests completed and failed residual checks.' )
00922  9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
00923  9987 FORMAT( 'END OF TESTS.' )
00924  9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
00925  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
00926 *
00927       STOP
00928 *
00929 *     End of PCDTTRS_DRIVER
00930 *
00931       END
00932 *