75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER cplxsz, memsiz, ntests
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
110 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
133 INTRINSIC dble,
max,
min, mod
136 DATA kfail, kpass, kskip, ktests / 4*0 /
145 CALL blacs_pinfo( iam, nprocs )
149 CALL pcdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154 check = ( thresh.GE.0.0e+0 )
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
175 IF( nprow.LT.1 )
THEN
177 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
179 ELSE IF( npcol.LT.1 )
THEN
181 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
183 ELSE IF( nprow*npcol.GT.nprocs )
THEN
185 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
189 IF( ierr( 1 ).GT.0 )
THEN
191 $
WRITE( nout, fmt = 9997 )
'grid'
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
226 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
232 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'size'
243 DO 45 bw_num = 1, nbw
250 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
257 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
261 IF( bwl.GT.n-1 )
THEN
267 IF( bwu.GT.n-1 )
THEN
275 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
278 IF( ierr( 1 ).GT.0 )
THEN
289 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
291 nb =
max( nb, 2*int_one )
298 IF( nb.LT.
min( 2*int_one, n ) )
THEN
304 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
307 IF( ierr( 1 ).GT.0 )
THEN
316 nq =
numroc( n, nb, mycol, 0, npcol )
332 $ ictxtb, nb+10, ierr( 1 ) )
341 desca( 6 ) = ((3)+10)
344 ierr_temp = ierr( 1 )
346 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
350 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
352 IF( ierr( 1 ).LT.0 )
THEN
354 $
WRITE( nout, fmt = 9997 )
'descriptor'
366 free_ptr = free_ptr + iprepad
369 free_ptr = free_ptr + (nb+10)*(3)
385 free_ptr = free_ptr + iprepad
387 free_ptr = free_ptr + fillin_size
400 free_ptr = free_ptr + ipw_size
405 IF( free_ptr.GT.memsiz )
THEN
407 $
WRITE( nout, fmt = 9996 )
408 $
'divide and conquer factorization',
415 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
418 IF( ierr( 1 ).GT.0 )
THEN
420 $
WRITE( nout, fmt = 9997 )
'MEMORY'
426 worksiz =
max( ((3)+10), nb )
434 worksiz =
max( worksiz, desca2d( nb_ ) )
437 worksiz =
max( worksiz,
441 free_ptr = free_ptr + iprepad
442 ip_driver_w = free_ptr
443 free_ptr = free_ptr + worksiz + ipostpad
449 IF( free_ptr.GT.memsiz )
THEN
451 $
WRITE( nout, fmt = 9996 )
'factorization',
452 $ ( free_ptr )*cplxsz
458 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
461 IF( ierr( 1 ).GT.0 )
THEN
463 $
WRITE( nout, fmt = 9997 )
'MEMORY'
468 CALL pcbmatgen( ictxt,
'T',
'D', bwl, bwu, n, (3), nb,
469 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
470 $ mycol, nprow, npcol )
471 CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
472 $ nb+10, iprepad, ipostpad,
476 $ mem( ip_driver_w-iprepad ), worksiz,
477 $ iprepad, ipostpad, padval )
484 $ (3), mem( ipa ), 1, 1,
485 $ desca2d, mem( ip_driver_w ) )
486 CALL pcchekpad( ictxt,
'PCLANGE', nq, np,
487 $ mem( ipa-iprepad ), nb+10,
488 $ iprepad, ipostpad, padval )
491 $ mem( ip_driver_w-iprepad ), worksiz,
492 $ iprepad, ipostpad, padval )
497 CALL blacs_barrier( ictxt,
'All' )
503 CALL pcdttrf( n, mem( ipa+2*( nb+10 ) ),
504 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
505 $ desca, mem( ip_fillin ), fillin_size,
506 $ mem( ipw ), ipw_size, info )
512 WRITE( nout, fmt = * )
'PCDTTRF INFO=', info
523 $ np, mem( ipa-iprepad ), nb+10,
524 $ iprepad, ipostpad, padval )
538 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
539 $ ictxtb, nb+10, ierr( 1 ) )
548 descb( 6 ) = descb2d( lld_ )
553 IF( ipb .GT. 0 )
THEN
557 free_ptr = free_ptr + iprepad
559 free_ptr = free_ptr + nrhs*descb2d( lld_ )
564 ipw_solve_size = 10*npcol+4*nrhs
567 free_ptr = free_ptr + ipw_solve_size
570 IF( free_ptr.GT.memsiz )
THEN
572 $
WRITE( nout, fmt = 9996 )
'solve',
573 $ ( free_ptr )*cplxsz
579 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
582 IF( ierr( 1 ).GT.0 )
THEN
584 $
WRITE( nout, fmt = 9997 )
'MEMORY'
589 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
594 $ descb2d( m_ ), descb2d( n_ ),
595 $ descb2d( mb_ ), descb2d( nb_ ),
597 $ descb2d( lld_ ), descb2d( rsrc_ ),
599 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
600 $ myrow, npcol, nprow )
604 $ mem( ipb-iprepad ),
609 $ mem( ip_driver_w-iprepad ),
615 CALL blacs_barrier( ictxt,
'All')
621 $ mem( ipa+2*( nb+10 ) ),
622 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
623 $ 1, desca, mem( ipb ), 1, descb,
624 $ mem( ip_fillin ), fillin_size,
625 $ mem( ipw_solve ), ipw_solve_size,
632 $
WRITE( nout, fmt = * )
'PCDTTRS INFO=', info
644 $ mem( ip_driver_w-iprepad ),
657 $ ictxt, (3), ierr( 1 ) )
660 $ mem( ipb ), 1, 1, descb2d,
661 $ iaseed, mem( ipa ), 1, 1, desca2d,
662 $ ibseed, anorm, sresid,
663 $ mem( ip_driver_w ), worksiz )
666 IF( sresid.GT.thresh )
667 $
WRITE( nout, fmt = 9985 ) sresid
672 IF( ( sresid.LE.thresh ).AND.
673 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
688 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
690 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
695 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
701 nprocs_real = ( n-1 )/nb + 1
702 n_last = mod( n-1, nb ) + 1
707 nops = 2*(dble(n)*dble(bwl)*
709 $ (dble(n)*dble(bwl))
714 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
719 nops = nops * dble(4)
726 nops2 = 2*( (dble(n_first)*
727 $ dble(bwl)*dble(bwu)))
729 IF ( nprocs_real .GT. 1)
THEN
735 $ 8*( (dble(n_last)*dble(bwl)
739 IF ( nprocs_real .GT. 2)
THEN
743 nops2 = nops2 + (nprocs_real-2)*
744 $ 8*( (dble(nb)*dble(bwl)
751 $ 2*( nprocs_real-1 ) *
752 $ ( bwl*int_one*bwl/3 )
753 IF( nprocs_real .GT. 1 )
THEN
755 $ 2*( nprocs_real-2 ) *
756 $ (2*bwl*int_one*bwl)
769 $ ( dble(bwl)+dble(int_one))
771 IF ( nprocs_real .GT. 1 )
THEN
779 $ (dble(n_last)*(dble(bwl)+
780 $ dble(int_one)))*dble(nrhs)
783 IF ( nprocs_real .GT. 2 )
THEN
790 $ ( nprocs_real-2)*2*
791 $ ( (dble(nb)*(dble(bwl)+
792 $ dble(int_one)))*dble(nrhs) )
798 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
799 IF( nprocs_real .GT. 1 )
THEN
801 $ nrhs*( nprocs_real-2 ) *
802 $ ( 6 * bwl*int_one )
808 nops2 = nops2 * dble(4)
815 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
817 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
822 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
824 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
829 IF( wtime( 2 ).GE.0.0d+0 )
830 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
833 $ nb, nrhs, nprow, npcol,
834 $ wtime( 1 ), wtime( 2 ), tmflops,
839 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
841 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
846 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
848 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
853 IF( ctime( 2 ).GE.0.0d+0 )
854 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
857 $ nb, nrhs, nprow, npcol,
858 $ ctime( 1 ), ctime( 2 ), tmflops,
874 CALL blacs_gridexit( ictxt )
875 CALL blacs_gridexit( ictxtb )
885 ktests = kpass + kfail + kskip
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9992 ) ktests
889 WRITE( nout, fmt = 9991 ) kpass
890 WRITE( nout, fmt = 9989 ) kfail
892 WRITE( nout, fmt = 9990 ) kpass
894 WRITE( nout, fmt = 9988 ) kskip
895 WRITE( nout, fmt = * )
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = 9987 )
898 IF( nout.NE.6 .AND. nout.NE.0 )
904 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
905 $
'; It should be at least 1' )
906 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
908 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
909 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
911 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912 $
'Slv Time MFLOPS MFLOP2 CHECK' )
913 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914 $
'-------- -------- -------- ------' )
915 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916 $ 1x,i4,1x,i4,1x,f9.3,
917 $ f9.4, f9.2, f9.2, 1x, a6 )
918 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
919 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
920 9990
FORMAT( i5,
' tests completed without checking.' )
921 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
922 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
923 9987
FORMAT(
'END OF TESTS.' )
924 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
925 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pcdtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pcdtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pcdttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
subroutine pcdttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
real function pclange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)