75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER memsiz, ntests, zplxsz
85 parameter( zplxsz = 16,
86 $ memsiz = totmem / zplxsz, ntests = 20,
87 $ padval = ( -9923.0d+0, -9923.0d+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
107 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
111 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114 $ nrval( ntests ), nval( ntests ),
115 $ pval( ntests ), qval( ntests )
116 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 COMPLEX*16 mem( memsiz )
120 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
134 INTRINSIC dble,
max,
min, mod
137 DATA kfail, kpass, kskip, ktests / 4*0 /
146 CALL blacs_pinfo( iam, nprocs )
150 CALL pzdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
155 check = ( thresh.GE.0.0d+0 )
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
176 IF( nprow.LT.1 )
THEN
178 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
180 ELSE IF( npcol.LT.1 )
THEN
182 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
184 ELSE IF( nprow*npcol.GT.nprocs )
THEN
186 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
190 IF( ierr( 1 ).GT.0 )
THEN
192 $
WRITE( nout, fmt = 9997 )
'grid'
199 CALL blacs_get( -1, 0, ictxt )
200 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
205 CALL blacs_get( -1, 0, ictxtb )
206 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
211 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
213 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
227 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
233 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
236 IF( ierr( 1 ).GT.0 )
THEN
238 $
WRITE( nout, fmt = 9997 )
'size'
244 DO 45 bw_num = 1, nbw
251 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
258 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
262 IF( bwl.GT.n-1 )
THEN
268 IF( bwu.GT.n-1 )
THEN
276 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
279 IF( ierr( 1 ).GT.0 )
THEN
290 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
292 nb =
max( nb, 2*int_one )
299 IF( nb.LT.
min( 2*int_one, n ) )
THEN
305 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
308 IF( ierr( 1 ).GT.0 )
THEN
317 nq =
numroc( n, nb, mycol, 0, npcol )
333 $ ictxtb, nb+10, ierr( 1 ) )
342 desca( 6 ) = ((3)+10)
345 ierr_temp = ierr( 1 )
347 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
351 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
353 IF( ierr( 1 ).LT.0 )
THEN
355 $
WRITE( nout, fmt = 9997 )
'descriptor'
367 free_ptr = free_ptr + iprepad
370 free_ptr = free_ptr + (nb+10)*(3)
386 free_ptr = free_ptr + iprepad
388 free_ptr = free_ptr + fillin_size
401 free_ptr = free_ptr + ipw_size
406 IF( free_ptr.GT.memsiz )
THEN
408 $
WRITE( nout, fmt = 9996 )
409 $
'divide and conquer factorization',
416 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
419 IF( ierr( 1 ).GT.0 )
THEN
421 $
WRITE( nout, fmt = 9997 )
'MEMORY'
427 worksiz =
max( ((3)+10), nb )
435 worksiz =
max( worksiz, desca2d( nb_ ) )
438 worksiz =
max( worksiz,
442 free_ptr = free_ptr + iprepad
443 ip_driver_w = free_ptr
444 free_ptr = free_ptr + worksiz + ipostpad
450 IF( free_ptr.GT.memsiz )
THEN
452 $
WRITE( nout, fmt = 9996 )
'factorization',
453 $ ( free_ptr )*zplxsz
459 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
462 IF( ierr( 1 ).GT.0 )
THEN
464 $
WRITE( nout, fmt = 9997 )
'MEMORY'
469 CALL pzbmatgen( ictxt,
'T',
'D', bwl, bwu, n, (3), nb,
470 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
471 $ mycol, nprow, npcol )
472 CALL pzfillpad( ictxt, nq, np, mem( ipa-iprepad ),
473 $ nb+10, iprepad, ipostpad,
477 $ mem( ip_driver_w-iprepad ), worksiz,
478 $ iprepad, ipostpad, padval )
485 $ (3), mem( ipa ), 1, 1,
486 $ desca2d, mem( ip_driver_w ) )
487 CALL pzchekpad( ictxt,
'PZLANGE', nq, np,
488 $ mem( ipa-iprepad ), nb+10,
489 $ iprepad, ipostpad, padval )
492 $ mem( ip_driver_w-iprepad ), worksiz,
493 $ iprepad, ipostpad, padval )
498 CALL blacs_barrier( ictxt,
'All' )
504 CALL pzdttrf( n, mem( ipa+2*( nb+10 ) ),
505 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
506 $ desca, mem( ip_fillin ), fillin_size,
507 $ mem( ipw ), ipw_size, info )
513 WRITE( nout, fmt = * )
'PZDTTRF INFO=', info
524 $ np, mem( ipa-iprepad ), nb+10,
525 $ iprepad, ipostpad, padval )
539 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
540 $ ictxtb, nb+10, ierr( 1 ) )
549 descb( 6 ) = descb2d( lld_ )
554 IF( ipb .GT. 0 )
THEN
558 free_ptr = free_ptr + iprepad
560 free_ptr = free_ptr + nrhs*descb2d( lld_ )
565 ipw_solve_size = 10*npcol+4*nrhs
568 free_ptr = free_ptr + ipw_solve_size
571 IF( free_ptr.GT.memsiz )
THEN
573 $
WRITE( nout, fmt = 9996 )
'solve',
574 $ ( free_ptr )*zplxsz
580 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
583 IF( ierr( 1 ).GT.0 )
THEN
585 $
WRITE( nout, fmt = 9997 )
'MEMORY'
590 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
595 $ descb2d( m_ ), descb2d( n_ ),
596 $ descb2d( mb_ ), descb2d( nb_ ),
598 $ descb2d( lld_ ), descb2d( rsrc_ ),
600 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
601 $ myrow, npcol, nprow )
605 $ mem( ipb-iprepad ),
610 $ mem( ip_driver_w-iprepad ),
616 CALL blacs_barrier( ictxt,
'All')
622 $ mem( ipa+2*( nb+10 ) ),
623 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
624 $ 1, desca, mem( ipb ), 1, descb,
625 $ mem( ip_fillin ), fillin_size,
626 $ mem( ipw_solve ), ipw_solve_size,
633 $
WRITE( nout, fmt = * )
'PZDTTRS INFO=', info
645 $ mem( ip_driver_w-iprepad ),
658 $ ictxt, (3), ierr( 1 ) )
661 $ mem( ipb ), 1, 1, descb2d,
662 $ iaseed, mem( ipa ), 1, 1, desca2d,
663 $ ibseed, anorm, sresid,
664 $ mem( ip_driver_w ), worksiz )
667 IF( sresid.GT.thresh )
668 $
WRITE( nout, fmt = 9985 ) sresid
673 IF( ( sresid.LE.thresh ).AND.
674 $ ( (sresid-sresid).EQ.0.0d+0 ) )
THEN
689 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
691 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
696 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
702 nprocs_real = ( n-1 )/nb + 1
703 n_last = mod( n-1, nb ) + 1
708 nops = 2*(dble(n)*dble(bwl)*
710 $ (dble(n)*dble(bwl))
715 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
720 nops = nops * dble(4)
727 nops2 = 2*( (dble(n_first)*
728 $ dble(bwl)*dble(bwu)))
730 IF ( nprocs_real .GT. 1)
THEN
736 $ 8*( (dble(n_last)*dble(bwl)
740 IF ( nprocs_real .GT. 2)
THEN
744 nops2 = nops2 + (nprocs_real-2)*
745 $ 8*( (dble(nb)*dble(bwl)
752 $ 2*( nprocs_real-1 ) *
753 $ ( bwl*int_one*bwl/3 )
754 IF( nprocs_real .GT. 1 )
THEN
756 $ 2*( nprocs_real-2 ) *
757 $ (2*bwl*int_one*bwl)
770 $ ( dble(bwl)+dble(int_one))
772 IF ( nprocs_real .GT. 1 )
THEN
780 $ (dble(n_last)*(dble(bwl)+
781 $ dble(int_one)))*dble(nrhs)
784 IF ( nprocs_real .GT. 2 )
THEN
791 $ ( nprocs_real-2)*2*
792 $ ( (dble(nb)*(dble(bwl)+
793 $ dble(int_one)))*dble(nrhs) )
799 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
800 IF( nprocs_real .GT. 1 )
THEN
802 $ nrhs*( nprocs_real-2 ) *
803 $ ( 6 * bwl*int_one )
809 nops2 = nops2 * dble(4)
816 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
818 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
823 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
825 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
830 IF( wtime( 2 ).GE.0.0d+0 )
831 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
834 $ nb, nrhs, nprow, npcol,
835 $ wtime( 1 ), wtime( 2 ), tmflops,
840 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
842 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
847 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
849 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
854 IF( ctime( 2 ).GE.0.0d+0 )
855 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
858 $ nb, nrhs, nprow, npcol,
859 $ ctime( 1 ), ctime( 2 ), tmflops,
875 CALL blacs_gridexit( ictxt )
876 CALL blacs_gridexit( ictxtb )
886 ktests = kpass + kfail + kskip
887 WRITE( nout, fmt = * )
888 WRITE( nout, fmt = 9992 ) ktests
890 WRITE( nout, fmt = 9991 ) kpass
891 WRITE( nout, fmt = 9989 ) kfail
893 WRITE( nout, fmt = 9990 ) kpass
895 WRITE( nout, fmt = 9988 ) kskip
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = * )
898 WRITE( nout, fmt = 9987 )
899 IF( nout.NE.6 .AND. nout.NE.0 )
905 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
906 $
'; It should be at least 1' )
907 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
909 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
910 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
912 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
913 $
'Slv Time MFLOPS MFLOP2 CHECK' )
914 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
915 $
'-------- -------- -------- ------' )
916 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
917 $ 1x,i4,1x,i4,1x,f9.3,
918 $ f9.4, f9.2, f9.2, 1x, a6 )
919 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
920 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
921 9990
FORMAT( i5,
' tests completed without checking.' )
922 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
923 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
924 9987
FORMAT(
'END OF TESTS.' )
925 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
926 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pzdtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pzdtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pzdttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
subroutine pzdttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)