75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER cplxsz, memsiz, ntests
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
90 parameter( int_one = 1 )
97 INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
98 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99 $ imidpad, info, ipa, ipb, ipostpad, iprepad,
100 $ ipw, ipw_size, ipw_solve, ipw_solve_size,
101 $ ip_driver_w, ip_fillin, j, k, kfail, kpass,
102 $ kskip, ktests, mycol, myrhs_size, myrow, n, nb,
103 $ nbw, ngrids, nmat, nnb, nnbr, nnr, nout, np,
104 $ npcol, nprocs, nprocs_real, nprow, nq, nrhs,
105 $ n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
110 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
111 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
112 $ nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
133 INTRINSIC dble,
max,
min, mod
136 DATA kfail, kpass, kskip, ktests / 4*0 /
145 CALL blacs_pinfo( iam, nprocs )
149 CALL pcpbinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
150 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
151 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
152 $ qval, ntests, thresh, mem, iam, nprocs )
154 check = ( thresh.GE.0.0e+0 )
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
175 IF( nprow.LT.1 )
THEN
177 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
179 ELSE IF( npcol.LT.1 )
THEN
181 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
183 ELSE IF( nprow*npcol.GT.nprocs )
THEN
185 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
189 IF( ierr( 1 ).GT.0 )
THEN
191 $
WRITE( nout, fmt = 9997 )
'grid'
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
226 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
232 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'size'
243 DO 45 bw_num = 1, nbw
250 $
WRITE( nout, fmt = 9999 )
'Band',
'bw', bw
260 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
263 IF( ierr( 1 ).GT.0 )
THEN
274 nb =( (n-(npcol-1)*bw-1)/npcol + 1 )
283 IF( nb.LT.
min( 2*bw, n ) )
THEN
289 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
292 IF( ierr( 1 ).GT.0 )
THEN
299 np =
numroc( (bw+1), (bw+1),
301 nq =
numroc( n, nb, mycol, 0, npcol )
304 iprepad = ((bw+1)+10)
306 ipostpad = ((bw+1)+10)
317 $ ictxt,((bw+1)+10), ierr( 1 ) )
326 desca( 6 ) = ((bw+1)+10)
329 ierr_temp = ierr( 1 )
331 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
335 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
337 IF( ierr( 1 ).LT.0 )
THEN
339 $
WRITE( nout, fmt = 9997 )
'descriptor'
351 free_ptr = free_ptr + iprepad
354 free_ptr = free_ptr + desca2d( lld_ )*
371 free_ptr = free_ptr + iprepad
373 free_ptr = free_ptr + fillin_size
386 free_ptr = free_ptr + ipw_size
391 IF( free_ptr.GT.memsiz )
THEN
393 $
WRITE( nout, fmt = 9996 )
394 $
'divide and conquer factorization',
401 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
404 IF( ierr( 1 ).GT.0 )
THEN
406 $
WRITE( nout, fmt = 9997 )
'MEMORY'
412 worksiz =
max( ((bw+1)+10), nb )
420 worksiz =
max( worksiz, desca2d( nb_ ) )
423 worksiz =
max( worksiz,
424 $
max(5,
max(bw*(bw+2),nb))+2*nb )
427 free_ptr = free_ptr + iprepad
428 ip_driver_w = free_ptr
429 free_ptr = free_ptr + worksiz + ipostpad
435 IF( free_ptr.GT.memsiz )
THEN
437 $
WRITE( nout, fmt = 9996 )
'factorization',
438 $ ( free_ptr )*cplxsz
444 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
447 IF( ierr( 1 ).GT.0 )
THEN
449 $
WRITE( nout, fmt = 9997 )
'MEMORY'
454 CALL pcbmatgen( ictxt, uplo,
'B', bw, bw, n, (bw+1), nb,
455 $ mem( ipa ), ((bw+1)+10), 0, 0, iaseed,
456 $ myrow, mycol, nprow, npcol )
458 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
459 $ ((bw+1)+10), iprepad, ipostpad,
463 $ mem( ip_driver_w-iprepad ), worksiz,
464 $ iprepad, ipostpad, padval )
471 $ n, mem( ipa ), 1, 1,
472 $ desca2d, mem( ip_driver_w ) )
473 CALL pcchekpad( ictxt,
'PCLANGE', np, nq,
474 $ mem( ipa-iprepad ), ((bw+1)+10),
475 $ iprepad, ipostpad, padval )
478 $ mem( ip_driver_w-iprepad ), worksiz,
479 $ iprepad, ipostpad, padval )
484 CALL blacs_barrier( ictxt,
'All' )
490 CALL pcpbtrf( uplo, n, bw, mem( ipa ), 1, desca,
491 $ mem( ip_fillin ), fillin_size, mem( ipw ),
498 WRITE( nout, fmt = * )
'PCPBTRF INFO=', info
509 $ nq, mem( ipa-iprepad ), ((bw+1)+10),
510 $ iprepad, ipostpad, padval )
524 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
525 $ ictxtb, nb+10, ierr( 1 ) )
534 descb( 6 ) = descb2d( lld_ )
539 IF( ipb .GT. 0 )
THEN
543 free_ptr = free_ptr + iprepad
545 free_ptr = free_ptr + nrhs*descb2d( lld_ )
550 ipw_solve_size = (bw*nrhs)
553 free_ptr = free_ptr + ipw_solve_size
556 IF( free_ptr.GT.memsiz )
THEN
558 $
WRITE( nout, fmt = 9996 )
'solve',
559 $ ( free_ptr )*cplxsz
565 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
568 IF( ierr( 1 ).GT.0 )
THEN
570 $
WRITE( nout, fmt = 9997 )
'MEMORY'
575 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
580 $ descb2d( m_ ), descb2d( n_ ),
581 $ descb2d( mb_ ), descb2d( nb_ ),
583 $ descb2d( lld_ ), descb2d( rsrc_ ),
585 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
586 $ myrow, npcol, nprow )
590 $ mem( ipb-iprepad ),
595 $ mem( ip_driver_w-iprepad ),
601 CALL blacs_barrier( ictxt,
'All')
606 CALL pcpbtrs( uplo, n, bw, nrhs, mem( ipa ), 1,
607 $ desca, mem( ipb ), 1, descb,
608 $ mem( ip_fillin ), fillin_size,
609 $ mem( ipw_solve ), ipw_solve_size,
616 $
WRITE( nout, fmt = * )
'PCPBTRS INFO=', info
628 $ mem( ip_driver_w-iprepad ),
637 $ mem( ipb ), 1, 1, descb2d,
638 $ iaseed, mem( ipa ), 1, 1, desca2d,
639 $ ibseed, anorm, sresid,
640 $ mem( ip_driver_w ), worksiz )
643 IF( sresid.GT.thresh )
644 $
WRITE( nout, fmt = 9985 ) sresid
649 IF( ( sresid.LE.thresh ).AND.
650 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
665 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
667 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
672 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
678 nprocs_real = ( n-1 )/nb + 1
679 n_last = mod( n-1, nb ) + 1
682 nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
683 $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
684 $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
685 $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
686 nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
687 $ *( -1.d0 /2.d0+dble(bw)
688 $ *( -1.d0 / 3.d0 ) ) ) +
689 $ dble(n)*( dble(bw) /
690 $ 2.d0*( 1.d0+dble(bw) ) )
693 $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
694 $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
695 $ ( dble(bw)*( 2*dble(n)-
696 $ ( dble(bw)+1.d0 ) ) )
703 nops2 = ( (dble(n_first))* dble(bw)**2 )
705 IF ( nprocs_real .GT. 1)
THEN
710 $ 4*( (dble(n_last)*dble(bw)**2) )
713 IF ( nprocs_real .GT. 2)
THEN
717 nops2 = nops2 + (nprocs_real-2)*
718 $ 4*( (dble(nb)*dble(bw)**2) )
724 $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
725 IF( nprocs_real .GT. 1 )
THEN
727 $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
734 $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
736 IF ( nprocs_real .GT. 1 )
THEN
741 $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
744 IF ( nprocs_real .GT. 2 )
THEN
749 $ ( nprocs_real-2)*2*
750 $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
756 $ nrhs*( nprocs_real-1 ) * ( bw*bw )
757 IF( nprocs_real .GT. 1 )
THEN
759 $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
765 nops2 = nops2 * dble(4)
772 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
774 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
779 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
781 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
786 IF( wtime( 2 ).GE.0.0d+0 )
787 $
WRITE( nout, fmt = 9993 )
'WALL', uplo,
790 $ nb, nrhs, nprow, npcol,
791 $ wtime( 1 ), wtime( 2 ), tmflops,
796 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
798 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
803 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
805 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
810 IF( ctime( 2 ).GE.0.0d+0 )
811 $
WRITE( nout, fmt = 9993 )
'CPU ', uplo,
814 $ nb, nrhs, nprow, npcol,
815 $ ctime( 1 ), ctime( 2 ), tmflops,
831 CALL blacs_gridexit( ictxt )
832 CALL blacs_gridexit( ictxtb )
842 ktests = kpass + kfail + kskip
843 WRITE( nout, fmt = * )
844 WRITE( nout, fmt = 9992 ) ktests
846 WRITE( nout, fmt = 9991 ) kpass
847 WRITE( nout, fmt = 9989 ) kfail
849 WRITE( nout, fmt = 9990 ) kpass
851 WRITE( nout, fmt = 9988 ) kskip
852 WRITE( nout, fmt = * )
853 WRITE( nout, fmt = * )
854 WRITE( nout, fmt = 9987 )
855 IF( nout.NE.6 .AND. nout.NE.0 )
861 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
862 $
'; It should be at least 1' )
863 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
865 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
866 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
868 9995
FORMAT(
'TIME UL N BW NB NRHS P Q L*U Time ',
869 $
'Slv Time MFLOPS MFLOP2 CHECK' )
870 9994
FORMAT(
'---- -- ------ --- ---- ----- -- ---- -------- ',
871 $
'-------- ------ ------ ------' )
872 9993
FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
874 $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
875 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
876 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
877 9990
FORMAT( i5,
' tests completed without checking.' )
878 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
879 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
880 9987
FORMAT(
'END OF TESTS.' )
881 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
882 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
real function pclange(norm, m, n, a, ia, ja, desca, work)
subroutine pcpbinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pcpblaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pcpbtrf(uplo, n, bw, a, ja, desca, af, laf, work, lwork, info)
subroutine pcpbtrs(uplo, n, bw, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)