74 parameter( totmem = 3000000 )
75 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_, rsrc_
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
82 INTEGER dblesz, memsiz, ntests
83 DOUBLE PRECISION padval
84 parameter( dblesz = 8,
85 $ memsiz = totmem / dblesz, ntests = 20,
86 $ padval = -9923.0d+0, zero = 0.0d+0 )
88 parameter( int_one = 1 )
95 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98 $ iprepad, ipw, ipw_size, ipw_solve,
99 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102 $ nnr, nout, np, npcol, nprocs, nprocs_real,
103 $ nprow, nq, nrhs, n_first, n_last, worksiz
105 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
109 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112 $ nrval( ntests ), nval( ntests ),
113 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
131 INTRINSIC dble,
max,
min, mod
134 DATA kfail, kpass, kskip, ktests / 4*0 /
143 CALL blacs_pinfo( iam, nprocs )
147 CALL pddtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
152 check = ( thresh.GE.0.0d+0 )
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
173 IF( nprow.LT.1 )
THEN
175 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
177 ELSE IF( npcol.LT.1 )
THEN
179 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
181 ELSE IF( nprow*npcol.GT.nprocs )
THEN
183 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 IF( ierr( 1 ).GT.0 )
THEN
189 $
WRITE( nout, fmt = 9997 )
'grid'
196 CALL blacs_get( -1, 0, ictxt )
197 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
202 CALL blacs_get( -1, 0, ictxtb )
203 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
210 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
224 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
230 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
233 IF( ierr( 1 ).GT.0 )
THEN
235 $
WRITE( nout, fmt = 9997 )
'size'
241 DO 45 bw_num = 1, nbw
248 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
255 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
259 IF( bwl.GT.n-1 )
THEN
265 IF( bwu.GT.n-1 )
THEN
273 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
276 IF( ierr( 1 ).GT.0 )
THEN
287 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
289 nb =
max( nb, 2*int_one )
296 IF( nb.LT.
min( 2*int_one, n ) )
THEN
302 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
305 IF( ierr( 1 ).GT.0 )
THEN
314 nq =
numroc( n, nb, mycol, 0, npcol )
330 $ ictxtb, nb+10, ierr( 1 ) )
339 desca( 6 ) = ((3)+10)
342 ierr_temp = ierr( 1 )
344 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
348 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
350 IF( ierr( 1 ).LT.0 )
THEN
352 $
WRITE( nout, fmt = 9997 )
'descriptor'
364 free_ptr = free_ptr + iprepad
367 free_ptr = free_ptr + (nb+10)*(3)
383 free_ptr = free_ptr + iprepad
385 free_ptr = free_ptr + fillin_size
398 free_ptr = free_ptr + ipw_size
403 IF( free_ptr.GT.memsiz )
THEN
405 $
WRITE( nout, fmt = 9996 )
406 $
'divide and conquer factorization',
413 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
416 IF( ierr( 1 ).GT.0 )
THEN
418 $
WRITE( nout, fmt = 9997 )
'MEMORY'
424 worksiz =
max( ((3)+10), nb )
432 worksiz =
max( worksiz, desca2d( nb_ ) )
435 worksiz =
max( worksiz,
439 free_ptr = free_ptr + iprepad
440 ip_driver_w = free_ptr
441 free_ptr = free_ptr + worksiz + ipostpad
447 IF( free_ptr.GT.memsiz )
THEN
449 $
WRITE( nout, fmt = 9996 )
'factorization',
450 $ ( free_ptr )*dblesz
456 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
459 IF( ierr( 1 ).GT.0 )
THEN
461 $
WRITE( nout, fmt = 9997 )
'MEMORY'
466 CALL pdbmatgen( ictxt,
'T',
'D', bwl, bwu, n, (3), nb,
467 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
468 $ mycol, nprow, npcol )
469 CALL pdfillpad( ictxt, nq, np, mem( ipa-iprepad ),
470 $ nb+10, iprepad, ipostpad,
474 $ mem( ip_driver_w-iprepad ), worksiz,
475 $ iprepad, ipostpad, padval )
482 $ (3), mem( ipa ), 1, 1,
483 $ desca2d, mem( ip_driver_w ) )
484 CALL pdchekpad( ictxt,
'PDLANGE', nq, np,
485 $ mem( ipa-iprepad ), nb+10,
486 $ iprepad, ipostpad, padval )
489 $ mem( ip_driver_w-iprepad ), worksiz,
490 $ iprepad, ipostpad, padval )
495 CALL blacs_barrier( ictxt,
'All' )
501 CALL pddttrf( n, mem( ipa+2*( nb+10 ) ),
502 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
503 $ desca, mem( ip_fillin ), fillin_size,
504 $ mem( ipw ), ipw_size, info )
510 WRITE( nout, fmt = * )
'PDDTTRF INFO=', info
521 $ np, mem( ipa-iprepad ), nb+10,
522 $ iprepad, ipostpad, padval )
536 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
537 $ ictxtb, nb+10, ierr( 1 ) )
546 descb( 6 ) = descb2d( lld_ )
551 IF( ipb .GT. 0 )
THEN
555 free_ptr = free_ptr + iprepad
557 free_ptr = free_ptr + nrhs*descb2d( lld_ )
562 ipw_solve_size = 10*npcol+4*nrhs
565 free_ptr = free_ptr + ipw_solve_size
568 IF( free_ptr.GT.memsiz )
THEN
570 $
WRITE( nout, fmt = 9996 )
'solve',
571 $ ( free_ptr )*dblesz
577 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
580 IF( ierr( 1 ).GT.0 )
THEN
582 $
WRITE( nout, fmt = 9997 )
'MEMORY'
587 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
592 $ descb2d( m_ ), descb2d( n_ ),
593 $ descb2d( mb_ ), descb2d( nb_ ),
595 $ descb2d( lld_ ), descb2d( rsrc_ ),
597 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
598 $ myrow, npcol, nprow )
602 $ mem( ipb-iprepad ),
607 $ mem( ip_driver_w-iprepad ),
613 CALL blacs_barrier( ictxt,
'All')
619 $ mem( ipa+2*( nb+10 ) ),
620 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
621 $ 1, desca, mem( ipb ), 1, descb,
622 $ mem( ip_fillin ), fillin_size,
623 $ mem( ipw_solve ), ipw_solve_size,
630 $
WRITE( nout, fmt = * )
'PDDTTRS INFO=', info
642 $ mem( ip_driver_w-iprepad ),
655 $ ictxt, (3), ierr( 1 ) )
658 $ mem( ipb ), 1, 1, descb2d,
659 $ iaseed, mem( ipa ), 1, 1, desca2d,
660 $ ibseed, anorm, sresid,
661 $ mem( ip_driver_w ), worksiz )
664 IF( sresid.GT.thresh )
665 $
WRITE( nout, fmt = 9985 ) sresid
670 IF( ( sresid.LE.thresh ).AND.
671 $ ( (sresid-sresid).EQ.0.0d+0 ) )
THEN
686 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
688 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
693 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
699 nprocs_real = ( n-1 )/nb + 1
700 n_last = mod( n-1, nb ) + 1
705 nops = 2*(dble(n)*dble(bwl)*
707 $ (dble(n)*dble(bwl))
712 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
720 nops2 = 2*( (dble(n_first)*
721 $ dble(bwl)*dble(bwu)))
723 IF ( nprocs_real .GT. 1)
THEN
729 $ 8*( (dble(n_last)*dble(bwl)
733 IF ( nprocs_real .GT. 2)
THEN
737 nops2 = nops2 + (nprocs_real-2)*
738 $ 8*( (dble(nb)*dble(bwl)
745 $ 2*( nprocs_real-1 ) *
746 $ ( bwl*int_one*bwl/3 )
747 IF( nprocs_real .GT. 1 )
THEN
749 $ 2*( nprocs_real-2 ) *
750 $ (2*bwl*int_one*bwl)
763 $ ( dble(bwl)+dble(int_one))
765 IF ( nprocs_real .GT. 1 )
THEN
773 $ (dble(n_last)*(dble(bwl)+
774 $ dble(int_one)))*dble(nrhs)
777 IF ( nprocs_real .GT. 2 )
THEN
784 $ ( nprocs_real-2)*2*
785 $ ( (dble(nb)*(dble(bwl)+
786 $ dble(int_one)))*dble(nrhs) )
792 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
793 IF( nprocs_real .GT. 1 )
THEN
795 $ nrhs*( nprocs_real-2 ) *
796 $ ( 6 * bwl*int_one )
805 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
807 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
812 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
814 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
819 IF( wtime( 2 ).GE.0.0d+0 )
820 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
823 $ nb, nrhs, nprow, npcol,
824 $ wtime( 1 ), wtime( 2 ), tmflops,
829 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
831 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
836 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
838 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
843 IF( ctime( 2 ).GE.0.0d+0 )
844 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
847 $ nb, nrhs, nprow, npcol,
848 $ ctime( 1 ), ctime( 2 ), tmflops,
864 CALL blacs_gridexit( ictxt )
865 CALL blacs_gridexit( ictxtb )
875 ktests = kpass + kfail + kskip
876 WRITE( nout, fmt = * )
877 WRITE( nout, fmt = 9992 ) ktests
879 WRITE( nout, fmt = 9991 ) kpass
880 WRITE( nout, fmt = 9989 ) kfail
882 WRITE( nout, fmt = 9990 ) kpass
884 WRITE( nout, fmt = 9988 ) kskip
885 WRITE( nout, fmt = * )
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9987 )
888 IF( nout.NE.6 .AND. nout.NE.0 )
894 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
895 $
'; It should be at least 1' )
896 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
898 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
899 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
901 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
902 $
'Slv Time MFLOPS MFLOP2 CHECK' )
903 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
904 $
'-------- -------- -------- ------' )
905 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
906 $ 1x,i4,1x,i4,1x,f9.3,
907 $ f9.4, f9.2, f9.2, 1x, a6 )
908 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
909 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
910 9990
FORMAT( i5,
' tests completed without checking.' )
911 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
912 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
913 9987
FORMAT(
'END OF TESTS.' )
914 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
915 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pddtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pddtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pddttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
subroutine pddttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)