80 parameter( totmem = 3000000 )
82 parameter( intmem = 2048 )
83 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
84 $ lld_, mb_, m_, nb_, n_, rsrc_
85 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
86 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
87 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
90 INTEGER dblesz, memsiz, ntests
91 DOUBLE PRECISION padval
92 parameter( dblesz = 8,
93 $ memsiz = totmem / dblesz, ntests = 20,
94 $ padval = -9923.0d+0, zero = 0.0d+0 )
96 parameter( int_one = 1 )
103 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
104 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
105 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
106 $ iprepad, ipw, ipw_size, ipw_solve,
107 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
108 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
109 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
110 $ nnr, nout, np, npcol, nprocs, nprocs_real,
111 $ nprow, nq, nrhs, n_first, n_last, worksiz
113 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
118 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
119 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
120 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
121 $ nrval( ntests ), nval( ntests ),
122 $ pval( ntests ), qval( ntests )
123 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
126 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
127 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
140 INTRINSIC dble,
max,
min, mod
143 DATA kfail, kpass, kskip, ktests / 4*0 /
152 CALL blacs_pinfo( iam, nprocs )
156 CALL pdgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
157 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
158 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
159 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
161 check = ( thresh.GE.0.0d+0 )
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = * )
182 IF( nprow.LT.1 )
THEN
184 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
186 ELSE IF( npcol.LT.1 )
THEN
188 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
190 ELSE IF( nprow*npcol.GT.nprocs )
THEN
192 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
196 IF( ierr( 1 ).GT.0 )
THEN
198 $
WRITE( nout, fmt = 9997 )
'grid'
205 CALL blacs_get( -1, 0, ictxt )
206 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
211 CALL blacs_get( -1, 0, ictxtb )
212 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
217 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
219 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
233 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
239 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
242 IF( ierr( 1 ).GT.0 )
THEN
244 $
WRITE( nout, fmt = 9997 )
'size'
250 DO 45 bw_num = 1, nbw
254 bwl = bwlval( bw_num )
257 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
261 bwu = bwuval( bw_num )
264 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
268 IF( bwl.GT.n-1 )
THEN
274 IF( bwu.GT.n-1 )
THEN
282 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
285 IF( ierr( 1 ).GT.0 )
THEN
296 nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
298 nb =
max( nb, 2*(bwl+bwu) )
306 IF( nb.GT.intmem )
THEN
309 WRITE( nout,* )
'You have chosen an '
310 $ ,
'NB > INTMEM in the driver.'
311 WRITE(nout, *)
'Please edit the driver '
312 $ ,
'and increase the value of INTMEM'
318 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
321 IF( ierr( 1 ).GT.0 )
THEN
328 np =
numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
330 nq =
numroc( n, nb, mycol, 0, npcol )
333 iprepad = ((2*bwl+2*bwu+1)+10)
335 ipostpad = ((2*bwl+2*bwu+1)+10)
344 CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
345 $ (2*bwl+2*bwu+1), nb, 0, 0,
346 $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
355 desca( 6 ) = ((2*bwl+2*bwu+1)+10)
358 ierr_temp = ierr( 1 )
360 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
364 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
366 IF( ierr( 1 ).LT.0 )
THEN
368 $
WRITE( nout, fmt = 9997 )
'descriptor'
380 free_ptr = free_ptr + iprepad
383 free_ptr = free_ptr + desca2d( lld_ )*
396 $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
400 free_ptr = free_ptr + iprepad
402 free_ptr = free_ptr + fillin_size
415 free_ptr = free_ptr + ipw_size
420 IF( free_ptr.GT.memsiz )
THEN
422 $
WRITE( nout, fmt = 9996 )
423 $
'divide and conquer factorization',
430 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
433 IF( ierr( 1 ).GT.0 )
THEN
435 $
WRITE( nout, fmt = 9997 )
'MEMORY'
441 worksiz =
max( ((2*bwl+2*bwu+1)+10), nb )
449 worksiz =
max( worksiz, desca2d( nb_ ) )
452 worksiz =
max( worksiz,
456 free_ptr = free_ptr + iprepad
457 ip_driver_w = free_ptr
458 free_ptr = free_ptr + worksiz + ipostpad
464 IF( free_ptr.GT.memsiz )
THEN
466 $
WRITE( nout, fmt = 9996 )
'factorization',
467 $ ( free_ptr )*dblesz
473 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
476 IF( ierr( 1 ).GT.0 )
THEN
478 $
WRITE( nout, fmt = 9997 )
'MEMORY'
483 CALL pdbmatgen( ictxt,
'G',
'N', bwl, bwu, n,
484 $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
485 $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
486 $ myrow, mycol, nprow, npcol )
488 CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
489 $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
493 $ mem( ip_driver_w-iprepad ), worksiz,
494 $ iprepad, ipostpad, padval )
500 anorm =
pdlange(
'1', (2*bwl+2*bwu+1),
501 $ n, mem( ipa ), 1, 1,
502 $ desca2d, mem( ip_driver_w ) )
503 CALL pdchekpad( ictxt,
'PDLANGE', np, nq,
504 $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
505 $ iprepad, ipostpad, padval )
508 $ mem( ip_driver_w-iprepad ), worksiz,
509 $ iprepad, ipostpad, padval )
514 CALL blacs_barrier( ictxt,
'All' )
520 CALL pdgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
521 $ mem( ip_fillin ), fillin_size, mem( ipw ),
528 WRITE( nout, fmt = * )
'PDGBTRF INFO=', info
539 $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
540 $ iprepad, ipostpad, padval )
554 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
555 $ ictxtb, nb+10, ierr( 1 ) )
564 descb( 6 ) = descb2d( lld_ )
569 IF( ipb .GT. 0 )
THEN
573 free_ptr = free_ptr + iprepad
575 free_ptr = free_ptr + nrhs*descb2d( lld_ )
580 ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
583 free_ptr = free_ptr + ipw_solve_size
586 IF( free_ptr.GT.memsiz )
THEN
588 $
WRITE( nout, fmt = 9996 )
'solve',
589 $ ( free_ptr )*dblesz
595 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
598 IF( ierr( 1 ).GT.0 )
THEN
600 $
WRITE( nout, fmt = 9997 )
'MEMORY'
605 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
610 $ descb2d( m_ ), descb2d( n_ ),
611 $ descb2d( mb_ ), descb2d( nb_ ),
613 $ descb2d( lld_ ), descb2d( rsrc_ ),
615 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
616 $ myrow, npcol, nprow )
620 $ mem( ipb-iprepad ),
625 $ mem( ip_driver_w-iprepad ),
631 CALL blacs_barrier( ictxt,
'All')
636 CALL pdgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
637 $ 1, desca, ipiv, mem( ipb ), 1, descb,
638 $ mem( ip_fillin ), fillin_size,
639 $ mem( ipw_solve ), ipw_solve_size,
646 $
WRITE( nout, fmt = * )
'PDGBTRS INFO=', info
658 $ mem( ip_driver_w-iprepad ),
668 $ mem( ipb ), 1, 1, descb2d,
669 $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
670 $ ibseed, anorm, sresid,
671 $ mem( ip_driver_w ), worksiz )
674 IF( sresid.GT.thresh )
675 $
WRITE( nout, fmt = 9985 ) sresid
680 IF( ( sresid.LE.thresh ).AND.
681 $ ( (sresid-sresid).EQ.0.0d+0 ) )
THEN
696 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
698 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
703 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
709 nprocs_real = ( n-1 )/nb + 1
710 n_last = mod( n-1, nb ) + 1
715 nops = 2*(dble(n)*dble(bwl)*
717 $ (dble(n)*dble(bwl))
722 $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
730 nops2 = 2*( (dble(n_first)*
731 $ dble((bwl+bwu))*dble(bwu)))
733 IF ( nprocs_real .GT. 1)
THEN
739 $ 8*( (dble(n_last)*dble((bwl+bwu))
740 $ *dble((bwl+bwu))) )
743 IF ( nprocs_real .GT. 2)
THEN
747 nops2 = nops2 + (nprocs_real-2)*
748 $ 8*( (dble(nb)*dble((bwl+bwu))
749 $ *dble((bwl+bwu))) )
755 $ 2*( nprocs_real-1 ) *
756 $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
757 IF( nprocs_real .GT. 1 )
THEN
759 $ 2*( nprocs_real-2 ) *
760 $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
773 $ ( dble(bwl)+dble((bwl+bwu)))
775 IF ( nprocs_real .GT. 1 )
THEN
783 $ (dble(n_last)*(dble((bwl+bwu))+
784 $ dble((bwl+bwu))))*dble(nrhs)
787 IF ( nprocs_real .GT. 2 )
THEN
794 $ ( nprocs_real-2)*2*
795 $ ( (dble(nb)*(dble((bwl+bwu))+
796 $ dble((bwl+bwu))))*dble(nrhs) )
802 $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
803 IF( nprocs_real .GT. 1 )
THEN
805 $ nrhs*( nprocs_real-2 ) *
806 $ ( 6 * (bwl+bwu)*(bwl+bwu) )
815 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
817 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
822 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
824 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
829 IF( wtime( 2 ).GE.0.0d+0 )
830 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
833 $ nb, nrhs, nprow, npcol,
834 $ wtime( 1 ), wtime( 2 ), tmflops,
839 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
841 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
846 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
848 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
853 IF( ctime( 2 ).GE.0.0d+0 )
854 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
857 $ nb, nrhs, nprow, npcol,
858 $ ctime( 1 ), ctime( 2 ), tmflops,
874 CALL blacs_gridexit( ictxt )
875 CALL blacs_gridexit( ictxtb )
885 ktests = kpass + kfail + kskip
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9992 ) ktests
889 WRITE( nout, fmt = 9991 ) kpass
890 WRITE( nout, fmt = 9989 ) kfail
892 WRITE( nout, fmt = 9990 ) kpass
894 WRITE( nout, fmt = 9988 ) kskip
895 WRITE( nout, fmt = * )
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = 9987 )
898 IF( nout.NE.6 .AND. nout.NE.0 )
904 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
905 $
'; It should be at least 1' )
906 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
908 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
909 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
911 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912 $
'Slv Time MFLOPS MFLOP2 CHECK' )
913 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914 $
'-------- -------- -------- ------' )
915 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916 $ 1x,i4,1x,i4,1x,f9.3,
917 $ f9.4, f9.2, f9.2, 1x, a6 )
918 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
919 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
920 9990
FORMAT( i5,
' tests completed without checking.' )
921 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
922 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
923 9987
FORMAT(
'END OF TESTS.' )
924 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
925 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pddblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine pdgbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pdgbtrf(n, bwl, bwu, a, ja, desca, ipiv, af, laf, work, lwork, info)
subroutine pdgbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, ipiv, b, ib, descb, af, laf, work, lwork, info)
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)