75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER memsiz, ntests, zplxsz
85 parameter( zplxsz = 16,
86 $ memsiz = totmem / zplxsz, ntests = 20,
87 $ padval = ( -9923.0d+0, -9923.0d+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
107 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
111 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114 $ nrval( ntests ), nval( ntests ),
115 $ pval( ntests ), qval( ntests )
116 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 COMPLEX*16 mem( memsiz )
120 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
134 INTRINSIC dble,
max,
min, mod
137 DATA kfail, kpass, kskip, ktests / 4*0 /
146 CALL blacs_pinfo( iam, nprocs )
150 CALL pzdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
155 check = ( thresh.GE.0.0d+0 )
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
176 IF( nprow.LT.1 )
THEN
178 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
180 ELSE IF( npcol.LT.1 )
THEN
182 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
184 ELSE IF( nprow*npcol.GT.nprocs )
THEN
186 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
190 IF( ierr( 1 ).GT.0 )
THEN
192 $
WRITE( nout, fmt = 9997 )
'grid'
199 CALL blacs_get( -1, 0, ictxt )
200 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
205 CALL blacs_get( -1, 0, ictxtb )
206 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
211 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
213 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
227 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
233 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
236 IF( ierr( 1 ).GT.0 )
THEN
238 $
WRITE( nout, fmt = 9997 )
'size'
244 DO 45 bw_num = 1, nbw
248 bwl = bwlval( bw_num )
251 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
255 bwu = bwuval( bw_num )
258 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
262 IF( bwl.GT.n-1 )
THEN
268 IF( bwu.GT.n-1 )
THEN
276 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
279 IF( ierr( 1 ).GT.0 )
THEN
290 nb =( (n-(npcol-1)*
max(bwl,bwu)-1)/npcol + 1 )
292 nb =
max( nb, 2*
max(bwl,bwu) )
299 IF( nb.LT.
min( 2*
max(bwl,bwu), n ) )
THEN
305 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
308 IF( ierr( 1 ).GT.0 )
THEN
315 np =
numroc( (bwl+bwu+1), (bwl+bwu+1),
317 nq =
numroc( n, nb, mycol, 0, npcol )
320 iprepad = ((bwl+bwu+1)+10)
322 ipostpad = ((bwl+bwu+1)+10)
331 CALL descinit( desca2d, (bwl+bwu+1), n,
332 $ (bwl+bwu+1), nb, 0, 0,
333 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
342 desca( 6 ) = ((bwl+bwu+1)+10)
345 ierr_temp = ierr( 1 )
347 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
351 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
353 IF( ierr( 1 ).LT.0 )
THEN
355 $
WRITE( nout, fmt = 9997 )
'descriptor'
367 free_ptr = free_ptr + iprepad
370 free_ptr = free_ptr + desca2d( lld_ )*
383 $ nb*(bwl+bwu)+6*
max(bwl,bwu)*
max(bwl,bwu)
387 free_ptr = free_ptr + iprepad
389 free_ptr = free_ptr + fillin_size
397 ipw_size =
max(bwl,bwu)*
max(bwl,bwu)
402 free_ptr = free_ptr + ipw_size
407 IF( free_ptr.GT.memsiz )
THEN
409 $
WRITE( nout, fmt = 9996 )
410 $
'divide and conquer factorization',
417 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
420 IF( ierr( 1 ).GT.0 )
THEN
422 $
WRITE( nout, fmt = 9997 )
'MEMORY'
428 worksiz =
max( ((bwl+bwu+1)+10), nb )
436 worksiz =
max( worksiz, desca2d( nb_ ) )
439 worksiz =
max( worksiz,
443 free_ptr = free_ptr + iprepad
444 ip_driver_w = free_ptr
445 free_ptr = free_ptr + worksiz + ipostpad
451 IF( free_ptr.GT.memsiz )
THEN
453 $
WRITE( nout, fmt = 9996 )
'factorization',
454 $ ( free_ptr )*zplxsz
460 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
463 IF( ierr( 1 ).GT.0 )
THEN
465 $
WRITE( nout, fmt = 9997 )
'MEMORY'
470 CALL pzbmatgen( ictxt,
'G',
'D', bwl, bwu, n,
471 $ (bwl+bwu+1), nb, mem( ipa ),
472 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
473 $ mycol, nprow, npcol )
475 CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
476 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
480 $ mem( ip_driver_w-iprepad ), worksiz,
481 $ iprepad, ipostpad, padval )
487 anorm =
pzlange(
'1', (bwl+bwu+1),
488 $ n, mem( ipa ), 1, 1,
489 $ desca2d, mem( ip_driver_w ) )
490 CALL pzchekpad( ictxt,
'PZLANGE', np, nq,
491 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
492 $ iprepad, ipostpad, padval )
495 $ mem( ip_driver_w-iprepad ), worksiz,
496 $ iprepad, ipostpad, padval )
501 CALL blacs_barrier( ictxt,
'All' )
507 CALL pzdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
508 $ mem( ip_fillin ), fillin_size, mem( ipw ),
515 WRITE( nout, fmt = * )
'PZDBTRF INFO=', info
526 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
527 $ iprepad, ipostpad, padval )
541 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
542 $ ictxtb, nb+10, ierr( 1 ) )
551 descb( 6 ) = descb2d( lld_ )
556 IF( ipb .GT. 0 )
THEN
560 free_ptr = free_ptr + iprepad
562 free_ptr = free_ptr + nrhs*descb2d( lld_ )
567 ipw_solve_size = (
max(bwl,bwu)*nrhs)
570 free_ptr = free_ptr + ipw_solve_size
573 IF( free_ptr.GT.memsiz )
THEN
575 $
WRITE( nout, fmt = 9996 )
'solve',
576 $ ( free_ptr )*zplxsz
582 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
585 IF( ierr( 1 ).GT.0 )
THEN
587 $
WRITE( nout, fmt = 9997 )
'MEMORY'
592 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
597 $ descb2d( m_ ), descb2d( n_ ),
598 $ descb2d( mb_ ), descb2d( nb_ ),
600 $ descb2d( lld_ ), descb2d( rsrc_ ),
602 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
603 $ myrow, npcol, nprow )
607 $ mem( ipb-iprepad ),
612 $ mem( ip_driver_w-iprepad ),
618 CALL blacs_barrier( ictxt,
'All')
623 CALL pzdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
624 $ 1, desca, mem( ipb ), 1, descb,
625 $ mem( ip_fillin ), fillin_size,
626 $ mem( ipw_solve ), ipw_solve_size,
633 $
WRITE( nout, fmt = * )
'PZDBTRS INFO=', info
645 $ mem( ip_driver_w-iprepad ),
655 $ mem( ipb ), 1, 1, descb2d,
656 $ iaseed, mem( ipa ), 1, 1, desca2d,
657 $ ibseed, anorm, sresid,
658 $ mem( ip_driver_w ), worksiz )
661 IF( sresid.GT.thresh )
662 $
WRITE( nout, fmt = 9985 ) sresid
667 IF( ( sresid.LE.thresh ).AND.
668 $ ( (sresid-sresid).EQ.0.0d+0 ) )
THEN
683 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
685 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
690 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
696 nprocs_real = ( n-1 )/nb + 1
697 n_last = mod( n-1, nb ) + 1
702 nops = 2*(dble(n)*dble(bwl)*
704 $ (dble(n)*dble(bwl))
709 $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
714 nops = nops * dble(4)
721 nops2 = 2*( (dble(n_first)*
722 $ dble(bwl)*dble(bwu)))
724 IF ( nprocs_real .GT. 1)
THEN
730 $ 8*( (dble(n_last)*dble(bwl)
734 IF ( nprocs_real .GT. 2)
THEN
738 nops2 = nops2 + (nprocs_real-2)*
739 $ 8*( (dble(nb)*dble(bwl)
746 $ 2*( nprocs_real-1 ) *
748 IF( nprocs_real .GT. 1 )
THEN
750 $ 2*( nprocs_real-2 ) *
764 $ ( dble(bwl)+dble(bwu))
766 IF ( nprocs_real .GT. 1 )
THEN
774 $ (dble(n_last)*(dble(bwl)+
775 $ dble(bwu)))*dble(nrhs)
778 IF ( nprocs_real .GT. 2 )
THEN
785 $ ( nprocs_real-2)*2*
786 $ ( (dble(nb)*(dble(bwl)+
787 $ dble(bwu)))*dble(nrhs) )
793 $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
794 IF( nprocs_real .GT. 1 )
THEN
796 $ nrhs*( nprocs_real-2 ) *
803 nops2 = nops2 * dble(4)
810 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
812 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
817 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
819 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
824 IF( wtime( 2 ).GE.0.0d+0 )
825 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
828 $ nb, nrhs, nprow, npcol,
829 $ wtime( 1 ), wtime( 2 ), tmflops,
834 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
836 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
841 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
843 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
848 IF( ctime( 2 ).GE.0.0d+0 )
849 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
852 $ nb, nrhs, nprow, npcol,
853 $ ctime( 1 ), ctime( 2 ), tmflops,
869 CALL blacs_gridexit( ictxt )
870 CALL blacs_gridexit( ictxtb )
880 ktests = kpass + kfail + kskip
881 WRITE( nout, fmt = * )
882 WRITE( nout, fmt = 9992 ) ktests
884 WRITE( nout, fmt = 9991 ) kpass
885 WRITE( nout, fmt = 9989 ) kfail
887 WRITE( nout, fmt = 9990 ) kpass
889 WRITE( nout, fmt = 9988 ) kskip
890 WRITE( nout, fmt = * )
891 WRITE( nout, fmt = * )
892 WRITE( nout, fmt = 9987 )
893 IF( nout.NE.6 .AND. nout.NE.0 )
899 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
900 $
'; It should be at least 1' )
901 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
903 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
904 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
906 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
907 $
'Slv Time MFLOPS MFLOP2 CHECK' )
908 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
909 $
'-------- -------- -------- ------' )
910 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
911 $ 1x,i4,1x,i4,1x,f9.3,
912 $ f9.4, f9.2, f9.2, 1x, a6 )
913 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
914 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
915 9990
FORMAT( i5,
' tests completed without checking.' )
916 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
917 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
918 9987
FORMAT(
'END OF TESTS.' )
919 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
920 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pzdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pzdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pzdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
subroutine pzdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)