75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER cplxsz, memsiz, ntests
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
110 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
133 INTRINSIC dble,
max,
min, mod
136 DATA kfail, kpass, kskip, ktests / 4*0 /
145 CALL blacs_pinfo( iam, nprocs )
149 CALL pcdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154 check = ( thresh.GE.0.0e+0 )
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
175 IF( nprow.LT.1 )
THEN
177 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
179 ELSE IF( npcol.LT.1 )
THEN
181 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
183 ELSE IF( nprow*npcol.GT.nprocs )
THEN
185 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
189 IF( ierr( 1 ).GT.0 )
THEN
191 $
WRITE( nout, fmt = 9997 )
'grid'
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
226 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
232 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'size'
243 DO 45 bw_num = 1, nbw
247 bwl = bwlval( bw_num )
250 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
254 bwu = bwuval( bw_num )
257 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
261 IF( bwl.GT.n-1 )
THEN
267 IF( bwu.GT.n-1 )
THEN
275 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
278 IF( ierr( 1 ).GT.0 )
THEN
289 nb =( (n-(npcol-1)*
max(bwl,bwu)-1)/npcol + 1 )
291 nb =
max( nb, 2*
max(bwl,bwu) )
298 IF( nb.LT.
min( 2*
max(bwl,bwu), n ) )
THEN
304 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
307 IF( ierr( 1 ).GT.0 )
THEN
314 np =
numroc( (bwl+bwu+1), (bwl+bwu+1),
316 nq =
numroc( n, nb, mycol, 0, npcol )
319 iprepad = ((bwl+bwu+1)+10)
321 ipostpad = ((bwl+bwu+1)+10)
330 CALL descinit( desca2d, (bwl+bwu+1), n,
331 $ (bwl+bwu+1), nb, 0, 0,
332 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
341 desca( 6 ) = ((bwl+bwu+1)+10)
344 ierr_temp = ierr( 1 )
346 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
350 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
352 IF( ierr( 1 ).LT.0 )
THEN
354 $
WRITE( nout, fmt = 9997 )
'descriptor'
366 free_ptr = free_ptr + iprepad
369 free_ptr = free_ptr + desca2d( lld_ )*
382 $ nb*(bwl+bwu)+6*
max(bwl,bwu)*
max(bwl,bwu)
386 free_ptr = free_ptr + iprepad
388 free_ptr = free_ptr + fillin_size
396 ipw_size =
max(bwl,bwu)*
max(bwl,bwu)
401 free_ptr = free_ptr + ipw_size
406 IF( free_ptr.GT.memsiz )
THEN
408 $
WRITE( nout, fmt = 9996 )
409 $
'divide and conquer factorization',
416 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
419 IF( ierr( 1 ).GT.0 )
THEN
421 $
WRITE( nout, fmt = 9997 )
'MEMORY'
427 worksiz =
max( ((bwl+bwu+1)+10), nb )
435 worksiz =
max( worksiz, desca2d( nb_ ) )
438 worksiz =
max( worksiz,
442 free_ptr = free_ptr + iprepad
443 ip_driver_w = free_ptr
444 free_ptr = free_ptr + worksiz + ipostpad
450 IF( free_ptr.GT.memsiz )
THEN
452 $
WRITE( nout, fmt = 9996 )
'factorization',
453 $ ( free_ptr )*cplxsz
459 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
462 IF( ierr( 1 ).GT.0 )
THEN
464 $
WRITE( nout, fmt = 9997 )
'MEMORY'
469 CALL pcbmatgen( ictxt,
'G',
'D', bwl, bwu, n,
470 $ (bwl+bwu+1), nb, mem( ipa ),
471 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
472 $ mycol, nprow, npcol )
474 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
475 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
479 $ mem( ip_driver_w-iprepad ), worksiz,
480 $ iprepad, ipostpad, padval )
486 anorm =
pclange(
'1', (bwl+bwu+1),
487 $ n, mem( ipa ), 1, 1,
488 $ desca2d, mem( ip_driver_w ) )
489 CALL pcchekpad( ictxt,
'PCLANGE', np, nq,
490 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
491 $ iprepad, ipostpad, padval )
494 $ mem( ip_driver_w-iprepad ), worksiz,
495 $ iprepad, ipostpad, padval )
500 CALL blacs_barrier( ictxt,
'All' )
506 CALL pcdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
507 $ mem( ip_fillin ), fillin_size, mem( ipw ),
514 WRITE( nout, fmt = * )
'PCDBTRF INFO=', info
525 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
526 $ iprepad, ipostpad, padval )
540 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
541 $ ictxtb, nb+10, ierr( 1 ) )
550 descb( 6 ) = descb2d( lld_ )
555 IF( ipb .GT. 0 )
THEN
559 free_ptr = free_ptr + iprepad
561 free_ptr = free_ptr + nrhs*descb2d( lld_ )
566 ipw_solve_size = (
max(bwl,bwu)*nrhs)
569 free_ptr = free_ptr + ipw_solve_size
572 IF( free_ptr.GT.memsiz )
THEN
574 $
WRITE( nout, fmt = 9996 )
'solve',
575 $ ( free_ptr )*cplxsz
581 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
584 IF( ierr( 1 ).GT.0 )
THEN
586 $
WRITE( nout, fmt = 9997 )
'MEMORY'
591 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
596 $ descb2d( m_ ), descb2d( n_ ),
597 $ descb2d( mb_ ), descb2d( nb_ ),
599 $ descb2d( lld_ ), descb2d( rsrc_ ),
601 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
602 $ myrow, npcol, nprow )
606 $ mem( ipb-iprepad ),
611 $ mem( ip_driver_w-iprepad ),
617 CALL blacs_barrier( ictxt,
'All')
622 CALL pcdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
623 $ 1, desca, mem( ipb ), 1, descb,
624 $ mem( ip_fillin ), fillin_size,
625 $ mem( ipw_solve ), ipw_solve_size,
632 $
WRITE( nout, fmt = * )
'PCDBTRS INFO=', info
644 $ mem( ip_driver_w-iprepad ),
654 $ mem( ipb ), 1, 1, descb2d,
655 $ iaseed, mem( ipa ), 1, 1, desca2d,
656 $ ibseed, anorm, sresid,
657 $ mem( ip_driver_w ), worksiz )
660 IF( sresid.GT.thresh )
661 $
WRITE( nout, fmt = 9985 ) sresid
666 IF( ( sresid.LE.thresh ).AND.
667 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
682 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
684 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
689 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
695 nprocs_real = ( n-1 )/nb + 1
696 n_last = mod( n-1, nb ) + 1
701 nops = 2*(dble(n)*dble(bwl)*
703 $ (dble(n)*dble(bwl))
708 $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
713 nops = nops * dble(4)
720 nops2 = 2*( (dble(n_first)*
721 $ dble(bwl)*dble(bwu)))
723 IF ( nprocs_real .GT. 1)
THEN
729 $ 8*( (dble(n_last)*dble(bwl)
733 IF ( nprocs_real .GT. 2)
THEN
737 nops2 = nops2 + (nprocs_real-2)*
738 $ 8*( (dble(nb)*dble(bwl)
745 $ 2*( nprocs_real-1 ) *
747 IF( nprocs_real .GT. 1 )
THEN
749 $ 2*( nprocs_real-2 ) *
763 $ ( dble(bwl)+dble(bwu))
765 IF ( nprocs_real .GT. 1 )
THEN
773 $ (dble(n_last)*(dble(bwl)+
774 $ dble(bwu)))*dble(nrhs)
777 IF ( nprocs_real .GT. 2 )
THEN
784 $ ( nprocs_real-2)*2*
785 $ ( (dble(nb)*(dble(bwl)+
786 $ dble(bwu)))*dble(nrhs) )
792 $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
793 IF( nprocs_real .GT. 1 )
THEN
795 $ nrhs*( nprocs_real-2 ) *
802 nops2 = nops2 * dble(4)
809 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
811 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
816 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
818 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
823 IF( wtime( 2 ).GE.0.0d+0 )
824 $
WRITE( nout, fmt = 9993 )
'WALL', trans,
827 $ nb, nrhs, nprow, npcol,
828 $ wtime( 1 ), wtime( 2 ), tmflops,
833 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
835 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
840 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
842 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
847 IF( ctime( 2 ).GE.0.0d+0 )
848 $
WRITE( nout, fmt = 9993 )
'CPU ', trans,
851 $ nb, nrhs, nprow, npcol,
852 $ ctime( 1 ), ctime( 2 ), tmflops,
868 CALL blacs_gridexit( ictxt )
869 CALL blacs_gridexit( ictxtb )
879 ktests = kpass + kfail + kskip
880 WRITE( nout, fmt = * )
881 WRITE( nout, fmt = 9992 ) ktests
883 WRITE( nout, fmt = 9991 ) kpass
884 WRITE( nout, fmt = 9989 ) kfail
886 WRITE( nout, fmt = 9990 ) kpass
888 WRITE( nout, fmt = 9988 ) kskip
889 WRITE( nout, fmt = * )
890 WRITE( nout, fmt = * )
891 WRITE( nout, fmt = 9987 )
892 IF( nout.NE.6 .AND. nout.NE.0 )
898 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
899 $
'; It should be at least 1' )
900 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
902 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
903 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
905 9995
FORMAT(
'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
906 $
'Slv Time MFLOPS MFLOP2 CHECK' )
907 9994
FORMAT(
'---- -- ------ --- --- ---- ----- ---- ---- -------- ',
908 $
'-------- -------- -------- ------' )
909 9993
FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
910 $ 1x,i4,1x,i4,1x,f9.3,
911 $ f9.4, f9.2, f9.2, 1x, a6 )
912 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
913 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
914 9990
FORMAT( i5,
' tests completed without checking.' )
915 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
916 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
917 9987
FORMAT(
'END OF TESTS.' )
918 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
919 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pcdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pcdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pcdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
subroutine pcdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
real function pclange(norm, m, n, a, ia, ja, desca, work)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)