75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER cplxsz, memsiz, ntests
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
90 parameter( int_one = 1 )
97 INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
98 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99 $ imidpad, info, int_temp, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
110 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
111 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
112 $ nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
133 INTRINSIC dble,
max,
min, mod
136 DATA kfail, kpass, kskip, ktests / 4*0 /
145 CALL blacs_pinfo( iam, nprocs )
149 CALL pcptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
150 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
151 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
152 $ qval, ntests, thresh, mem, iam, nprocs )
154 check = ( thresh.GE.0.0e+0 )
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
175 IF( nprow.LT.1 )
THEN
177 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
179 ELSE IF( npcol.LT.1 )
THEN
181 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
183 ELSE IF( nprow*npcol.GT.nprocs )
THEN
185 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
189 IF( ierr( 1 ).GT.0 )
THEN
191 $
WRITE( nout, fmt = 9997 )
'grid'
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
226 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
232 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'size'
243 DO 45 bw_num = 1, nbw
250 $
WRITE( nout, fmt = 9999 )
'Band',
'bw', bw
260 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
263 IF( ierr( 1 ).GT.0 )
THEN
274 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
276 nb =
max( nb, 2*int_one )
283 IF( nb.LT.
min( 2*int_one, n ) )
THEN
289 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
292 IF( ierr( 1 ).GT.0 )
THEN
301 nq =
numroc( n, nb, mycol, 0, npcol )
317 $ ictxtb, nb+10, ierr( 1 ) )
326 desca( 6 ) = ((2)+10)
329 ierr_temp = ierr( 1 )
331 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
335 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
337 IF( ierr( 1 ).LT.0 )
THEN
339 $
WRITE( nout, fmt = 9997 )
'descriptor'
351 free_ptr = free_ptr + iprepad
354 free_ptr = free_ptr + (nb+10)*(2)
370 free_ptr = free_ptr + iprepad
372 free_ptr = free_ptr + fillin_size
385 free_ptr = free_ptr + ipw_size
390 IF( free_ptr.GT.memsiz )
THEN
392 $
WRITE( nout, fmt = 9996 )
393 $
'divide and conquer factorization',
400 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
403 IF( ierr( 1 ).GT.0 )
THEN
405 $
WRITE( nout, fmt = 9997 )
'MEMORY'
411 worksiz =
max( ((2)+10), nb )
419 worksiz =
max( worksiz, desca2d( nb_ ) )
422 worksiz =
max( worksiz,
426 free_ptr = free_ptr + iprepad
427 ip_driver_w = free_ptr
428 free_ptr = free_ptr + worksiz + ipostpad
434 IF( free_ptr.GT.memsiz )
THEN
436 $
WRITE( nout, fmt = 9996 )
'factorization',
437 $ ( free_ptr )*cplxsz
443 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
446 IF( ierr( 1 ).GT.0 )
THEN
448 $
WRITE( nout, fmt = 9997 )
'MEMORY'
453 CALL pcbmatgen( ictxt, uplo,
'T', bw, bw, n, (2), nb,
454 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
455 $ mycol, nprow, npcol )
456 CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
457 $ nb+10, iprepad, ipostpad,
461 $ mem( ip_driver_w-iprepad ), worksiz,
462 $ iprepad, ipostpad, padval )
469 $ (2), mem( ipa ), 1, 1,
470 $ desca2d, mem( ip_driver_w ) )
471 CALL pcchekpad( ictxt,
'PCLANGE', nq, np,
472 $ mem( ipa-iprepad ), nb+10,
473 $ iprepad, ipostpad, padval )
476 $ mem( ip_driver_w-iprepad ), worksiz,
477 $ iprepad, ipostpad, padval )
480 IF(
lsame( uplo,
'L' ) )
THEN
483 int_temp = desca2d( lld_ )
489 DO 10 h=1,
numroc(n,nb,mycol,0,npcol)/2
490 mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
491 $ +mem( ipa+int_temp+2*h-1 )*( 0.0e+0, 1.0e+0 )
493 IF( 2*(
numroc(n,nb,mycol,0,npcol)/2).NE.
494 $
numroc(n,nb,mycol,0,npcol) )
THEN
495 h=
numroc(n,nb,mycol,0,npcol)/2+1
496 mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
501 CALL blacs_barrier( ictxt,
'All' )
507 CALL pcpttrf( n, mem( ipa+int_temp ),
508 $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
509 $ mem( ip_fillin ), fillin_size, mem( ipw ),
516 WRITE( nout, fmt = * )
'PCPTTRF INFO=', info
527 $ np, mem( ipa-iprepad ), nb+10,
528 $ iprepad, ipostpad, padval )
542 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
543 $ ictxtb, nb+10, ierr( 1 ) )
552 descb( 6 ) = descb2d( lld_ )
557 IF( ipb .GT. 0 )
THEN
561 free_ptr = free_ptr + iprepad
563 free_ptr = free_ptr + nrhs*descb2d( lld_ )
568 ipw_solve_size = (10+2*
min(100,nrhs))*npcol+4*nrhs
571 free_ptr = free_ptr + ipw_solve_size
574 IF( free_ptr.GT.memsiz )
THEN
576 $
WRITE( nout, fmt = 9996 )
'solve',
577 $ ( free_ptr )*cplxsz
583 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
586 IF( ierr( 1 ).GT.0 )
THEN
588 $
WRITE( nout, fmt = 9997 )
'MEMORY'
593 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
598 $ descb2d( m_ ), descb2d( n_ ),
599 $ descb2d( mb_ ), descb2d( nb_ ),
601 $ descb2d( lld_ ), descb2d( rsrc_ ),
603 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
604 $ myrow, npcol, nprow )
608 $ mem( ipb-iprepad ),
613 $ mem( ip_driver_w-iprepad ),
619 CALL blacs_barrier( ictxt,
'All')
624 CALL pcpttrs( uplo, n, nrhs, mem( ipa+int_temp ),
625 $ mem( ipa+1*( nb+10-int_temp ) ), 1,
626 $ desca, mem( ipb ), 1, descb,
627 $ mem( ip_fillin ), fillin_size,
628 $ mem( ipw_solve ), ipw_solve_size,
635 $
WRITE( nout, fmt = * )
'PCPTTRS INFO=', info
647 $ mem( ip_driver_w-iprepad ),
660 $ ictxt, (2), ierr( 1 ) )
662 $ mem( ipb ), 1, 1, descb2d,
663 $ iaseed, mem( ipa ), 1, 1, desca2d,
664 $ ibseed, anorm, sresid,
665 $ mem( ip_driver_w ), worksiz )
668 IF( sresid.GT.thresh )
669 $
WRITE( nout, fmt = 9985 ) sresid
674 IF( ( sresid.LE.thresh ).AND.
675 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
690 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
692 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
697 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
703 nprocs_real = ( n-1 )/nb + 1
704 n_last = mod( n-1, nb ) + 1
707 nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
708 $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
709 $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
710 $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
711 nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
712 $ *( -1.d0 /2.d0+dble(bw)
713 $ *( -1.d0 / 3.d0 ) ) ) +
714 $ dble(n)*( dble(bw) /
715 $ 2.d0*( 1.d0+dble(bw) ) )
718 $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
719 $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
720 $ ( dble(bw)*( 2*dble(n)-
721 $ ( dble(bw)+1.d0 ) ) )
728 nops2 = ( (dble(n_first))* dble(bw)**2 )
730 IF ( nprocs_real .GT. 1)
THEN
735 $ 4*( (dble(n_last)*dble(bw)**2) )
738 IF ( nprocs_real .GT. 2)
THEN
742 nops2 = nops2 + (nprocs_real-2)*
743 $ 4*( (dble(nb)*dble(bw)**2) )
749 $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
750 IF( nprocs_real .GT. 1 )
THEN
752 $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
759 $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
761 IF ( nprocs_real .GT. 1 )
THEN
766 $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
769 IF ( nprocs_real .GT. 2 )
THEN
774 $ ( nprocs_real-2)*2*
775 $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
781 $ nrhs*( nprocs_real-1 ) * ( bw*bw )
782 IF( nprocs_real .GT. 1 )
THEN
784 $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
790 nops2 = nops2 * dble(4)
797 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
799 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
804 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
806 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
811 IF( wtime( 2 ).GE.0.0d+0 )
812 $
WRITE( nout, fmt = 9993 )
'WALL', uplo,
815 $ nb, nrhs, nprow, npcol,
816 $ wtime( 1 ), wtime( 2 ), tmflops,
821 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
823 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
828 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
830 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
835 IF( ctime( 2 ).GE.0.0d+0 )
836 $
WRITE( nout, fmt = 9993 )
'CPU ', uplo,
839 $ nb, nrhs, nprow, npcol,
840 $ ctime( 1 ), ctime( 2 ), tmflops,
856 CALL blacs_gridexit( ictxt )
857 CALL blacs_gridexit( ictxtb )
867 ktests = kpass + kfail + kskip
868 WRITE( nout, fmt = * )
869 WRITE( nout, fmt = 9992 ) ktests
871 WRITE( nout, fmt = 9991 ) kpass
872 WRITE( nout, fmt = 9989 ) kfail
874 WRITE( nout, fmt = 9990 ) kpass
876 WRITE( nout, fmt = 9988 ) kskip
877 WRITE( nout, fmt = * )
878 WRITE( nout, fmt = * )
879 WRITE( nout, fmt = 9987 )
880 IF( nout.NE.6 .AND. nout.NE.0 )
886 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
887 $
'; It should be at least 1' )
888 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
890 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
891 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
893 9995
FORMAT(
'TIME UL N BW NB NRHS P Q L*U Time ',
894 $
'Slv Time MFLOPS MFLOP2 CHECK' )
895 9994
FORMAT(
'---- -- ------ --- ---- ----- -- ---- -------- ',
896 $
'-------- ------ ------ ------' )
897 9993
FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
899 $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
900 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
901 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
902 9990
FORMAT( i5,
' tests completed without checking.' )
903 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
904 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
905 9987
FORMAT(
'END OF TESTS.' )
906 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
907 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
real function pclange(norm, m, n, a, ia, ja, desca, work)
subroutine pcptinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pcptlaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pcpttrf(n, d, e, ja, desca, af, laf, work, lwork, info)
subroutine pcpttrs(uplo, n, nrhs, d, e, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)