74 parameter( totmem = 3000000 )
75 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_, rsrc_
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
82 INTEGER memsiz, ntests, realsz
84 parameter( realsz = 4,
85 $ memsiz = totmem / realsz, ntests = 20,
86 $ padval = -9923.0e+0, zero = 0.0e+0 )
88 parameter( int_one = 1 )
95 INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
96 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
97 $ imidpad, info, ipa, ipb, ipostpad, iprepad,
98 $ ipw, ipw_size, ipw_solve, ipw_solve_size,
99 $ ip_driver_w, ip_fillin, j, k, kfail, kpass,
100 $ kskip, ktests, mycol, myrhs_size, myrow, n, nb,
101 $ nbw, ngrids, nmat, nnb, nnbr, nnr, nout, np,
102 $ npcol, nprocs, nprocs_real, nprow, nq, nrhs,
103 $ n_first, n_last, worksiz
104 REAL anorm, sresid, thresh
105 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
109 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
110 $ nbrval( ntests ), nbval( ntests ),
111 $ nrval( ntests ), nval( ntests ),
112 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
131 INTRINSIC dble,
max,
min, mod
134 DATA kfail, kpass, kskip, ktests / 4*0 /
143 CALL blacs_pinfo( iam, nprocs )
147 CALL pspbinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
148 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
149 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
150 $ qval, ntests, thresh, mem, iam, nprocs )
152 check = ( thresh.GE.0.0e+0 )
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
173 IF( nprow.LT.1 )
THEN
175 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
177 ELSE IF( npcol.LT.1 )
THEN
179 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
181 ELSE IF( nprow*npcol.GT.nprocs )
THEN
183 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 IF( ierr( 1 ).GT.0 )
THEN
189 $
WRITE( nout, fmt = 9997 )
'grid'
196 CALL blacs_get( -1, 0, ictxt )
197 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
202 CALL blacs_get( -1, 0, ictxtb )
203 CALL blacs_gridinit( ictxtb,
'Column-major', npcol, nprow )
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
210 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
224 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
230 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
233 IF( ierr( 1 ).GT.0 )
THEN
235 $
WRITE( nout, fmt = 9997 )
'size'
241 DO 45 bw_num = 1, nbw
248 $
WRITE( nout, fmt = 9999 )
'Band',
'bw', bw
258 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
261 IF( ierr( 1 ).GT.0 )
THEN
272 nb =( (n-(npcol-1)*bw-1)/npcol + 1 )
281 IF( nb.LT.
min( 2*bw, n ) )
THEN
287 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
290 IF( ierr( 1 ).GT.0 )
THEN
297 np =
numroc( (bw+1), (bw+1),
299 nq =
numroc( n, nb, mycol, 0, npcol )
302 iprepad = ((bw+1)+10)
304 ipostpad = ((bw+1)+10)
315 $ ictxt,((bw+1)+10), ierr( 1 ) )
324 desca( 6 ) = ((bw+1)+10)
327 ierr_temp = ierr( 1 )
329 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
333 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
335 IF( ierr( 1 ).LT.0 )
THEN
337 $
WRITE( nout, fmt = 9997 )
'descriptor'
349 free_ptr = free_ptr + iprepad
352 free_ptr = free_ptr + desca2d( lld_ )*
369 free_ptr = free_ptr + iprepad
371 free_ptr = free_ptr + fillin_size
384 free_ptr = free_ptr + ipw_size
389 IF( free_ptr.GT.memsiz )
THEN
391 $
WRITE( nout, fmt = 9996 )
392 $
'divide and conquer factorization',
399 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
402 IF( ierr( 1 ).GT.0 )
THEN
404 $
WRITE( nout, fmt = 9997 )
'MEMORY'
410 worksiz =
max( ((bw+1)+10), nb )
418 worksiz =
max( worksiz, desca2d( nb_ ) )
421 worksiz =
max( worksiz,
422 $
max(5,
max(bw*(bw+2),nb))+2*nb )
425 free_ptr = free_ptr + iprepad
426 ip_driver_w = free_ptr
427 free_ptr = free_ptr + worksiz + ipostpad
433 IF( free_ptr.GT.memsiz )
THEN
435 $
WRITE( nout, fmt = 9996 )
'factorization',
436 $ ( free_ptr )*realsz
442 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
445 IF( ierr( 1 ).GT.0 )
THEN
447 $
WRITE( nout, fmt = 9997 )
'MEMORY'
452 CALL psbmatgen( ictxt, uplo,
'B', bw, bw, n, (bw+1), nb,
453 $ mem( ipa ), ((bw+1)+10), 0, 0, iaseed,
454 $ myrow, mycol, nprow, npcol )
456 CALL psfillpad( ictxt, np, nq, mem( ipa-iprepad ),
457 $ ((bw+1)+10), iprepad, ipostpad,
461 $ mem( ip_driver_w-iprepad ), worksiz,
462 $ iprepad, ipostpad, padval )
469 $ n, mem( ipa ), 1, 1,
470 $ desca2d, mem( ip_driver_w ) )
471 CALL pschekpad( ictxt,
'PSLANGE', np, nq,
472 $ mem( ipa-iprepad ), ((bw+1)+10),
473 $ iprepad, ipostpad, padval )
476 $ mem( ip_driver_w-iprepad ), worksiz,
477 $ iprepad, ipostpad, padval )
482 CALL blacs_barrier( ictxt,
'All' )
488 CALL pspbtrf( uplo, n, bw, mem( ipa ), 1, desca,
489 $ mem( ip_fillin ), fillin_size, mem( ipw ),
496 WRITE( nout, fmt = * )
'PSPBTRF INFO=', info
507 $ nq, mem( ipa-iprepad ), ((bw+1)+10),
508 $ iprepad, ipostpad, padval )
522 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
523 $ ictxtb, nb+10, ierr( 1 ) )
532 descb( 6 ) = descb2d( lld_ )
537 IF( ipb .GT. 0 )
THEN
541 free_ptr = free_ptr + iprepad
543 free_ptr = free_ptr + nrhs*descb2d( lld_ )
548 ipw_solve_size = (bw*nrhs)
551 free_ptr = free_ptr + ipw_solve_size
554 IF( free_ptr.GT.memsiz )
THEN
556 $
WRITE( nout, fmt = 9996 )
'solve',
557 $ ( free_ptr )*realsz
563 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
566 IF( ierr( 1 ).GT.0 )
THEN
568 $
WRITE( nout, fmt = 9997 )
'MEMORY'
573 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
578 $ descb2d( m_ ), descb2d( n_ ),
579 $ descb2d( mb_ ), descb2d( nb_ ),
581 $ descb2d( lld_ ), descb2d( rsrc_ ),
583 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
584 $ myrow, npcol, nprow )
588 $ mem( ipb-iprepad ),
593 $ mem( ip_driver_w-iprepad ),
599 CALL blacs_barrier( ictxt,
'All')
604 CALL pspbtrs( uplo, n, bw, nrhs, mem( ipa ), 1,
605 $ desca, mem( ipb ), 1, descb,
606 $ mem( ip_fillin ), fillin_size,
607 $ mem( ipw_solve ), ipw_solve_size,
614 $
WRITE( nout, fmt = * )
'PSPBTRS INFO=', info
626 $ mem( ip_driver_w-iprepad ),
635 $ mem( ipb ), 1, 1, descb2d,
636 $ iaseed, mem( ipa ), 1, 1, desca2d,
637 $ ibseed, anorm, sresid,
638 $ mem( ip_driver_w ), worksiz )
641 IF( sresid.GT.thresh )
642 $
WRITE( nout, fmt = 9985 ) sresid
647 IF( ( sresid.LE.thresh ).AND.
648 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
663 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
665 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
670 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
676 nprocs_real = ( n-1 )/nb + 1
677 n_last = mod( n-1, nb ) + 1
680 nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
681 $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
682 $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
683 $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
684 nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
685 $ *( -1.d0 /2.d0+dble(bw)
686 $ *( -1.d0 / 3.d0 ) ) ) +
687 $ dble(n)*( dble(bw) /
688 $ 2.d0*( 1.d0+dble(bw) ) )
691 $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
692 $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
693 $ ( dble(bw)*( 2*dble(n)-
694 $ ( dble(bw)+1.d0 ) ) )
701 nops2 = ( (dble(n_first))* dble(bw)**2 )
703 IF ( nprocs_real .GT. 1)
THEN
708 $ 4*( (dble(n_last)*dble(bw)**2) )
711 IF ( nprocs_real .GT. 2)
THEN
715 nops2 = nops2 + (nprocs_real-2)*
716 $ 4*( (dble(nb)*dble(bw)**2) )
722 $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
723 IF( nprocs_real .GT. 1 )
THEN
725 $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
732 $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
734 IF ( nprocs_real .GT. 1 )
THEN
739 $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
742 IF ( nprocs_real .GT. 2 )
THEN
747 $ ( nprocs_real-2)*2*
748 $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
754 $ nrhs*( nprocs_real-1 ) * ( bw*bw )
755 IF( nprocs_real .GT. 1 )
THEN
757 $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
766 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
THEN
768 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
773 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 )
THEN
775 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
780 IF( wtime( 2 ).GE.0.0d+0 )
781 $
WRITE( nout, fmt = 9993 )
'WALL', uplo,
784 $ nb, nrhs, nprow, npcol,
785 $ wtime( 1 ), wtime( 2 ), tmflops,
790 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
792 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
797 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
THEN
799 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
804 IF( ctime( 2 ).GE.0.0d+0 )
805 $
WRITE( nout, fmt = 9993 )
'CPU ', uplo,
808 $ nb, nrhs, nprow, npcol,
809 $ ctime( 1 ), ctime( 2 ), tmflops,
825 CALL blacs_gridexit( ictxt )
826 CALL blacs_gridexit( ictxtb )
836 ktests = kpass + kfail + kskip
837 WRITE( nout, fmt = * )
838 WRITE( nout, fmt = 9992 ) ktests
840 WRITE( nout, fmt = 9991 ) kpass
841 WRITE( nout, fmt = 9989 ) kfail
843 WRITE( nout, fmt = 9990 ) kpass
845 WRITE( nout, fmt = 9988 ) kskip
846 WRITE( nout, fmt = * )
847 WRITE( nout, fmt = * )
848 WRITE( nout, fmt = 9987 )
849 IF( nout.NE.6 .AND. nout.NE.0 )
855 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
856 $
'; It should be at least 1' )
857 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
859 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
860 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
862 9995
FORMAT(
'TIME UL N BW NB NRHS P Q L*U Time ',
863 $
'Slv Time MFLOPS MFLOP2 CHECK' )
864 9994
FORMAT(
'---- -- ------ --- ---- ----- -- ---- -------- ',
865 $
'-------- ------ ------ ------' )
866 9993
FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
868 $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
869 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
870 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
871 9990
FORMAT( i5,
' tests completed without checking.' )
872 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
873 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
874 9987
FORMAT(
'END OF TESTS.' )
875 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7 )
876 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine psbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
real function pslange(norm, m, n, a, ia, ja, desca, work)
subroutine pspbinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pspblaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pspbtrf(uplo, n, bw, a, ja, desca, af, laf, work, lwork, info)
subroutine pspbtrs(uplo, n, bw, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)