69 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70 $ lld_, mb_, m_, nb_, n_, rsrc_
71 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74 INTEGER dblesz, intgsz, memsiz, ntests, totmem
75 DOUBLE PRECISION padval, zero
76 parameter( dblesz = 8, intgsz = 4, totmem = 4000000,
77 $ memsiz = totmem / dblesz, ntests = 20,
78 $ padval = -9923.0d+0, zero = 0.0d+0 )
84 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88 $ lipiv, liwork, lwork, lw2, m, maxmn,
89 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91 $ nprocs, nprow, nq, nrhs, worksiz
93 DOUBLE PRECISION anorm, anorm1, fresid, nops, rcond,
94 $ sresid, sresid2, tmflops
97 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98 $ mval( ntests ), nbrval( ntests ),
99 $ nbval( ntests ), nrval( ntests ),
100 $ nval( ntests ), pval( ntests ),
102 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122 DATA kfail, kpass, kskip, ktests / 4*0 /
128 CALL blacs_pinfo( iam, nprocs )
131 CALL pdluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
132 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
133 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
134 $ est, mem, iam, nprocs )
135 check = ( thresh.GE.0.0e+0 )
140 WRITE( nout, fmt = * )
141 WRITE( nout, fmt = 9995 )
142 WRITE( nout, fmt = 9994 )
143 WRITE( nout, fmt = * )
156 IF( nprow.LT.1 )
THEN
158 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
160 ELSE IF( npcol.LT.1 )
THEN
162 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
164 ELSE IF( nprow*npcol.GT.nprocs )
THEN
166 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
170 IF( ierr( 1 ).GT.0 )
THEN
172 $
WRITE( nout, fmt = 9997 )
'grid'
179 CALL blacs_get( -1, 0, ictxt )
180 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
181 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
186 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
199 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'M', m
201 ELSE IF( n.LT.1 )
THEN
203 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
209 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
211 IF( ierr( 1 ).GT.0 )
THEN
213 $
WRITE( nout, fmt = 9997 )
'matrix'
228 $
WRITE( nout, fmt = 9999 )
'NB',
'NB', nb
233 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'NB'
244 mp =
numroc( m, nb, myrow, 0, nprow )
245 np =
numroc( n, nb, myrow, 0, nprow )
246 nq =
numroc( n, nb, mycol, 0, npcol )
248 iprepad =
max( nb, mp )
250 ipostpad =
max( nb, nq )
259 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
260 $
max( 1, mp )+imidpad, ierr( 1 ) )
264 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
266 IF( ierr( 1 ).LT.0 )
THEN
268 $
WRITE( nout, fmt = 9997 )
'descriptor'
277 IF( est .AND. m.EQ.n )
THEN
278 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
279 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
281 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
283 lipiv =
iceil( intgsz*( mp+nb ), dblesz )
284 ipw = ippiv + lipiv + ipostpad + iprepad
292 worksiz =
max( 2, nq )
294 worksiz =
max( worksiz, mp*desca( nb_ )+
297 worksiz =
max( worksiz, mp * desca( nb_ ) )
299 worksiz = worksiz + ipostpad
310 IF( ipw+worksiz.GT.memsiz )
THEN
312 $
WRITE( nout, fmt = 9996 )
'factorization',
313 $ ( ipw+worksiz )*dblesz
319 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
321 IF( ierr( 1 ).GT.0 )
THEN
323 $
WRITE( nout, fmt = 9997 )
'MEMORY'
330 CALL pdmatgen( ictxt,
'No transpose',
'No transpose',
331 $ desca( m_ ), desca( n_ ), desca( mb_ ),
332 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
333 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
334 $ mp, 0, nq, myrow, mycol, nprow, npcol )
339 CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
340 $ desca( lld_ ), iprepad, ipostpad,
342 CALL pdfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
343 $ lipiv, iprepad, ipostpad, padval )
344 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
345 $ mem( ipw-iprepad ), worksiz-ipostpad,
346 $ iprepad, ipostpad, padval )
347 anorm =
pdlange(
'I', m, n, mem( ipa ), 1, 1, desca,
349 anorm1 =
pdlange(
'1', m, n, mem( ipa ), 1, 1, desca,
351 CALL pdchekpad( ictxt,
'PDLANGE', mp, nq,
352 $ mem( ipa-iprepad ), desca( lld_ ),
353 $ iprepad, ipostpad, padval )
354 CALL pdchekpad( ictxt,
'PDLANGE', worksiz-ipostpad,
355 $ 1, mem( ipw-iprepad ),
356 $ worksiz-ipostpad, iprepad, ipostpad,
360 IF( est .AND. m.EQ.n )
THEN
361 CALL pdmatgen( ictxt,
'No transpose',
'No transpose',
362 $ desca( m_ ), desca( n_ ), desca( mb_ ),
363 $ desca( nb_ ), mem( ipa0 ),
364 $ desca( lld_ ), desca( rsrc_ ),
365 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
366 $ myrow, mycol, nprow, npcol )
368 $
CALL pdfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
369 $ desca( lld_ ), iprepad, ipostpad,
374 CALL blacs_barrier( ictxt,
'All' )
379 CALL pdgetrf( m, n, mem( ipa ), 1, 1, desca,
380 $ mem( ippiv ), info )
386 $
WRITE( nout, fmt = * )
'PDGETRF INFO=', info
396 CALL pdchekpad( ictxt,
'PDGETRF', mp, nq,
397 $ mem( ipa-iprepad ), desca( lld_ ),
398 $ iprepad, ipostpad, padval )
399 CALL pdchekpad( ictxt,
'PDGETRF', lipiv, 1,
400 $ mem( ippiv-iprepad ), lipiv, iprepad,
415 CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
416 $ mem( ippiv ), mem( ipw ) )
417 CALL pdlafchk(
'No',
'No', m, n, mem( ipa ), 1, 1,
418 $ desca, iaseed, anorm, fresid,
423 CALL pdchekpad( ictxt,
'PDGETRRV', mp, nq,
424 $ mem( ipa-iprepad ), desca( lld_ ),
425 $ iprepad, ipostpad, padval )
426 CALL pdchekpad( ictxt,
'PDGETRRV', lipiv, 1,
427 $ mem( ippiv-iprepad ), lipiv,
428 $ iprepad, ipostpad, padval )
430 $ worksiz-ipostpad, 1,
431 $ mem( ipw-iprepad ),
432 $ worksiz-ipostpad, iprepad,
437 IF( ( fresid.LE.thresh ) .AND.
438 $ ( (fresid-fresid).EQ.0.0d+0 ) )
THEN
444 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
445 $
WRITE( nout, fmt = 9986 ) fresid
453 fresid = fresid - fresid
460 CALL slcombine( ictxt,
'All',
'>',
'W', 1, 1,
462 CALL slcombine( ictxt,
'All',
'>',
'C', 1, 1,
467 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
475 nops = dble( maxmn )*( dble( minmn )**2 ) -
476 $ (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -
477 $ (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )
484 IF( wtime( 1 ).GT.0.0d+0 )
THEN
485 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
491 IF( wtime( 1 ).GE.0.0d+0 )
492 $
WRITE( nout, fmt = 9993 )
'WALL', m, n, nb,
493 $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
494 $ wtime( 2 ), tmflops, passed
498 IF( ctime( 1 ).GT.0.0d+0 )
THEN
499 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
505 IF( ctime( 1 ).GE.0.0d+0 )
506 $
WRITE( nout, fmt = 9993 )
'CPU ', m, n, nb,
507 $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
508 $ ctime( 2 ), tmflops, passed
519 lwork =
max( 1, 2*np ) +
max( 1, 2*nq ) +
520 $
max( 2, desca( nb_ )*
521 $
max( 1,
iceil( nprow-1, npcol ) ),
523 $
max( 1,
iceil( npcol-1, nprow ) ) )
524 ipw2 = ipw + lwork + ipostpad + iprepad
525 liwork =
max( 1, np )
526 lw2 =
iceil( liwork*intgsz, dblesz ) + ipostpad
529 IF( ipw2+lw2.GT.memsiz )
THEN
531 $
WRITE( nout, fmt = 9996 )
'cond est',
532 $ ( ipw2+lw2 )*dblesz
538 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
541 IF( ierr( 1 ).GT.0 )
THEN
543 $
WRITE( nout, fmt = 9997 )
'MEMORY'
550 $ mem( ipw-iprepad ), lwork,
551 $ iprepad, ipostpad, padval )
553 $ mem( ipw2-iprepad ),
554 $ lw2-ipostpad, iprepad,
560 CALL pdgecon(
'1', n, mem( ipa ), 1, 1, desca,
561 $ anorm1, rcond, mem( ipw ), lwork,
562 $ mem( ipw2 ), liwork, info )
565 CALL pdchekpad( ictxt,
'PDGECON', np, nq,
566 $ mem( ipa-iprepad ),
567 $ desca( lld_ ), iprepad,
569 CALL pdchekpad( ictxt,
'PDGECON', lwork, 1,
570 $ mem( ipw-iprepad ), lwork,
571 $ iprepad, ipostpad, padval )
574 $ mem( ipw2-iprepad ),
575 $ lw2-ipostpad, iprepad,
592 CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
593 $ ictxt,
max( 1, np )+imidpad,
598 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
601 IF( ierr( 1 ).LT.0 )
THEN
603 $
WRITE( nout, fmt = 9997 )
'descriptor'
610 myrhs =
numroc( descb( n_ ), descb( nb_ ),
611 $ mycol, descb( csrc_ ), npcol )
615 ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
617 ipferr = ipb0 + descb( lld_ )*myrhs +
619 ipberr = myrhs + ipferr + ipostpad + iprepad
620 ipw = myrhs + ipberr + ipostpad + iprepad
622 ipw = ipb + descb( lld_ )*myrhs + ipostpad +
630 lcm =
ilcm( nprow, npcol )
632 worksiz =
max( worksiz-ipostpad,
633 $ nq * nbrhs + np * nbrhs +
634 $
max(
max( nq*nb, 2*nbrhs ),
637 worksiz = ipostpad + worksiz
643 IF( ipw+worksiz.GT.memsiz )
THEN
645 $
WRITE( nout, fmt = 9996 )
'solve',
646 $ ( ipw+worksiz )*dblesz
652 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
655 IF( ierr( 1 ).GT.0 )
THEN
657 $
WRITE( nout, fmt = 9997 )
'MEMORY'
664 CALL pdmatgen( ictxt,
'No',
'No', descb( m_ ),
665 $ descb( n_ ), descb( mb_ ),
666 $ descb( nb_ ), mem( ipb ),
667 $ descb( lld_ ), descb( rsrc_ ),
668 $ descb( csrc_ ), ibseed, 0, np, 0,
669 $ myrhs, myrow, mycol, nprow,
674 $ mem( ipb-iprepad ),
675 $ descb( lld_ ), iprepad,
680 $ descb( m_ ), descb( n_ ),
681 $ descb( mb_ ), descb( nb_ ),
682 $ mem( ipb0 ), descb( lld_ ),
684 $ descb( csrc_ ), ibseed, 0, np,
685 $ 0, myrhs, myrow, mycol, nprow,
689 $ mem( ipb0-iprepad ),
690 $ descb( lld_ ), iprepad,
693 $ mem( ipferr-iprepad ), 1,
697 $ mem( ipberr-iprepad ), 1,
703 CALL blacs_barrier( ictxt,
'All' )
708 CALL pdgetrs(
'No', n, nrhs, mem( ipa ), 1, 1,
709 $ desca, mem( ippiv ), mem( ipb ),
710 $ 1, 1, descb, info )
718 CALL pdchekpad( ictxt,
'PDGETRS', np, nq,
719 $ mem( ipa-iprepad ),
720 $ desca( lld_ ), iprepad,
722 CALL pdchekpad( ictxt,
'PDGETRS', lipiv, 1,
723 $ mem( ippiv-iprepad ), lipiv,
724 $ iprepad, ipostpad, padval )
726 $ myrhs, mem( ipb-iprepad ),
727 $ descb( lld_ ), iprepad,
731 $ 1, mem( ipw-iprepad ),
732 $ worksiz-ipostpad, iprepad,
738 $ mem( ipb ), 1, 1, descb,
739 $ iaseed, 1, 1, desca, ibseed,
740 $ anorm, sresid, mem( ipw ) )
742 IF( iam.EQ.0 .AND. sresid.GT.thresh )
743 $
WRITE( nout, fmt = 9985 ) sresid
748 $ myrhs, mem( ipb-iprepad ),
749 $ descb( lld_ ), iprepad,
752 $ worksiz-ipostpad, 1,
753 $ mem( ipw-iprepad ),
755 $ iprepad, ipostpad, padval )
759 IF( sresid.LE.thresh .AND.
760 $ ( sresid-sresid ).EQ.0.0d+0 )
THEN
769 sresid = sresid - sresid
777 lwork =
max( 1, 3*np )
778 ipw2 = ipw + lwork + ipostpad + iprepad
779 liwork =
max( 1, np )
780 lw2 =
iceil( liwork*intgsz, dblesz ) +
784 IF( ipw2+lw2.GT.memsiz )
THEN
786 $
WRITE( nout, fmt = 9996 )
787 $
'iter ref', ( ipw2+lw2 )*dblesz
793 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
796 IF( ierr( 1 ).GT.0 )
THEN
798 $
WRITE( nout, fmt = 9997 )
806 $ mem( ipw-iprepad ),
807 $ lwork, iprepad, ipostpad,
810 $ mem( ipw2-iprepad ),
811 $ lw2-ipostpad, iprepad,
818 CALL pdgerfs(
'No', n, nrhs, mem( ipa0 ), 1,
819 $ 1, desca, mem( ipa ), 1, 1,
820 $ desca, mem( ippiv ),
821 $ mem( ipb0 ), 1, 1, descb,
822 $ mem( ipb ), 1, 1, descb,
823 $ mem( ipferr ), mem( ipberr ),
824 $ mem( ipw ), lwork, mem( ipw2 ),
829 $ nq, mem( ipa0-iprepad ),
830 $ desca( lld_ ), iprepad,
833 $ nq, mem( ipa-iprepad ),
834 $ desca( lld_ ), iprepad,
837 $ 1, mem( ippiv-iprepad ),
841 $ myrhs, mem( ipb-iprepad ),
842 $ descb( lld_ ), iprepad,
846 $ mem( ipb0-iprepad ),
847 $ descb( lld_ ), iprepad,
851 $ mem( ipferr-iprepad ), 1,
856 $ mem( ipberr-iprepad ), 1,
860 $ 1, mem( ipw-iprepad ),
861 $ lwork, iprepad, ipostpad,
865 $ mem( ipw2-iprepad ),
866 $ lw2-ipostpad, iprepad,
870 $ 1, mem( ipw-iprepad ),
871 $ worksiz-ipostpad, iprepad,
877 $ mem( ipb ), 1, 1, descb,
878 $ iaseed, 1, 1, desca,
879 $ ibseed, anorm, sresid2,
882 IF( iam.EQ.0 .AND. sresid2.GT.thresh )
883 $
WRITE( nout, fmt = 9985 ) sresid2
888 $ myrhs, mem( ipb-iprepad ),
889 $ descb( lld_ ), iprepad,
892 $ worksiz-ipostpad, 1,
893 $ mem( ipw-iprepad ),
894 $ worksiz-ipostpad, iprepad,
901 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
903 CALL slcombine( ictxt,
'All',
'>',
'C', 2, 1,
908 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
912 nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -
913 $ (1.0d+0/2.0d+0)*( dble(n)**2 )
917 nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)
925 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
928 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
935 IF( wtime( 2 ).GE.0.0d+0 )
936 $
WRITE( nout, fmt = 9993 )
'WALL', m, n,
937 $ nb, nrhs, nbrhs, nprow, npcol,
938 $ wtime( 1 ), wtime( 2 ), tmflops,
943 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
946 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
951 IF( ctime( 2 ).GE.0.0d+0 )
952 $
WRITE( nout, fmt = 9993 )
'CPU ', m, n,
953 $ nb, nrhs, nbrhs, nprow, npcol,
954 $ ctime( 1 ), ctime( 2 ), tmflops,
960 IF( check.AND.( sresid.GT.thresh ) )
THEN
964 CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
965 $ mem( ippiv ), mem( ipw ) )
966 CALL pdlafchk(
'No',
'No', m, n, mem( ipa ), 1,
967 $ 1, desca, iaseed, anorm, fresid,
972 CALL pdchekpad( ictxt,
'PDGETRRV', np, nq,
973 $ mem( ipa-iprepad ), desca( lld_ ),
974 $ iprepad, ipostpad, padval )
975 CALL pdchekpad( ictxt,
'PDGETRRV', lipiv,
976 $ 1, mem( ippiv-iprepad ), lipiv,
977 $ iprepad, ipostpad, padval )
979 $ worksiz-ipostpad, 1,
980 $ mem( ipw-iprepad ),
981 $ worksiz-ipostpad, iprepad,
984 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
985 $
WRITE( nout, fmt = 9986 ) fresid
990 CALL blacs_gridexit( ictxt )
997 ktests = kpass + kfail + kskip
998 WRITE( nout, fmt = * )
999 WRITE( nout, fmt = 9992 ) ktests
1001 WRITE( nout, fmt = 9991 ) kpass
1002 WRITE( nout, fmt = 9989 ) kfail
1004 WRITE( nout, fmt = 9990 ) kpass
1006 WRITE( nout, fmt = 9988 ) kskip
1007 WRITE( nout, fmt = * )
1008 WRITE( nout, fmt = * )
1009 WRITE( nout, fmt = 9987 )
1010 IF( nout.NE.6 .AND. nout.NE.0 )
1014 CALL blacs_exit( 0 )
1016 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
1017 $
'; It should be at least 1' )
1018 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
1020 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
1021 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
1023 9995
FORMAT(
'TIME M N NB NRHS NBRHS P Q LU Time ',
1024 $
'Sol Time MFLOPS CHECK' )
1025 9994
FORMAT(
'---- ----- ----- --- ---- ----- ---- ---- -------- ',
1026 $
'-------- -------- ------' )
1027 9993
FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1028 $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1029 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
1030 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
1031 9990
FORMAT( i5,
' tests completed without checking.' )
1032 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
1033 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
1034 9987
FORMAT(
'END OF TESTS.' )
1035 9986
FORMAT(
'||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1036 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )