63 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64 $ lld_, mb_, m_, nb_, n_, rsrc_
65 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68 INTEGER dblesz, memsiz, ntests, totmem
69 DOUBLE PRECISION padval
70 DOUBLE PRECISION one, zero
71 parameter( dblesz = 8, totmem = 2000000,
72 $ memsiz = totmem / dblesz, ntests = 20,
73 $ padval = -9923.0d+0 )
74 parameter( one = 1.0d+0, zero = 0.0d+0 )
81 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
84 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
90 DOUBLE PRECISION addfac, adds, anorm, bnorm, mulfac, mults,
91 $ nops, sresid, tmflops
94 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
95 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
96 $ nbrval( ntests ), nbval( ntests ),
97 $ nrval( ntests ), nval( ntests ),
98 $ pval( ntests ), qval( ntests )
99 DOUBLE PRECISION ctime( 1 ), mem( memsiz ), result( 2 ),
103 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121 DATA ktests, kpass, kfail, kskip / 4*0 /
127 CALL blacs_pinfo( iam, nprocs )
131 CALL pdlsinfo( outfile, nout, nmat, mval, ntests, nval,
132 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
133 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
134 $ ntests, thresh, mem, iam, nprocs )
135 check = ( thresh.GE.0.0e+0 )
140 WRITE( nout, fmt = * )
141 WRITE( nout, fmt = 9995 )
142 WRITE( nout, fmt = 9994 )
143 WRITE( nout, fmt = * )
156 IF( nprow.LT.1 )
THEN
158 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
160 ELSE IF( npcol.LT.1 )
THEN
162 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
164 ELSE IF( nprow*npcol.GT.nprocs )
THEN
166 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
170 IF( ierr( 1 ).GT.0 )
THEN
172 $
WRITE( nout, fmt = 9997 )
'grid'
179 CALL blacs_get( -1, 0, ictxt )
180 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
181 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
185 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
198 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'M', m
200 ELSE IF( n.LT.1 )
THEN
202 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
208 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
210 IF( ierr( 1 ).GT.0 )
THEN
212 $
WRITE( nout, fmt = 9997 )
'matrix'
229 $
WRITE( nout, fmt = 9999 )
'NB',
'NB', nb
234 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
236 IF( ierr( 1 ).GT.0 )
THEN
238 $
WRITE( nout, fmt = 9997 )
'NB'
245 mp =
numroc( m, nb, myrow, 0, nprow )
246 mq =
numroc( m, nb, mycol, 0, npcol )
247 np =
numroc( n, nb, myrow, 0, nprow )
249 nq =
numroc( n, nb, mycol, 0, npcol )
252 iprepad =
max( nb, mp )
254 ipostpad =
max( nb, nq )
263 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
264 $
max( 1, mp ) + imidpad, ierr( 1 ) )
268 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
270 IF( ierr( 1 ).LT.0 )
THEN
272 $
WRITE( nout, fmt = 9997 )
'descriptor'
285 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
288 worksiz = nq + ipostpad
293 IF( ( ipw+worksiz ).GT.memsiz )
THEN
295 $
WRITE( nout, fmt = 9996 )
'MEMORY',
296 $ ( ipx+worksiz )*dblesz
302 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1,
305 IF( ierr( 1 ).GT.0 )
THEN
307 $
WRITE( nout, fmt = 9997 )
'MEMORY'
313 CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
314 $ desca( lld_ ), iprepad, ipostpad,
316 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
317 $ mem( ipw-iprepad ),
318 $ worksiz-ipostpad, iprepad,
324 CALL pdqrt13( iscale, m, n, mem( ipa ), 1, 1,
325 $ desca, anorm, iaseed, mem( ipw ) )
328 CALL pdchekpad( ictxt,
'PDQRT13', mp, nq,
329 $ mem( ipa-iprepad ), desca( lld_ ),
330 $ iprepad, ipostpad, padval )
332 $ worksiz-ipostpad, 1,
333 $ mem( ipw-iprepad ),
334 $ worksiz-ipostpad, iprepad,
340 IF( itran.EQ.1 )
THEN
362 nrhsp =
numroc( nrhs, nbrhs, myrow, 0,
364 nrhsq =
numroc( nrhs, nbrhs, mycol, 0,
370 $ nbrhs, 0, 0, ictxt,
371 $
max( 1, mnp ) + imidpad,
374 CALL descinit( descw, m, nrhs, nb, nbrhs,
375 $ 0, 0, ictxt,
max( 1, mp ) +
376 $ imidpad, ierr( 2 ) )
378 CALL descinit( descw, n, nrhs, nb, nbrhs,
379 $ 0, 0, ictxt,
max( 1, np ) +
380 $ imidpad, ierr( 2 ) )
385 CALL igsum2d( ictxt,
'All',
' ', 2, 1, ierr,
388 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 )
THEN
390 $
WRITE( nout, fmt = 9997 )
'descriptor'
397 ipx = ipa + desca( lld_ )*nq + ipostpad +
399 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
401 worksiz = descw( lld_ )*nrhsq + ipostpad
404 IF( ipw+worksiz.GT.memsiz )
THEN
406 $
WRITE( nout, fmt = 9996 )
'Generation',
407 $ ( ipw+worksiz )*dblesz
413 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
416 IF( ierr( 1 ).GT.0 )
THEN
418 $
WRITE( nout, fmt = 9997 )
'MEMORY'
427 $ descw( m_ ), descw( n_ ),
428 $ descw( mb_ ), descw( nb_ ),
429 $ mem( ipw ), descw( lld_ ),
431 $ descw( csrc_ ), ibseed, 0,
432 $ mp, 0, nrhsq, myrow, mycol,
436 $ descw( m_ ), descw( n_ ),
437 $ descw( mb_ ), descw( nb_ ),
438 $ mem( ipw ), descw( lld_ ),
440 $ descw( csrc_ ), ibseed, 0,
441 $ np, 0, nrhsq, myrow, mycol,
447 $ mem( ipx-iprepad ),
448 $ descx( lld_ ), iprepad,
452 $ mem( ipw-iprepad ),
453 $ descw( lld_ ), iprepad,
457 $ mem( ipw-iprepad ),
458 $ descw( lld_ ), iprepad,
464 CALL pdnrm2( ncols, bnorm, mem( ipw ), 1,
467 $
CALL pdscal( ncols, one / bnorm,
468 $ mem( ipw ), 1, jj, descw,
472 CALL pdgemm( trans,
'N', nrows, nrhs, ncols,
473 $ one, mem( ipa ), 1, 1, desca,
474 $ mem( ipw ), 1, 1, descw, zero,
475 $ mem( ipx ), 1, 1, descx )
482 $ nq, mem( ipa-iprepad ),
483 $ desca( lld_ ), iprepad,
485 CALL pdchekpad( ictxt,
'Generation', mnp,
486 $ nrhsq, mem( ipx-iprepad ),
487 $ descx( lld_ ), iprepad,
492 $ mem( ipw-iprepad ),
493 $ descw( lld_ ), iprepad,
498 $ mem( ipw-iprepad ),
499 $ descw( lld_ ), iprepad,
509 $ nbrhs, 0, 0, ictxt,
510 $
max( 1, np ) + imidpad,
514 $ nbrhs, 0, 0, ictxt,
515 $
max( 1, mp ) + imidpad,
521 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
524 IF( ierr( 1 ).LT.0 )
THEN
526 $
WRITE( nout, fmt = 9997 )
532 ipw = ipb + descb( lld_ )*nrhsq +
542 lwf = nb * ( mp + nq + nb )
543 lws =
max( ( nb*( nb - 1 ) ) / 2,
544 $ ( mp + nrhsq ) * nb ) + nb*nb
546 lcm =
ilcm( nprow, npcol )
550 lwf = nb * ( mp + nq + nb )
551 lws =
max( ( nb*( nb - 1 ) ) / 2, ( np +
553 $ 0, nprow ), nb, 0, 0, lcmp ),
554 $ nrhsq ) ) * nb ) + nb*nb
557 lwork = ltau +
max( lwf, lws )
558 worksiz = lwork + ipostpad
563 IF( ipw+worksiz.GT.memsiz )
THEN
565 $
WRITE( nout, fmt = 9996 )
'solve',
566 $ ( ipw+worksiz )*dblesz
572 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
575 IF( ierr( 1 ).GT.0 )
THEN
577 $
WRITE( nout, fmt = 9997 )
'MEMORY'
586 CALL pdlacpy(
'All', nrows, nrhs,
587 $ mem( ipx ), 1, 1, descx,
588 $ mem( ipb ), 1, 1, descb )
592 $ mem( ipb-iprepad ),
593 $ descb( lld_ ), iprepad,
597 $ mem( ipb-iprepad ),
598 $ descb( lld_ ), iprepad,
602 $ mem( ipw-iprepad ),
608 CALL blacs_barrier( ictxt,
'All' )
613 CALL pdgels( trans, m, n, nrhs, mem( ipa ),
614 $ 1, 1, desca, mem( ipx ), 1, 1,
615 $ descx, mem( ipw ), lwork, info )
624 $ nq, mem( ipa-iprepad ),
625 $ desca( lld_ ), iprepad,
628 $ nrhsq, mem( ipx-iprepad ),
629 $ descx( lld_ ), iprepad,
632 $ 1, mem( ipw-iprepad ),
640 CALL pdqrt13( iscale, m, n, mem( ipa ), 1, 1,
641 $ desca, anorm, iaseed,
650 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
651 $ ( m.LT.n .AND. tpsd ) )
THEN
659 worksiz = np*nrhsq + nrhsp*mq
662 $
max( nq,
max( mq, nrhsq ) ) +
665 worksiz = mp*nrhsq + nrhsp*nq
676 IF( ( ipw+worksiz ).GT.memsiz )
THEN
678 $
WRITE( nout, fmt = 9996 )
679 $
'MEMORY', ( ipw+worksiz )*dblesz
685 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
688 IF( ierr( 1 ).GT.0 )
THEN
690 $
WRITE( nout, fmt = 9997 )
697 $ worksiz-ipostpad, 1,
698 $ mem( ipw-iprepad ),
703 result( 2 ) =
pdqrt17( trans, 1, m, n,
717 $ mem( ipa-iprepad ),
723 $ mem( ipx-iprepad ),
724 $ descx( lld_ ), iprepad,
729 $ mem( ipb-iprepad ),
736 $ mem( ipb-iprepad ),
742 $ worksiz-ipostpad, 1,
743 $ mem( ipw-iprepad ),
752 worksiz = mp + ipostpad
754 worksiz = nq + ipostpad
760 IF( ( ipw+worksiz ).GT.memsiz )
THEN
762 $
WRITE( nout, fmt = 9996 )
'MEMORY',
763 $ ( ipw+worksiz )*dblesz
769 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
772 IF( ierr( 1 ).GT.0 )
THEN
774 $
WRITE( nout, fmt = 9997 )
'MEMORY'
780 $ worksiz-ipostpad, 1,
781 $ mem( ipw-iprepad ),
786 CALL pdqrt16( trans, m, n, nrhs,
787 $ mem( ipa ), 1, 1, desca,
788 $ mem( ipx ), 1, 1, descx,
789 $ mem( ipb ), 1, 1, descb,
790 $ mem( ipw ), result( 1 ) )
794 $ mem( ipa-iprepad ),
800 $ mem( ipx-iprepad ),
801 $ descx( lld_ ), iprepad,
806 $ mem( ipb-iprepad ),
813 $ mem( ipb-iprepad ),
819 $ worksiz-ipostpad, 1,
820 $ mem( ipw-iprepad ),
827 IF( ( m.GE.n .AND. tpsd ) .OR.
828 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) )
THEN
834 nnrhsq =
numroc( n+nrhs, nb, mycol,
838 lwf = nb * ( nb + mp + nnrhsq )
839 worksiz = mp * nnrhsq + ltau + lwf +
844 mnrhsp =
numroc( m+nrhs, nb, myrow,
848 lwf = nb * ( nb + mnrhsp + nq )
849 worksiz = mnrhsp * nq + ltau + lwf +
858 IF( ( ipw+worksiz ).GT.memsiz )
THEN
860 $
WRITE( nout, fmt = 9996 )
861 $
'MEMORY', ( ipw+worksiz )*dblesz
867 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
870 IF( ierr( 1 ).GT.0 )
THEN
872 $
WRITE( nout, fmt = 9997 )
879 $ worksiz-ipostpad, 1,
880 $ mem( ipw-iprepad ),
887 result( 2 ) =
pdqrt14( trans, m, n,
898 $ mem( ipa-iprepad ),
904 $ mem( ipx-iprepad ),
905 $ descx( lld_ ), iprepad,
908 $ worksiz-ipostpad, 1,
909 $ mem( ipw-iprepad ),
920 IF( ( result( ii ).GE.thresh ) .AND.
921 $ ( result( ii )-result( ii ).EQ.0.0e+0
924 $
WRITE( nout, fmt = 9986 )trans,
925 $ m, n, nrhs, nb, itype, ii,
939 sresid = sresid - sresid
947 CALL slcombine( ictxt,
'All',
'>',
'W', 1, 1,
949 CALL slcombine( ictxt,
'All',
'>',
'C', 1, 1,
954 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
963 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
964 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
965 $ n*nrhs*( 2.d0*m+2.d0-n )
966 adds = n*( ( 5.d0 / 6.d0 )+n*
967 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
968 $ + n*nrhs*( 2.d0*m+1.d0-n )
975 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
976 $ / 2.d0 )+m*( n-m / 3.d0 ) )
977 $ + n*nrhs*( 2.d0*m+2.d0-n )
978 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
980 $ + n*nrhs*( 2.d0*m+1.d0-n )
982 nops = addfac*adds + mulfac*mults
989 IF( wtime( 1 ).GT.0.0d+0 )
THEN
990 tmflops = nops / ( wtime( 1 )*1.0d+6 )
995 IF( wtime( 1 ).GE.0.0d+0 )
996 $
WRITE( nout, fmt = 9993 )
997 $
'WALL', trans, m, n, nb, nrhs,
998 $ nbrhs, nprow, npcol, wtime( 1 ),
1003 IF( ctime( 1 ).GT.0.0d+0 )
THEN
1004 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1009 IF( ctime( 1 ).GE.0.0d+0 )
1010 $
WRITE( nout, fmt = 9993 )
1011 $
'CPU ', trans, m, n, nb, nrhs,
1012 $ nbrhs, nprow, npcol, ctime( 1 ),
1021 CALL blacs_gridexit( ictxt )
1027 ktests = kpass + kfail + kskip
1028 WRITE( nout, fmt = * )
1029 WRITE( nout, fmt = 9992 ) ktests
1031 WRITE( nout, fmt = 9991 ) kpass
1032 WRITE( nout, fmt = 9989 ) kfail
1034 WRITE( nout, fmt = 9990 ) kpass
1036 WRITE( nout, fmt = 9988 ) kskip
1037 WRITE( nout, fmt = * )
1038 WRITE( nout, fmt = * )
1039 WRITE( nout, fmt = 9987 )
1040 IF( nout.NE.6 .AND. nout.NE.0 )
1044 CALL blacs_exit( 0 )
1046 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
1047 $
'; It should be at least 1' )
1048 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
1050 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
1051 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
1053 9995
FORMAT(
'Time TRANS M N NB NRHS NBRHS P Q ',
1054 $
'LS Time MFLOPS CHECK' )
1055 9994
FORMAT(
'---- ----- ------ ------ --- ----- ----- ----- ----- ',
1056 $
'--------- -------- ------' )
1057 9993
FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1058 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1059 9992
FORMAT(
'Finished', i6,
' tests, with the following results:' )
1060 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
1061 9990
FORMAT( i5,
' tests completed without checking.' )
1062 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
1063 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
1064 9987
FORMAT(
'END OF TESTS.' )
1065 9986
FORMAT(
' TRANS=''', a1,
''', M=', i5,
', N=', i5,
', NRHS=', i4,
1066 $
', NB=', i4,
', type', i2,
', test(', i2,
')=', g12.5 )
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function iceil(inum, idenom)
integer function ilcm(m, n)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine pdgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
subroutine pdlsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pdqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
double precision function pdqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
subroutine pdqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
double precision function pdqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)