63 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64 $ lld_, mb_, m_, nb_, n_, rsrc_
65 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68 INTEGER memsiz, ntests, realsz, totmem
71 parameter( realsz = 4, totmem = 2000000,
72 $ memsiz = totmem / realsz, ntests = 20,
73 $ padval = -9923.0e+0 )
74 parameter( one = 1.0e+0, zero = 0.0e+0 )
81 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
84 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
89 REAL anorm, bnorm, sresid, thresh
90 DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
93 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
94 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
95 $ nbrval( ntests ), nbval( ntests ),
96 $ nrval( ntests ), nval( ntests ),
97 $ pval( ntests ), qval( ntests )
98 REAL mem( memsiz ), result( 2 )
99 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
102 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
103 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
120 DATA ktests, kpass, kfail, kskip / 4*0 /
126 CALL blacs_pinfo( iam, nprocs )
130 CALL pslsinfo( outfile, nout, nmat, mval, ntests, nval,
131 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
132 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
133 $ ntests, thresh, mem, iam, nprocs )
134 check = ( thresh.GE.0.0e+0 )
139 WRITE( nout, fmt = * )
140 WRITE( nout, fmt = 9995 )
141 WRITE( nout, fmt = 9994 )
142 WRITE( nout, fmt = * )
155 IF( nprow.LT.1 )
THEN
157 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
159 ELSE IF( npcol.LT.1 )
THEN
161 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
163 ELSE IF( nprow*npcol.GT.nprocs )
THEN
165 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
169 IF( ierr( 1 ).GT.0 )
THEN
171 $
WRITE( nout, fmt = 9997 )
'grid'
178 CALL blacs_get( -1, 0, ictxt )
179 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
180 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
184 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
197 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'M', m
199 ELSE IF( n.LT.1 )
THEN
201 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
207 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
209 IF( ierr( 1 ).GT.0 )
THEN
211 $
WRITE( nout, fmt = 9997 )
'matrix'
228 $
WRITE( nout, fmt = 9999 )
'NB',
'NB', nb
233 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'NB'
244 mp =
numroc( m, nb, myrow, 0, nprow )
245 mq =
numroc( m, nb, mycol, 0, npcol )
246 np =
numroc( n, nb, myrow, 0, nprow )
248 nq =
numroc( n, nb, mycol, 0, npcol )
251 iprepad =
max( nb, mp )
253 ipostpad =
max( nb, nq )
262 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
263 $
max( 1, mp ) + imidpad, ierr( 1 ) )
267 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
269 IF( ierr( 1 ).LT.0 )
THEN
271 $
WRITE( nout, fmt = 9997 )
'descriptor'
284 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
287 worksiz = nq + ipostpad
292 IF( ( ipw+worksiz ).GT.memsiz )
THEN
294 $
WRITE( nout, fmt = 9996 )
'MEMORY',
295 $ ( ipx+worksiz )*realsz
301 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1,
304 IF( ierr( 1 ).GT.0 )
THEN
306 $
WRITE( nout, fmt = 9997 )
'MEMORY'
312 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
313 $ desca( lld_ ), iprepad, ipostpad,
315 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
316 $ mem( ipw-iprepad ),
317 $ worksiz-ipostpad, iprepad,
323 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
324 $ desca, anorm, iaseed, mem( ipw ) )
327 CALL pschekpad( ictxt,
'PSQRT13', mp, nq,
328 $ mem( ipa-iprepad ), desca( lld_ ),
329 $ iprepad, ipostpad, padval )
331 $ worksiz-ipostpad, 1,
332 $ mem( ipw-iprepad ),
333 $ worksiz-ipostpad, iprepad,
339 IF( itran.EQ.1 )
THEN
361 nrhsp =
numroc( nrhs, nbrhs, myrow, 0,
363 nrhsq =
numroc( nrhs, nbrhs, mycol, 0,
369 $ nbrhs, 0, 0, ictxt,
370 $
max( 1, mnp ) + imidpad,
373 CALL descinit( descw, m, nrhs, nb, nbrhs,
374 $ 0, 0, ictxt,
max( 1, mp ) +
375 $ imidpad, ierr( 2 ) )
377 CALL descinit( descw, n, nrhs, nb, nbrhs,
378 $ 0, 0, ictxt,
max( 1, np ) +
379 $ imidpad, ierr( 2 ) )
384 CALL igsum2d( ictxt,
'All',
' ', 2, 1, ierr,
387 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 )
THEN
389 $
WRITE( nout, fmt = 9997 )
'descriptor'
396 ipx = ipa + desca( lld_ )*nq + ipostpad +
398 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
400 worksiz = descw( lld_ )*nrhsq + ipostpad
403 IF( ipw+worksiz.GT.memsiz )
THEN
405 $
WRITE( nout, fmt = 9996 )
'Generation',
406 $ ( ipw+worksiz )*realsz
412 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
415 IF( ierr( 1 ).GT.0 )
THEN
417 $
WRITE( nout, fmt = 9997 )
'MEMORY'
426 $ descw( m_ ), descw( n_ ),
427 $ descw( mb_ ), descw( nb_ ),
428 $ mem( ipw ), descw( lld_ ),
430 $ descw( csrc_ ), ibseed, 0,
431 $ mp, 0, nrhsq, myrow, mycol,
435 $ descw( m_ ), descw( n_ ),
436 $ descw( mb_ ), descw( nb_ ),
437 $ mem( ipw ), descw( lld_ ),
439 $ descw( csrc_ ), ibseed, 0,
440 $ np, 0, nrhsq, myrow, mycol,
446 $ mem( ipx-iprepad ),
447 $ descx( lld_ ), iprepad,
451 $ mem( ipw-iprepad ),
452 $ descw( lld_ ), iprepad,
456 $ mem( ipw-iprepad ),
457 $ descw( lld_ ), iprepad,
463 CALL psnrm2( ncols, bnorm, mem( ipw ), 1,
466 $
CALL psscal( ncols, one / bnorm,
467 $ mem( ipw ), 1, jj, descw,
471 CALL psgemm( trans,
'N', nrows, nrhs, ncols,
472 $ one, mem( ipa ), 1, 1, desca,
473 $ mem( ipw ), 1, 1, descw, zero,
474 $ mem( ipx ), 1, 1, descx )
481 $ nq, mem( ipa-iprepad ),
482 $ desca( lld_ ), iprepad,
484 CALL pschekpad( ictxt,
'Generation', mnp,
485 $ nrhsq, mem( ipx-iprepad ),
486 $ descx( lld_ ), iprepad,
491 $ mem( ipw-iprepad ),
492 $ descw( lld_ ), iprepad,
497 $ mem( ipw-iprepad ),
498 $ descw( lld_ ), iprepad,
508 $ nbrhs, 0, 0, ictxt,
509 $
max( 1, np ) + imidpad,
513 $ nbrhs, 0, 0, ictxt,
514 $
max( 1, mp ) + imidpad,
520 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
523 IF( ierr( 1 ).LT.0 )
THEN
525 $
WRITE( nout, fmt = 9997 )
531 ipw = ipb + descb( lld_ )*nrhsq +
541 lwf = nb * ( mp + nq + nb )
542 lws =
max( ( nb*( nb - 1 ) ) / 2,
543 $ ( mp + nrhsq ) * nb ) + nb*nb
545 lcm =
ilcm( nprow, npcol )
549 lwf = nb * ( mp + nq + nb )
550 lws =
max( ( nb*( nb - 1 ) ) / 2, ( np +
552 $ 0, nprow ), nb, 0, 0, lcmp ),
553 $ nrhsq ) ) * nb ) + nb*nb
556 lwork = ltau +
max( lwf, lws )
557 worksiz = lwork + ipostpad
562 IF( ipw+worksiz.GT.memsiz )
THEN
564 $
WRITE( nout, fmt = 9996 )
'solve',
565 $ ( ipw+worksiz )*realsz
571 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
574 IF( ierr( 1 ).GT.0 )
THEN
576 $
WRITE( nout, fmt = 9997 )
'MEMORY'
585 CALL pslacpy(
'All', nrows, nrhs,
586 $ mem( ipx ), 1, 1, descx,
587 $ mem( ipb ), 1, 1, descb )
591 $ mem( ipb-iprepad ),
592 $ descb( lld_ ), iprepad,
596 $ mem( ipb-iprepad ),
597 $ descb( lld_ ), iprepad,
601 $ mem( ipw-iprepad ),
607 CALL blacs_barrier( ictxt,
'All' )
612 CALL psgels( trans, m, n, nrhs, mem( ipa ),
613 $ 1, 1, desca, mem( ipx ), 1, 1,
614 $ descx, mem( ipw ), lwork, info )
623 $ nq, mem( ipa-iprepad ),
624 $ desca( lld_ ), iprepad,
627 $ nrhsq, mem( ipx-iprepad ),
628 $ descx( lld_ ), iprepad,
631 $ 1, mem( ipw-iprepad ),
639 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
640 $ desca, anorm, iaseed,
649 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
650 $ ( m.LT.n .AND. tpsd ) )
THEN
658 worksiz = np*nrhsq + nrhsp*mq
661 $
max( nq,
max( mq, nrhsq ) ) +
664 worksiz = mp*nrhsq + nrhsp*nq
675 IF( ( ipw+worksiz ).GT.memsiz )
THEN
677 $
WRITE( nout, fmt = 9996 )
678 $
'MEMORY', ( ipw+worksiz )*realsz
684 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
687 IF( ierr( 1 ).GT.0 )
THEN
689 $
WRITE( nout, fmt = 9997 )
696 $ worksiz-ipostpad, 1,
697 $ mem( ipw-iprepad ),
702 result( 2 ) =
psqrt17( trans, 1, m, n,
716 $ mem( ipa-iprepad ),
722 $ mem( ipx-iprepad ),
723 $ descx( lld_ ), iprepad,
728 $ mem( ipb-iprepad ),
735 $ mem( ipb-iprepad ),
741 $ worksiz-ipostpad, 1,
742 $ mem( ipw-iprepad ),
751 worksiz = mp + ipostpad
753 worksiz = nq + ipostpad
759 IF( ( ipw+worksiz ).GT.memsiz )
THEN
761 $
WRITE( nout, fmt = 9996 )
'MEMORY',
762 $ ( ipw+worksiz )*realsz
768 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
771 IF( ierr( 1 ).GT.0 )
THEN
773 $
WRITE( nout, fmt = 9997 )
'MEMORY'
779 $ worksiz-ipostpad, 1,
780 $ mem( ipw-iprepad ),
785 CALL psqrt16( trans, m, n, nrhs,
786 $ mem( ipa ), 1, 1, desca,
787 $ mem( ipx ), 1, 1, descx,
788 $ mem( ipb ), 1, 1, descb,
789 $ mem( ipw ), result( 1 ) )
793 $ mem( ipa-iprepad ),
799 $ mem( ipx-iprepad ),
800 $ descx( lld_ ), iprepad,
805 $ mem( ipb-iprepad ),
812 $ mem( ipb-iprepad ),
818 $ worksiz-ipostpad, 1,
819 $ mem( ipw-iprepad ),
826 IF( ( m.GE.n .AND. tpsd ) .OR.
827 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) )
THEN
833 nnrhsq =
numroc( n+nrhs, nb, mycol,
837 lwf = nb * ( nb + mp + nnrhsq )
838 worksiz = mp * nnrhsq + ltau + lwf +
843 mnrhsp =
numroc( m+nrhs, nb, myrow,
847 lwf = nb * ( nb + mnrhsp + nq )
848 worksiz = mnrhsp * nq + ltau + lwf +
857 IF( ( ipw+worksiz ).GT.memsiz )
THEN
859 $
WRITE( nout, fmt = 9996 )
860 $
'MEMORY', ( ipw+worksiz )*realsz
866 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
869 IF( ierr( 1 ).GT.0 )
THEN
871 $
WRITE( nout, fmt = 9997 )
878 $ worksiz-ipostpad, 1,
879 $ mem( ipw-iprepad ),
886 result( 2 ) =
psqrt14( trans, m, n,
897 $ mem( ipa-iprepad ),
903 $ mem( ipx-iprepad ),
904 $ descx( lld_ ), iprepad,
907 $ worksiz-ipostpad, 1,
908 $ mem( ipw-iprepad ),
919 IF( ( result( ii ).GE.thresh ) .AND.
920 $ ( result( ii )-result( ii ).EQ.0.0e+0
923 $
WRITE( nout, fmt = 9986 )trans,
924 $ m, n, nrhs, nb, itype, ii,
938 sresid = sresid - sresid
946 CALL slcombine( ictxt,
'All',
'>',
'W', 1, 1,
948 CALL slcombine( ictxt,
'All',
'>',
'C', 1, 1,
953 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
962 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
963 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
964 $ n*nrhs*( 2.d0*m+2.d0-n )
965 adds = n*( ( 5.d0 / 6.d0 )+n*
966 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
967 $ + n*nrhs*( 2.d0*m+1.d0-n )
974 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
975 $ / 2.d0 )+m*( n-m / 3.d0 ) )
976 $ + n*nrhs*( 2.d0*m+2.d0-n )
977 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
979 $ + n*nrhs*( 2.d0*m+1.d0-n )
981 nops = addfac*adds + mulfac*mults
988 IF( wtime( 1 ).GT.0.0d+0 )
THEN
989 tmflops = nops / ( wtime( 1 )*1.0d+6 )
994 IF( wtime( 1 ).GE.0.0d+0 )
995 $
WRITE( nout, fmt = 9993 )
996 $
'WALL', trans, m, n, nb, nrhs,
997 $ nbrhs, nprow, npcol, wtime( 1 ),
1002 IF( ctime( 1 ).GT.0.0d+0 )
THEN
1003 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1008 IF( ctime( 1 ).GE.0.0d+0 )
1009 $
WRITE( nout, fmt = 9993 )
1010 $
'CPU ', trans, m, n, nb, nrhs,
1011 $ nbrhs, nprow, npcol, ctime( 1 ),
1020 CALL blacs_gridexit( ictxt )
1026 ktests = kpass + kfail + kskip
1027 WRITE( nout, fmt = * )
1028 WRITE( nout, fmt = 9992 ) ktests
1030 WRITE( nout, fmt = 9991 ) kpass
1031 WRITE( nout, fmt = 9989 ) kfail
1033 WRITE( nout, fmt = 9990 ) kpass
1035 WRITE( nout, fmt = 9988 ) kskip
1036 WRITE( nout, fmt = * )
1037 WRITE( nout, fmt = * )
1038 WRITE( nout, fmt = 9987 )
1039 IF( nout.NE.6 .AND. nout.NE.0 )
1043 CALL blacs_exit( 0 )
1045 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
1046 $
'; It should be at least 1' )
1047 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
1049 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
1050 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
1052 9995
FORMAT(
'Time TRANS M N NB NRHS NBRHS P Q ',
1053 $
'LS Time MFLOPS CHECK' )
1054 9994
FORMAT(
'---- ----- ------ ------ --- ----- ----- ----- ----- ',
1055 $
'--------- -------- ------' )
1056 9993
FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1057 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1058 9992
FORMAT(
'Finished', i6,
' tests, with the following results:' )
1059 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
1060 9990
FORMAT( i5,
' tests completed without checking.' )
1061 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
1062 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
1063 9987
FORMAT(
'END OF TESTS.' )
1064 9986
FORMAT(
' TRANS=''', a1,
''', M=', i5,
', N=', i5,
', NRHS=', i4,
1065 $
', NB=', i4,
', type', i2,
', test(', i2,
')=', g12.5 )
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
integer function iceil(inum, idenom)
integer function ilcm(m, n)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine psgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
real function pslange(norm, m, n, a, ia, ja, desca, work)
subroutine pslsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine psqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
real function psqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
subroutine psqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
real function psqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)