64 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65 $ lld_, mb_, m_, nb_, n_, rsrc_
66 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69 INTEGER dblesz, memsiz, ntests, totmem, zplxsz
70 DOUBLE PRECISION rzero, rone
71 COMPLEX*16 one, padval, zero
72 parameter( dblesz = 8, zplxsz = 16, totmem = 2000000,
73 $ memsiz = totmem / zplxsz, ntests = 20,
74 $ padval = ( -9923.0d+0, -9923.0d+0 ) )
75 parameter( one = ( 1.0d+0, 0.0d+0 ), rzero = 0.0d+0,
76 $ rone = 1.0d+0, zero = ( 0.0d+0, 0.0d+0 ) )
83 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
84 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
85 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
86 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
87 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
88 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
89 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
90 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
92 DOUBLE PRECISION addfac, adds, anorm, bnorm, mulfac, mults,
93 $ nops, sresid, tmflops
96 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
97 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
98 $ nbrval( ntests ), nbval( ntests ),
99 $ nrval( ntests ), nval( ntests ),
100 $ pval( ntests ), qval( ntests )
101 DOUBLE PRECISION ctime( 1 ), result( 2 ), wtime( 1 )
102 COMPLEX*16 mem( memsiz )
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
124 DATA ktests, kpass, kfail, kskip / 4*0 /
130 CALL blacs_pinfo( iam, nprocs )
134 CALL pzlsinfo( outfile, nout, nmat, mval, ntests, nval,
135 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
136 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
137 $ ntests, thresh, mem, iam, nprocs )
138 check = ( thresh.GE.0.0e+0 )
143 WRITE( nout, fmt = * )
144 WRITE( nout, fmt = 9995 )
145 WRITE( nout, fmt = 9994 )
146 WRITE( nout, fmt = * )
159 IF( nprow.LT.1 )
THEN
161 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
163 ELSE IF( npcol.LT.1 )
THEN
165 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
167 ELSE IF( nprow*npcol.GT.nprocs )
THEN
169 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
173 IF( ierr( 1 ).GT.0 )
THEN
175 $
WRITE( nout, fmt = 9997 )
'grid'
182 CALL blacs_get( -1, 0, ictxt )
183 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
184 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
188 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
201 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'M', m
203 ELSE IF( n.LT.1 )
THEN
205 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
211 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
213 IF( ierr( 1 ).GT.0 )
THEN
215 $
WRITE( nout, fmt = 9997 )
'matrix'
232 $
WRITE( nout, fmt = 9999 )
'NB',
'NB', nb
237 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
239 IF( ierr( 1 ).GT.0 )
THEN
241 $
WRITE( nout, fmt = 9997 )
'NB'
248 mp =
numroc( m, nb, myrow, 0, nprow )
249 mq =
numroc( m, nb, mycol, 0, npcol )
250 np =
numroc( n, nb, myrow, 0, nprow )
252 nq =
numroc( n, nb, mycol, 0, npcol )
255 iprepad =
max( nb, mp )
257 ipostpad =
max( nb, nq )
266 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
267 $
max( 1, mp ) + imidpad, ierr( 1 ) )
271 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
273 IF( ierr( 1 ).LT.0 )
THEN
275 $
WRITE( nout, fmt = 9997 )
'descriptor'
288 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
291 worksiz = nq + ipostpad
296 IF( ( ipw+worksiz ).GT.memsiz )
THEN
298 $
WRITE( nout, fmt = 9996 )
'MEMORY',
299 $ ( ipx+worksiz )*zplxsz
305 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1,
308 IF( ierr( 1 ).GT.0 )
THEN
310 $
WRITE( nout, fmt = 9997 )
'MEMORY'
316 CALL pzfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
317 $ desca( lld_ ), iprepad, ipostpad,
319 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
320 $ mem( ipw-iprepad ),
321 $ worksiz-ipostpad, iprepad,
327 CALL pzqrt13( iscale, m, n, mem( ipa ), 1, 1,
328 $ desca, anorm, iaseed, mem( ipw ) )
331 CALL pzchekpad( ictxt,
'PZQRT13', mp, nq,
332 $ mem( ipa-iprepad ), desca( lld_ ),
333 $ iprepad, ipostpad, padval )
335 $ worksiz-ipostpad, 1,
336 $ mem( ipw-iprepad ),
337 $ worksiz-ipostpad, iprepad,
343 IF( itran.EQ.1 )
THEN
365 nrhsp =
numroc( nrhs, nbrhs, myrow, 0,
367 nrhsq =
numroc( nrhs, nbrhs, mycol, 0,
373 $ nbrhs, 0, 0, ictxt,
374 $
max( 1, mnp ) + imidpad,
377 CALL descinit( descw, m, nrhs, nb, nbrhs,
378 $ 0, 0, ictxt,
max( 1, mp ) +
379 $ imidpad, ierr( 2 ) )
381 CALL descinit( descw, n, nrhs, nb, nbrhs,
382 $ 0, 0, ictxt,
max( 1, np ) +
383 $ imidpad, ierr( 2 ) )
388 CALL igsum2d( ictxt,
'All',
' ', 2, 1, ierr,
391 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 )
THEN
393 $
WRITE( nout, fmt = 9997 )
'descriptor'
400 ipx = ipa + desca( lld_ )*nq + ipostpad +
402 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
404 worksiz = descw( lld_ )*nrhsq + ipostpad
407 IF( ipw+worksiz.GT.memsiz )
THEN
409 $
WRITE( nout, fmt = 9996 )
'Generation',
410 $ ( ipw+worksiz )*zplxsz
416 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
419 IF( ierr( 1 ).GT.0 )
THEN
421 $
WRITE( nout, fmt = 9997 )
'MEMORY'
430 $ descw( m_ ), descw( n_ ),
431 $ descw( mb_ ), descw( nb_ ),
432 $ mem( ipw ), descw( lld_ ),
434 $ descw( csrc_ ), ibseed, 0,
435 $ mp, 0, nrhsq, myrow, mycol,
439 $ descw( m_ ), descw( n_ ),
440 $ descw( mb_ ), descw( nb_ ),
441 $ mem( ipw ), descw( lld_ ),
443 $ descw( csrc_ ), ibseed, 0,
444 $ np, 0, nrhsq, myrow, mycol,
450 $ mem( ipx-iprepad ),
451 $ descx( lld_ ), iprepad,
455 $ mem( ipw-iprepad ),
456 $ descw( lld_ ), iprepad,
460 $ mem( ipw-iprepad ),
461 $ descw( lld_ ), iprepad,
467 CALL pdznrm2( ncols, bnorm, mem( ipw ),
470 $
CALL pzdscal( ncols, rone / bnorm,
471 $ mem( ipw ), 1, jj, descw,
475 CALL pzgemm( trans,
'N', nrows, nrhs, ncols,
476 $ one, mem( ipa ), 1, 1, desca,
477 $ mem( ipw ), 1, 1, descw, zero,
478 $ mem( ipx ), 1, 1, descx )
485 $ nq, mem( ipa-iprepad ),
486 $ desca( lld_ ), iprepad,
488 CALL pzchekpad( ictxt,
'Generation', mnp,
489 $ nrhsq, mem( ipx-iprepad ),
490 $ descx( lld_ ), iprepad,
495 $ mem( ipw-iprepad ),
496 $ descw( lld_ ), iprepad,
501 $ mem( ipw-iprepad ),
502 $ descw( lld_ ), iprepad,
512 $ nbrhs, 0, 0, ictxt,
513 $
max( 1, np ) + imidpad,
517 $ nbrhs, 0, 0, ictxt,
518 $
max( 1, mp ) + imidpad,
524 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
527 IF( ierr( 1 ).LT.0 )
THEN
529 $
WRITE( nout, fmt = 9997 )
535 ipw = ipb + descb( lld_ )*nrhsq +
545 lwf = nb * ( mp + nq + nb )
546 lws =
max( ( nb*( nb - 1 ) ) / 2,
547 $ ( mp + nrhsq ) * nb ) + nb*nb
549 lcm =
ilcm( nprow, npcol )
553 lwf = nb * ( mp + nq + nb )
554 lws =
max( ( nb*( nb - 1 ) ) / 2, ( np +
556 $ 0, nprow ), nb, 0, 0, lcmp ),
557 $ nrhsq ) ) * nb ) + nb*nb
560 lwork = ltau +
max( lwf, lws )
561 worksiz = lwork + ipostpad
566 IF( ipw+worksiz.GT.memsiz )
THEN
568 $
WRITE( nout, fmt = 9996 )
'solve',
569 $ ( ipw+worksiz )*zplxsz
575 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
578 IF( ierr( 1 ).GT.0 )
THEN
580 $
WRITE( nout, fmt = 9997 )
'MEMORY'
589 CALL pzlacpy(
'All', nrows, nrhs,
590 $ mem( ipx ), 1, 1, descx,
591 $ mem( ipb ), 1, 1, descb )
595 $ mem( ipb-iprepad ),
596 $ descb( lld_ ), iprepad,
600 $ mem( ipb-iprepad ),
601 $ descb( lld_ ), iprepad,
605 $ mem( ipw-iprepad ),
611 CALL blacs_barrier( ictxt,
'All' )
616 CALL pzgels( trans, m, n, nrhs, mem( ipa ),
617 $ 1, 1, desca, mem( ipx ), 1, 1,
618 $ descx, mem( ipw ), lwork, info )
627 $ nq, mem( ipa-iprepad ),
628 $ desca( lld_ ), iprepad,
631 $ nrhsq, mem( ipx-iprepad ),
632 $ descx( lld_ ), iprepad,
635 $ 1, mem( ipw-iprepad ),
643 CALL pzqrt13( iscale, m, n, mem( ipa ), 1, 1,
644 $ desca, anorm, iaseed,
653 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
654 $ ( m.LT.n .AND. tpsd ) )
THEN
662 worksiz = np*nrhsq + nrhsp*mq
666 $ mq, nrhsq ) ), zplxsz ) +
669 worksiz = mp*nrhsq + nrhsp*nq
673 $ nrhsq ), zplxsz ) +
681 IF( ( ipw+worksiz ).GT.memsiz )
THEN
683 $
WRITE( nout, fmt = 9996 )
684 $
'MEMORY', ( ipw+worksiz )*zplxsz
690 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
693 IF( ierr( 1 ).GT.0 )
THEN
695 $
WRITE( nout, fmt = 9997 )
702 $ worksiz-ipostpad, 1,
703 $ mem( ipw-iprepad ),
708 result( 2 ) =
pzqrt17( trans, 1, m, n,
722 $ mem( ipa-iprepad ),
728 $ mem( ipx-iprepad ),
729 $ descx( lld_ ), iprepad,
734 $ mem( ipb-iprepad ),
741 $ mem( ipb-iprepad ),
747 $ worksiz-ipostpad, 1,
748 $ mem( ipw-iprepad ),
757 worksiz = mp + ipostpad
759 worksiz = nq + ipostpad
765 IF( ( ipw+worksiz ).GT.memsiz )
THEN
767 $
WRITE( nout, fmt = 9996 )
'MEMORY',
768 $ ( ipw+worksiz )*zplxsz
774 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
777 IF( ierr( 1 ).GT.0 )
THEN
779 $
WRITE( nout, fmt = 9997 )
'MEMORY'
785 $ worksiz-ipostpad, 1,
786 $ mem( ipw-iprepad ),
791 CALL pzqrt16( trans, m, n, nrhs,
792 $ mem( ipa ), 1, 1, desca,
793 $ mem( ipx ), 1, 1, descx,
794 $ mem( ipb ), 1, 1, descb,
795 $ mem( ipw ), result( 1 ) )
799 $ mem( ipa-iprepad ),
805 $ mem( ipx-iprepad ),
806 $ descx( lld_ ), iprepad,
811 $ mem( ipb-iprepad ),
818 $ mem( ipb-iprepad ),
824 $ worksiz-ipostpad, 1,
825 $ mem( ipw-iprepad ),
832 IF( ( m.GE.n .AND. tpsd ) .OR.
833 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) )
THEN
839 nnrhsq =
numroc( n+nrhs, nb, mycol,
843 lwf = nb * ( nb + mp + nnrhsq )
844 worksiz = mp * nnrhsq + ltau + lwf +
849 mnrhsp =
numroc( m+nrhs, nb, myrow,
853 lwf = nb * ( nb + mnrhsp + nq )
854 worksiz = mnrhsp * nq + ltau + lwf +
863 IF( ( ipw+worksiz ).GT.memsiz )
THEN
865 $
WRITE( nout, fmt = 9996 )
866 $
'MEMORY', ( ipw+worksiz )*zplxsz
872 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
875 IF( ierr( 1 ).GT.0 )
THEN
877 $
WRITE( nout, fmt = 9997 )
884 $ worksiz-ipostpad, 1,
885 $ mem( ipw-iprepad ),
892 result( 2 ) =
pzqrt14( trans, m, n,
903 $ mem( ipa-iprepad ),
909 $ mem( ipx-iprepad ),
910 $ descx( lld_ ), iprepad,
913 $ worksiz-ipostpad, 1,
914 $ mem( ipw-iprepad ),
925 IF( ( result( ii ).GE.thresh ) .AND.
926 $ ( result( ii )-result( ii ).EQ.0.0e+0
929 $
WRITE( nout, fmt = 9986 )trans,
930 $ m, n, nrhs, nb, itype, ii,
944 sresid = sresid - sresid
952 CALL slcombine( ictxt,
'All',
'>',
'W', 1, 1,
954 CALL slcombine( ictxt,
'All',
'>',
'C', 1, 1,
959 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
968 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
969 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
970 $ n*nrhs*( 2.d0*m+2.d0-n )
971 adds = n*( ( 5.d0 / 6.d0 )+n*
972 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
973 $ + n*nrhs*( 2.d0*m+1.d0-n )
980 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
981 $ / 2.d0 )+m*( n-m / 3.d0 ) )
982 $ + n*nrhs*( 2.d0*m+2.d0-n )
983 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
985 $ + n*nrhs*( 2.d0*m+1.d0-n )
987 nops = addfac*adds + mulfac*mults
994 IF( wtime( 1 ).GT.0.0d+0 )
THEN
995 tmflops = nops / ( wtime( 1 )*1.0d+6 )
1000 IF( wtime( 1 ).GE.0.0d+0 )
1001 $
WRITE( nout, fmt = 9993 )
1002 $
'WALL', trans, m, n, nb, nrhs,
1003 $ nbrhs, nprow, npcol, wtime( 1 ),
1008 IF( ctime( 1 ).GT.0.0d+0 )
THEN
1009 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1014 IF( ctime( 1 ).GE.0.0d+0 )
1015 $
WRITE( nout, fmt = 9993 )
1016 $
'CPU ', trans, m, n, nb, nrhs,
1017 $ nbrhs, nprow, npcol, ctime( 1 ),
1026 CALL blacs_gridexit( ictxt )
1032 ktests = kpass + kfail + kskip
1033 WRITE( nout, fmt = * )
1034 WRITE( nout, fmt = 9992 ) ktests
1036 WRITE( nout, fmt = 9991 ) kpass
1037 WRITE( nout, fmt = 9989 ) kfail
1039 WRITE( nout, fmt = 9990 ) kpass
1041 WRITE( nout, fmt = 9988 ) kskip
1042 WRITE( nout, fmt = * )
1043 WRITE( nout, fmt = * )
1044 WRITE( nout, fmt = 9987 )
1045 IF( nout.NE.6 .AND. nout.NE.0 )
1049 CALL blacs_exit( 0 )
1051 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
1052 $
'; It should be at least 1' )
1053 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
1055 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
1056 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
1058 9995
FORMAT(
'Time TRANS M N NB NRHS NBRHS P Q ',
1059 $
'LS Time MFLOPS CHECK' )
1060 9994
FORMAT(
'---- ----- ------ ------ --- ----- ----- ----- ----- ',
1061 $
'--------- -------- ------' )
1062 9993
FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1063 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1064 9992
FORMAT(
'Finished', i6,
' tests, with the following results:' )
1065 9991
FORMAT( i5,
' tests completed and passed residual checks.' )
1066 9990
FORMAT( i5,
' tests completed without checking.' )
1067 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
1068 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
1069 9987
FORMAT(
'END OF TESTS.' )
1070 9986
FORMAT(
' TRANS=''', a1,
''', M=', i5,
', N=', i5,
', NRHS=', i4,
1071 $
', NB=', i4,
', type', i2,
', test(', i2,
')=', g12.5 )