1 SUBROUTINE psttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
70 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
71 $ mb_, nb_, rsrc_, csrc_, lld_
72 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
73 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
74 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
77 parameter( realsz = 4, padval = -9923.0e+0 )
79 parameter( timetests = 11 )
81 parameter( tests = 8 )
83 parameter( mintimen = 8 )
89 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
90 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
91 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
92 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
93 $ nps, nq, splitstimed, worksiz, worktrd
95 DOUBLE PRECISION NOPS, TMFLOPS
98 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
99 $ baltest( tests ), baltime( timetests ),
100 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
101 $ intertest( tests ), intertime( timetests ),
102 $ pnbtest( tests ), pnbtime( timetests ),
103 $ twogemmtest( tests ), twogemmtime( timetests )
104 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
107 EXTERNAL blacs_barrier, blacs_get, blacs_gridexit,
108 $ blacs_gridinfo, blacs_gridinit,
descinit,
115 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
117 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pslansy
120 INTRINSIC dble, int,
max, real, sqrt
124 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
125 $ lltblock, minsz, pnb, timeinternals, timing,
129 COMMON / blocksizes / gstblock, lltblock, bckblock,
131 COMMON / minsize / minsz
132 COMMON / pjlaenvtiming / timing
133 COMMON / tailoredopts / pnb, anb, interleave,
135 COMMON / timecontrol / timeinternals
138 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
139 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
140 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
141 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
143 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
145 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
146 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
147 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
148 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
149 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
153 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
161 memsiz = totmem / realsz
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = 9993 )
170 WRITE( nout, fmt = * )
175 ngrids = int( sqrt( real( nprocs ) ) )
185 CALL blacs_get( -1, 0, ictxt )
186 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
187 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
203 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
209 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
211 IF( ierr( 1 ).GT.0 )
THEN
213 $
WRITE( nout, fmt = 9997 )
'matrix'
220 IF( n.GT.mintimen )
THEN
231 maxtests = timetests + 2
238 DO 10 k = 1, maxtests
241 IF( k.GE.maxtests-1 )
THEN
257 dummy = pjlaenv( ictxt, 3,
'PSSYTTRD',
'L', 0, 0,
264 balanced = baltime( k )
265 interleave = intertime( k )
266 twogemms = twogemmtime( k )
273 balanced = baltest( k )
274 interleave = intertest( k )
275 twogemms = twogemmtest( k )
283 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
284 CALL igebs2d( ictxt,
'All',
' ', 1, 1, splitstimed,
287 CALL igebr2d( ictxt,
'All',
' ', 1, 1, splitstimed, 1,
292 IF( splitstimed.EQ.0 .AND. k.EQ.maxtests )
304 np = numroc( n, nb, myrow, 0, nprow )
305 nq = numroc( n, nb, mycol, 0, npcol )
307 iprepad =
max( nb, np )
309 ipostpad =
max( nb, nq )
319 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
320 $
max( 1, np )+imidpad, ierr( 1 ) )
322 CALL descinit( descd, 1, n, nb, nb, 0, 0, ictxt, 1,
327 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
329 IF( ierr( 1 ).LT.0 )
THEN
331 $
WRITE( nout, fmt = 9997 )
'descriptor'
340 IF( lsame( uplo,
'U' ) )
THEN
343 noffd = numroc( n-1, nb, mycol, 0, npcol )
347 ipd = ipa + desca( lld_ )*nq + ipostpad + iprepad
348 ipe = ipd + ndiag + ipostpad + iprepad
349 ipt = ipe + noffd + ipostpad + iprepad
350 ipw = ipt + nq + ipostpad + iprepad
355 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
356 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
358 worktrd = lwmin + ipostpad
365 IF( nprow.NE.npcol )
THEN
366 lcm = ilcm( nprow, npcol )
367 itemp = nb*iceil( iceil( np, nb ), lcm / nprow ) +
370 itemp =
max( itemp, 2*( nb+np )*nb )
371 worksiz =
max( lwmin, itemp ) + ipostpad
377 IF( ipw+worksiz.GT.memsiz )
THEN
379 $
WRITE( nout, fmt = 9996 )
'Tridiagonal reduction',
380 $ ( ipw+worksiz )*realsz
386 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
388 IF( ierr( 1 ).GT.0 )
THEN
390 $
WRITE( nout, fmt = 9997 )
'MEMORY'
399 CALL psmatgen( ictxt,
'Hemm',
'N', desca( m_ ),
400 $ desca( n_ ), desca( mb_ ), desca( nb_ ),
401 $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
402 $ desca( csrc_ ), iaseed, 0, np, 0, nq,
403 $ myrow, mycol, nprow, npcol )
409 CALL psfillpad( ictxt, np, nq, mem( ipa-iprepad ),
410 $ desca( lld_ ), iprepad, ipostpad,
412 CALL psfillpad( ictxt, ndiag, 1, mem( ipd-iprepad ),
413 $ ndiag, iprepad, ipostpad, padval )
414 CALL psfillpad( ictxt, noffd, 1, mem( ipe-iprepad ),
415 $ noffd, iprepad, ipostpad, padval )
416 CALL psfillpad( ictxt, nq, 1, mem( ipt-iprepad ), nq,
417 $ iprepad, ipostpad, padval )
418 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
419 $ mem( ipw-iprepad ), worksiz-ipostpad,
420 $ iprepad, ipostpad, padval )
421 anorm = pslansy(
'I', uplo, n, mem( ipa ), 1, 1,
422 $ desca, mem( ipw ) )
423 CALL pschekpad( ictxt,
'PSLANSY', np, nq,
424 $ mem( ipa-iprepad ), desca( lld_ ),
425 $ iprepad, ipostpad, padval )
426 CALL pschekpad( ictxt,
'PSLANSY', worksiz-ipostpad, 1,
427 $ mem( ipw-iprepad ), worksiz-ipostpad,
428 $ iprepad, ipostpad, padval )
429 CALL psfillpad( ictxt, worktrd-ipostpad, 1,
430 $ mem( ipw-iprepad ), worktrd-ipostpad,
431 $ iprepad, ipostpad, padval )
435 CALL blacs_barrier( ictxt,
'All' )
440 CALL pssyttrd( uplo, n, mem( ipa ), 1, 1, desca,
441 $ mem( ipd ), mem( ipe ), mem( ipt ),
442 $ mem( ipw ), lwmin, info )
450 CALL pschekpad( ictxt,
'PSSYTTRD', np, nq,
451 $ mem( ipa-iprepad ), desca( lld_ ),
452 $ iprepad, ipostpad, padval )
453 CALL pschekpad( ictxt,
'PSSYTTRD', ndiag, 1,
454 $ mem( ipd-iprepad ), ndiag, iprepad,
457 CALL pschekpad( ictxt,
'PSSYTTRDc', noffd, 1,
458 $ mem( ipe-iprepad ), noffd, iprepad,
460 CALL pschekpad( ictxt,
'PSSYTTRDd', nq, 1,
461 $ mem( ipt-iprepad ), nq, iprepad,
463 CALL pschekpad( ictxt,
'PSSYTTRDe', worktrd-ipostpad,
464 $ 1, mem( ipw-iprepad ),
465 $ worktrd-ipostpad, iprepad, ipostpad,
467 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
468 $ mem( ipw-iprepad ), worksiz-ipostpad,
469 $ iprepad, ipostpad, padval )
473 CALL pssytdrv( uplo, n, mem( ipa ), 1, 1, desca,
474 $ mem( ipd ), mem( ipe ), mem( ipt ),
475 $ mem( ipw ), ierr( 1 ) )
481 CALL pslatran( n, 1, mem( ipa ), 1, 1, desca,
483 CALL pslafchk(
'Hemm',
'No', n, n, mem( ipa ), 1, 1,
484 $ desca, iaseed, anorm, fresid,
489 CALL pschekpad( ictxt,
'PSSYTDRVf', np, nq,
490 $ mem( ipa-iprepad ), desca( lld_ ),
491 $ iprepad, ipostpad, padval )
492 CALL pschekpad( ictxt,
'PSSYTDRVg', ndiag, 1,
493 $ mem( ipd-iprepad ), ndiag, iprepad,
495 CALL pschekpad( ictxt,
'PSSYTDRVh', noffd, 1,
496 $ mem( ipe-iprepad ), noffd, iprepad,
498 CALL pschekpad( ictxt,
'PSSYTDRVi', worksiz-ipostpad,
499 $ 1, mem( ipw-iprepad ),
500 $ worksiz-ipostpad, iprepad, ipostpad,
505 IF( fresid.LE.thresh .AND. fresid-fresid.EQ.
506 $ 0.0e+0 .AND. ierr( 1 ).EQ.0 )
THEN
510 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
511 $
WRITE( nout, fmt = 9991 )fresid
519 IF( myrow.EQ.0 .AND. mycol.EQ.0 .AND. ierr( 1 ).NE.0 )
520 $
WRITE( nout, fmt = * )
'D or E copies incorrect ...'
526 fresid = fresid - fresid
532 CALL slcombine( ictxt,
'All',
'>',
'W', 50, 1, wtime )
533 CALL slcombine( ictxt,
'All',
'>',
'C', 50, 1, ctime )
537 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
542 nops = ( 16.0d+0 / 3.0d+0 )*nops**3
547 IF( wtime( 1 ).GT.0.0d+0 )
THEN
548 tmflops = nops / wtime( 1 )
552 IF( wtime( 1 ).GE.0.0d+0 )
553 $
WRITE( nout, fmt = 9992 )
'WALL', n, interleave,
554 $ twogemms, balanced, anb, pnb, nprow*npcol,
555 $ wtime( 1 ), tmflops, fresid, passed
559 IF( ctime( 1 ).GT.0.0d+0 )
THEN
560 tmflops = nops / ctime( 1 )
564 IF( ctime( 1 ).GE.0.0d+0 )
565 $
WRITE( nout, fmt = 9992 )
'CPU ', n, interleave,
566 $ twogemms, balanced, anb, pnb, nprow*npcol,
567 $ ctime( 1 ), tmflops, fresid, passed
573 IF( wtime( 13 )+wtime( 15 )+wtime( 16 ).GT.0.0d+0 .OR.
574 $ ctime( 13 )+ctime( 15 )+ctime( 16 ).GT.0.0d+0 )
578 IF( splitstimed.EQ.1 )
THEN
579 WRITE( nout, fmt = 9990 )wtime( 10 ), wtime( 11 ),
580 $ wtime( 12 ), wtime( 13 ), wtime( 14 ),
582 WRITE( nout, fmt = 9989 )wtime( 16 ), wtime( 17 ),
583 $ wtime( 18 ), wtime( 19 ), wtime( 20 ),
586 WRITE( nout, fmt = 9988 )ctime( 10 ), ctime( 11 ),
587 $ ctime( 12 ), ctime( 13 ), ctime( 14 ),
589 WRITE( nout, fmt = 9987 )ctime( 16 ), ctime( 17 ),
590 $ ctime( 18 ), ctime( 19 ), ctime( 20 ),
592 WRITE( nout, fmt = 9986 )n, nprow*npcol, pnb, anb,
593 $ interleave, balanced, twogemms, timeinternals
599 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
600 IF( splitstimed.EQ.1 )
THEN
601 WRITE( nout, fmt = 9985 )
602 WRITE( nout, fmt = 9984 )
603 WRITE( nout, fmt = 9983 )
604 WRITE( nout, fmt = 9982 )
605 WRITE( nout, fmt = 9981 )
606 WRITE( nout, fmt = 9980 )
607 WRITE( nout, fmt = 9979 )
608 WRITE( nout, fmt = 9978 )
609 WRITE( nout, fmt = 9977 )
610 WRITE( nout, fmt = 9976 )
611 WRITE( nout, fmt = 9975 )
612 WRITE( nout, fmt = 9974 )
613 WRITE( nout, fmt = 9973 )
618 CALL blacs_gridexit( ictxt )
622 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
623 $
'; It should be at least 1' )
624 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
626 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
627 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
630 9995
FORMAT(
'PSSYTTRD, tailored reduction to tridiagonal form, test.'
632 9994
FORMAT(
'TIME N int 2gm bal anb pnb prcs TRD Time ',
633 $
' MFLOPS Residual CHECK' )
634 9993
FORMAT(
'---- ---- --- --- --- --- --- ---- -------- ',
635 $
'----------- -------- ------' )
636 9992
FORMAT( a4, 1x, i5, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x,
637 $ i5, 1x, f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
638 9991
FORMAT(
'||A - Q*T*Q''|| / (||A|| * N * eps) = ', g25.7 )
639 9990
FORMAT(
'wsplit1=[wsplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
640 $ 1x, f9.2, 1x, f9.2,
' ];' )
641 9989
FORMAT(
'wsplit2=[wsplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
642 $ 1x, f9.2, 1x, f9.2,
' ];' )
643 9988
FORMAT(
'csplit1=[csplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
644 $ 1x, f9.2, 1x, f9.2,
' ];' )
645 9987
FORMAT(
'csplit2=[csplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
646 $ 1x, f9.2, 1x, f9.2,
' ];' )
647 9986
FORMAT(
'size_opts=[size_opts;', i4, 1x, i4, 1x, i4, 1x, i4, 1x,
648 $ i4, 1x, i4, 1x, i4, 1x, i4, 1x,
' ];' )
649 9985
FORMAT(
'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;',
650 $
' TWOGEMMS=7; TIMEINTERNALS=8;' )
651 9984
FORMAT(
'S1_OVERHEAD = 1; % Should be mainly cost of barrier' )
652 9983
FORMAT(
'S1_BARRIER = 2; % Cost of barrier' )
653 9982
FORMAT(
'S1_UPDCURCOL = 3; % Update the current column' )
654 9981
FORMAT(
'S1_HOUSE = 4; % Compute the householder vector' )
655 9980
FORMAT(
'S1_SPREAD = 5; % Spread across' )
656 9979
FORMAT(
'S1_TRANSPOSE = 6; % Transpose' )
657 9978
FORMAT(
'S2_UPDCURBLK = 1; % Update the current block column' )
658 9977
FORMAT(
'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' )
659 9976
FORMAT(
'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' )
660 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt'' ' )
661 9974
FORMAT(
'S2_DOT = 5; % c = v'' * h ' )
662 9973
FORMAT(
'S2_R2K = 6; % A = A - v * h'' - h * v'' ' )