1 SUBROUTINE pdttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
17 DOUBLE PRECISION MEM( * )
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
77 DOUBLE PRECISION PADVAL
78 parameter( dblesz = 8, padval = -9923.0d+0 )
80 parameter( timetests = 11 )
82 parameter( tests = 8 )
84 parameter( mintimen = 8 )
90 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
91 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
92 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
93 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
94 $ nps, nq, splitstimed, worksiz, worktrd
95 DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS
98 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
99 $ baltest( tests ), baltime( timetests ),
100 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
101 $ intertest( tests ), intertime( timetests ),
102 $ pnbtest( tests ), pnbtime( timetests ),
103 $ twogemmtest( tests ), twogemmtime( timetests )
104 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
107 EXTERNAL blacs_barrier, blacs_get, blacs_gridexit,
108 $ blacs_gridinfo, blacs_gridinit,
descinit,
115 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
116 DOUBLE PRECISION PDLANSY
117 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pdlansy
120 INTRINSIC dble, int,
max, sqrt
124 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
125 $ lltblock, minsz, pnb, timeinternals, timing,
129 COMMON / blocksizes / gstblock, lltblock, bckblock,
131 COMMON / minsize / minsz
132 COMMON / pjlaenvtiming / timing
133 COMMON / tailoredopts / pnb, anb, interleave,
135 COMMON / timecontrol / timeinternals
138 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
139 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
140 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
141 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
143 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
145 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
146 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
147 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
148 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
149 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
153 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
161 memsiz = totmem / dblesz
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = 9993 )
170 WRITE( nout, fmt = * )
175 ngrids = int( sqrt( dble( nprocs ) ) )
185 CALL blacs_get( -1, 0, ictxt )
186 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
187 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
203 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
209 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
211 IF( ierr( 1 ).GT.0 )
THEN
213 $
WRITE( nout, fmt = 9997 )
'matrix'
220 IF( n.GT.mintimen )
THEN
231 maxtests = timetests + 2
238 DO 10 k = 1, maxtests
241 IF( k.GE.maxtests-1 )
THEN
257 dummy = pjlaenv( ictxt, 3,
'PDSYTTRD',
'L', 0, 0,
264 balanced = baltime( k )
265 interleave = intertime( k )
266 twogemms = twogemmtime( k )
273 balanced = baltest( k )
274 interleave = intertest( k )
275 twogemms = twogemmtest( k )
283 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
284 CALL igebs2d( ictxt,
'All',
' ', 1, 1, splitstimed,
287 CALL igebr2d( ictxt,
'All',
' ', 1, 1, splitstimed, 1,
292 IF( splitstimed.EQ.0 .AND. k.EQ.maxtests )
304 np = numroc( n, nb, myrow, 0, nprow )
305 nq = numroc( n, nb, mycol, 0, npcol )
307 iprepad =
max( nb, np )
309 ipostpad =
max( nb, nq )
319 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
320 $
max( 1, np )+imidpad, ierr( 1 ) )
322 CALL descinit( descd, 1, n, nb, nb, 0, 0, ictxt, 1,
327 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
329 IF( ierr( 1 ).LT.0 )
THEN
331 $
WRITE( nout, fmt = 9997 )
'descriptor'
340 IF( lsame( uplo,
'U' ) )
THEN
343 noffd = numroc( n-1, nb, mycol, 0, npcol )
347 ipd = ipa + desca( lld_ )*nq + ipostpad + iprepad
348 ipe = ipd + ndiag + ipostpad + iprepad
349 ipt = ipe + noffd + ipostpad + iprepad
350 ipw = ipt + nq + ipostpad + iprepad
355 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
356 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
358 worktrd = lwmin + ipostpad
365 IF( nprow.NE.npcol )
THEN
366 lcm = ilcm( nprow, npcol )
367 itemp = nb*iceil( iceil( np, nb ), lcm / nprow ) +
370 itemp =
max( itemp, 2*( nb+np )*nb )
371 worksiz =
max( lwmin, itemp ) + ipostpad
377 IF( ipw+worksiz.GT.memsiz )
THEN
379 $
WRITE( nout, fmt = 9996 )
'Tridiagonal reduction',
380 $ ( ipw+worksiz )*dblesz
386 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
388 IF( ierr( 1 ).GT.0 )
THEN
390 $
WRITE( nout, fmt = 9997 )
'MEMORY'
399 CALL pdmatgen( ictxt,
'Hemm',
'N', desca( m_ ),
400 $ desca( n_ ), desca( mb_ ), desca( nb_ ),
401 $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
402 $ desca( csrc_ ), iaseed, 0, np, 0, nq,
403 $ myrow, mycol, nprow, npcol )
409 CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
410 $ desca( lld_ ), iprepad, ipostpad,
412 CALL pdfillpad( ictxt, ndiag, 1, mem( ipd-iprepad ),
413 $ ndiag, iprepad, ipostpad, padval )
414 CALL pdfillpad( ictxt, noffd, 1, mem( ipe-iprepad ),
415 $ noffd, iprepad, ipostpad, padval )
416 CALL pdfillpad( ictxt, nq, 1, mem( ipt-iprepad ), nq,
417 $ iprepad, ipostpad, padval )
418 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
419 $ mem( ipw-iprepad ), worksiz-ipostpad,
420 $ iprepad, ipostpad, padval )
421 anorm = pdlansy(
'I', uplo, n, mem( ipa ), 1, 1,
422 $ desca, mem( ipw ) )
423 CALL pdchekpad( ictxt,
'PDLANSY', np, nq,
424 $ mem( ipa-iprepad ), desca( lld_ ),
425 $ iprepad, ipostpad, padval )
426 CALL pdchekpad( ictxt,
'PDLANSY', worksiz-ipostpad, 1,
427 $ mem( ipw-iprepad ), worksiz-ipostpad,
428 $ iprepad, ipostpad, padval )
429 CALL pdfillpad( ictxt, worktrd-ipostpad, 1,
430 $ mem( ipw-iprepad ), worktrd-ipostpad,
431 $ iprepad, ipostpad, padval )
435 CALL blacs_barrier( ictxt,
'All' )
440 CALL pdsyttrd( uplo, n, mem( ipa ), 1, 1, desca,
441 $ mem( ipd ), mem( ipe ), mem( ipt ),
442 $ mem( ipw ), lwmin, info )
450 CALL pdchekpad( ictxt,
'PDSYTTRD', np, nq,
451 $ mem( ipa-iprepad ), desca( lld_ ),
452 $ iprepad, ipostpad, padval )
453 CALL pdchekpad( ictxt,
'PDSYTTRD', ndiag, 1,
454 $ mem( ipd-iprepad ), ndiag, iprepad,
457 CALL pdchekpad( ictxt,
'PDSYTTRDc', noffd, 1,
458 $ mem( ipe-iprepad ), noffd, iprepad,
460 CALL pdchekpad( ictxt,
'PDSYTTRDd', nq, 1,
461 $ mem( ipt-iprepad ), nq, iprepad,
463 CALL pdchekpad( ictxt,
'PDSYTTRDe', worktrd-ipostpad,
464 $ 1, mem( ipw-iprepad ),
465 $ worktrd-ipostpad, iprepad, ipostpad,
467 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
468 $ mem( ipw-iprepad ), worksiz-ipostpad,
469 $ iprepad, ipostpad, padval )
473 CALL pdsytdrv( uplo, n, mem( ipa ), 1, 1, desca,
474 $ mem( ipd ), mem( ipe ), mem( ipt ),
475 $ mem( ipw ), ierr( 1 ) )
481 CALL pdlatran( n, 1, mem( ipa ), 1, 1, desca,
483 CALL pdlafchk(
'Hemm',
'No', n, n, mem( ipa ), 1, 1,
484 $ desca, iaseed, anorm, fresid,
489 CALL pdchekpad( ictxt,
'PDSYTDRVf', np, nq,
490 $ mem( ipa-iprepad ), desca( lld_ ),
491 $ iprepad, ipostpad, padval )
492 CALL pdchekpad( ictxt,
'PDSYTDRVg', ndiag, 1,
493 $ mem( ipd-iprepad ), ndiag, iprepad,
495 CALL pdchekpad( ictxt,
'PDSYTDRVh', noffd, 1,
496 $ mem( ipe-iprepad ), noffd, iprepad,
498 CALL pdchekpad( ictxt,
'PDSYTDRVi', worksiz-ipostpad,
499 $ 1, mem( ipw-iprepad ),
500 $ worksiz-ipostpad, iprepad, ipostpad,
505 IF( fresid.LE.thresh .AND. fresid-fresid.EQ.
506 $ 0.0d+0 .AND. ierr( 1 ).EQ.0 )
THEN
510 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
511 $
WRITE( nout, fmt = 9991 )fresid
519 IF( myrow.EQ.0 .AND. mycol.EQ.0 .AND. ierr( 1 ).NE.0 )
520 $
WRITE( nout, fmt = * )
'D or E copies incorrect ...'
526 fresid = fresid - fresid
532 CALL slcombine( ictxt,
'All',
'>',
'W', 50, 1, wtime )
533 CALL slcombine( ictxt,
'All',
'>',
'C', 50, 1, ctime )
537 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
542 nops = ( 16.0d+0 / 3.0d+0 )*nops**3
547 IF( wtime( 1 ).GT.0.0d+0 )
THEN
548 tmflops = nops / wtime( 1 )
552 IF( wtime( 1 ).GE.0.0d+0 )
553 $
WRITE( nout, fmt = 9992 )
'WALL', n, interleave,
554 $ twogemms, balanced, anb, pnb, nprow*npcol,
555 $ wtime( 1 ), tmflops, fresid, passed
559 IF( ctime( 1 ).GT.0.0d+0 )
THEN
560 tmflops = nops / ctime( 1 )
564 IF( ctime( 1 ).GE.0.0d+0 )
565 $
WRITE( nout, fmt = 9992 )
'CPU ', n, interleave,
566 $ twogemms, balanced, anb, pnb, nprow*npcol,
567 $ ctime( 1 ), tmflops, fresid, passed
573 IF( wtime( 13 )+wtime( 15 )+wtime( 16 ).GT.0.0d+0 .OR.
574 $ ctime( 13 )+ctime( 15 )+ctime( 16 ).GT.0.0d+0 )
578 IF( splitstimed.EQ.1 )
THEN
579 WRITE( nout, fmt = 9990 )wtime( 10 ), wtime( 11 ),
580 $ wtime( 12 ), wtime( 13 ), wtime( 14 ),
582 WRITE( nout, fmt = 9989 )wtime( 16 ), wtime( 17 ),
583 $ wtime( 18 ), wtime( 19 ), wtime( 20 ),
586 WRITE( nout, fmt = 9988 )ctime( 10 ), ctime( 11 ),
587 $ ctime( 12 ), ctime( 13 ), ctime( 14 ),
589 WRITE( nout, fmt = 9987 )ctime( 16 ), ctime( 17 ),
590 $ ctime( 18 ), ctime( 19 ), ctime( 20 ),
592 WRITE( nout, fmt = 9986 )n, nprow*npcol, pnb, anb,
593 $ interleave, balanced, twogemms, timeinternals
599 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
600 IF( splitstimed.EQ.1 )
THEN
601 WRITE( nout, fmt = 9985 )
602 WRITE( nout, fmt = 9984 )
603 WRITE( nout, fmt = 9983 )
604 WRITE( nout, fmt = 9982 )
605 WRITE( nout, fmt = 9981 )
606 WRITE( nout, fmt = 9980 )
607 WRITE( nout, fmt = 9979 )
608 WRITE( nout, fmt = 9978 )
609 WRITE( nout, fmt = 9977 )
610 WRITE( nout, fmt = 9976 )
611 WRITE( nout, fmt = 9975 )
612 WRITE( nout, fmt = 9974 )
613 WRITE( nout, fmt = 9973 )
618 CALL blacs_gridexit( ictxt )
622 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
623 $
'; It should be at least 1' )
624 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
626 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
627 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
630 9995
FORMAT(
'PDSYTTRD, tailored reduction to tridiagonal form, test.'
632 9994
FORMAT(
'TIME N int 2gm bal anb pnb prcs TRD Time ',
633 $
' MFLOPS Residual CHECK' )
634 9993
FORMAT(
'---- ---- --- --- --- --- --- ---- -------- ',
635 $
'----------- -------- ------' )
636 9992
FORMAT( a4, 1x, i5, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x,
637 $ i5, 1x, f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
638 9991
FORMAT(
'||A - Q*T*Q''|| / (||A|| * N * eps) = ', g25.7 )
639 9990
FORMAT(
'wsplit1=[wsplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
640 $ 1x, f9.2, 1x, f9.2,
' ];' )
641 9989
FORMAT(
'wsplit2=[wsplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
642 $ 1x, f9.2, 1x, f9.2,
' ];' )
643 9988
FORMAT(
'csplit1=[csplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
644 $ 1x, f9.2, 1x, f9.2,
' ];' )
645 9987
FORMAT(
'csplit2=[csplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
646 $ 1x, f9.2, 1x, f9.2,
' ];' )
647 9986
FORMAT(
'size_opts=[size_opts;', i4, 1x, i4, 1x, i4, 1x, i4, 1x,
648 $ i4, 1x, i4, 1x, i4, 1x, i4, 1x,
' ];' )
649 9985
FORMAT(
'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;',
650 $
' TWOGEMMS=7; TIMEINTERNALS=8;' )
651 9984
FORMAT(
'S1_OVERHEAD = 1; % Should be mainly cost of barrier' )
652 9983
FORMAT(
'S1_BARRIER = 2; % Cost of barrier' )
653 9982
FORMAT(
'S1_UPDCURCOL = 3; % Update the current column' )
654 9981
FORMAT(
'S1_HOUSE = 4; % Compute the householder vector' )
655 9980
FORMAT(
'S1_SPREAD = 5; % Spread across' )
656 9979
FORMAT(
'S1_TRANSPOSE = 6; % Transpose' )
657 9978
FORMAT(
'S2_UPDCURBLK = 1; % Update the current block column' )
658 9977
FORMAT(
'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' )
659 9976
FORMAT(
'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' )
660 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt'' ' )
661 9974
FORMAT(
'S2_DOT = 5; % c = v'' * h ' )
662 9973
FORMAT(
'S2_R2K = 6; % A = A - v * h'' - h * v'' ' )