1 SUBROUTINE pzttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
76 INTEGER DBLESZ, ZPLXSZ
78 parameter( dblesz = 8, zplxsz = 16,
79 $ padval = ( -9923.0d+0, -9924.0d+0 ) )
81 parameter( timetests = 11 )
83 parameter( tests = 8 )
85 parameter( mintimen = 8 )
91 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
92 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
93 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
94 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
95 $ nps, nq, splitstimed, worksiz, worktrd
96 DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS
99 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
100 $ baltest( tests ), baltime( timetests ),
101 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
102 $ intertest( tests ), intertime( timetests ),
103 $ pnbtest( tests ), pnbtime( timetests ),
104 $ twogemmtest( tests ), twogemmtime( timetests )
105 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
108 EXTERNAL blacs_barrier, blacs_get, blacs_gridexit,
109 $ blacs_gridinfo, blacs_gridinit,
descinit,
116 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
117 DOUBLE PRECISION PZLANHE
118 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pzlanhe
121 INTRINSIC dble, int,
max, sqrt
125 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
126 $ lltblock, minsz, pnb, timeinternals, timing,
130 COMMON / blocksizes / gstblock, lltblock, bckblock,
132 COMMON / minsize / minsz
133 COMMON / pjlaenvtiming / timing
134 COMMON / tailoredopts / pnb, anb, interleave,
136 COMMON / timecontrol / timeinternals
139 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
140 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
141 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
142 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
144 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
146 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
147 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
148 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
149 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
150 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
154 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
162 memsiz = totmem / zplxsz
167 WRITE( nout, fmt = * )
168 WRITE( nout, fmt = 9995 )
169 WRITE( nout, fmt = 9994 )
170 WRITE( nout, fmt = 9993 )
171 WRITE( nout, fmt = * )
176 ngrids = int( sqrt( dble( nprocs ) ) )
186 CALL blacs_get( -1, 0, ictxt )
187 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
188 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
192 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
204 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
210 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
212 IF( ierr( 1 ).GT.0 )
THEN
214 $
WRITE( nout, fmt = 9997 )
'matrix'
221 IF( n.GT.mintimen )
THEN
232 maxtests = timetests + 2
239 DO 10 k = 1, maxtests
242 IF( k.GE.maxtests-1 )
THEN
258 dummy = pjlaenv( ictxt, 3,
'PZHETTRD',
'L', 0, 0,
265 balanced = baltime( k )
266 interleave = intertime( k )
267 twogemms = twogemmtime( k )
274 balanced = baltest( k )
275 interleave = intertest( k )
276 twogemms = twogemmtest( k )
284 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
285 CALL igebs2d( ictxt,
'All',
' ', 1, 1, splitstimed,
288 CALL igebr2d( ictxt,
'All',
' ', 1, 1, splitstimed, 1,
293 IF( splitstimed.EQ.0 .AND. k.EQ.maxtests )
305 np = numroc( n, nb, myrow, 0, nprow )
306 nq = numroc( n, nb, mycol, 0, npcol )
308 iprepad =
max( nb, np )
310 ipostpad =
max( nb, nq )
320 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
321 $
max( 1, np )+imidpad, ierr( 1 ) )
323 CALL descinit( descd, 1, n, nb, nb, 0, 0, ictxt, 1,
328 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
330 IF( ierr( 1 ).LT.0 )
THEN
332 $
WRITE( nout, fmt = 9997 )
'descriptor'
341 IF( lsame( uplo,
'U' ) )
THEN
344 noffd = numroc( n-1, nb, mycol, 0, npcol )
346 ndiag = iceil( dblesz*ndiag, zplxsz )
347 noffd = iceil( dblesz*noffd, zplxsz )
350 ipd = ipa + desca( lld_ )*nq + ipostpad + iprepad
351 ipe = ipd + ndiag + ipostpad + iprepad
352 ipt = ipe + noffd + ipostpad + iprepad
353 ipw = ipt + nq + ipostpad + iprepad
358 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
359 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
361 worktrd = lwmin + ipostpad
368 IF( nprow.NE.npcol )
THEN
369 lcm = ilcm( nprow, npcol )
370 itemp = nb*iceil( iceil( np, nb ), lcm / nprow ) +
373 itemp =
max( iceil( dblesz*itemp, zplxsz ),
375 worksiz =
max( lwmin, itemp ) + ipostpad
381 IF( ipw+worksiz.GT.memsiz )
THEN
383 $
WRITE( nout, fmt = 9996 )
'Tridiagonal reduction',
384 $ ( ipw+worksiz )*zplxsz
390 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
392 IF( ierr( 1 ).GT.0 )
THEN
394 $
WRITE( nout, fmt = 9997 )
'MEMORY'
403 CALL pzmatgen( ictxt,
'Hemm',
'N', desca( m_ ),
404 $ desca( n_ ), desca( mb_ ), desca( nb_ ),
405 $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
406 $ desca( csrc_ ), iaseed, 0, np, 0, nq,
407 $ myrow, mycol, nprow, npcol )
413 CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
414 $ desca( lld_ ), iprepad, ipostpad,
416 CALL pzfillpad( ictxt, ndiag, 1, mem( ipd-iprepad ),
417 $ ndiag, iprepad, ipostpad, padval )
418 CALL pzfillpad( ictxt, noffd, 1, mem( ipe-iprepad ),
419 $ noffd, iprepad, ipostpad, padval )
420 CALL pzfillpad( ictxt, nq, 1, mem( ipt-iprepad ), nq,
421 $ iprepad, ipostpad, padval )
422 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
423 $ mem( ipw-iprepad ), worksiz-ipostpad,
424 $ iprepad, ipostpad, padval )
425 anorm = pzlanhe(
'I', uplo, n, mem( ipa ), 1, 1,
426 $ desca, mem( ipw ) )
427 CALL pzchekpad( ictxt,
'PZLANHE', np, nq,
428 $ mem( ipa-iprepad ), desca( lld_ ),
429 $ iprepad, ipostpad, padval )
430 CALL pzchekpad( ictxt,
'PZLANHE', worksiz-ipostpad, 1,
431 $ mem( ipw-iprepad ), worksiz-ipostpad,
432 $ iprepad, ipostpad, padval )
433 CALL pzfillpad( ictxt, worktrd-ipostpad, 1,
434 $ mem( ipw-iprepad ), worktrd-ipostpad,
435 $ iprepad, ipostpad, padval )
439 CALL blacs_barrier( ictxt,
'All' )
444 CALL pzhettrd( uplo, n, mem( ipa ), 1, 1, desca,
445 $ mem( ipd ), mem( ipe ), mem( ipt ),
446 $ mem( ipw ), lwmin, info )
454 CALL pzchekpad( ictxt,
'PZHETTRD', np, nq,
455 $ mem( ipa-iprepad ), desca( lld_ ),
456 $ iprepad, ipostpad, padval )
457 CALL pzchekpad( ictxt,
'PZHETTRD', ndiag, 1,
458 $ mem( ipd-iprepad ), ndiag, iprepad,
461 CALL pzchekpad( ictxt,
'PZHETTRDc', noffd, 1,
462 $ mem( ipe-iprepad ), noffd, iprepad,
464 CALL pzchekpad( ictxt,
'PZHETTRDd', nq, 1,
465 $ mem( ipt-iprepad ), nq, iprepad,
467 CALL pzchekpad( ictxt,
'PZHETTRDe', worktrd-ipostpad,
468 $ 1, mem( ipw-iprepad ),
469 $ worktrd-ipostpad, iprepad, ipostpad,
471 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
472 $ mem( ipw-iprepad ), worksiz-ipostpad,
473 $ iprepad, ipostpad, padval )
477 CALL pzhetdrv( uplo, n, mem( ipa ), 1, 1, desca,
478 $ mem( ipd ), mem( ipe ), mem( ipt ),
479 $ mem( ipw ), ierr( 1 ) )
485 CALL pzlatran( n, 1, mem( ipa ), 1, 1, desca,
487 CALL pzlafchk(
'Hemm',
'No', n, n, mem( ipa ), 1, 1,
488 $ desca, iaseed, anorm, fresid,
493 CALL pzchekpad( ictxt,
'PZHETDRVf', np, nq,
494 $ mem( ipa-iprepad ), desca( lld_ ),
495 $ iprepad, ipostpad, padval )
496 CALL pzchekpad( ictxt,
'PZHETDRVg', ndiag, 1,
497 $ mem( ipd-iprepad ), ndiag, iprepad,
499 CALL pzchekpad( ictxt,
'PZHETDRVh', noffd, 1,
500 $ mem( ipe-iprepad ), noffd, iprepad,
502 CALL pzchekpad( ictxt,
'PZHETDRVi', worksiz-ipostpad,
503 $ 1, mem( ipw-iprepad ),
504 $ worksiz-ipostpad, iprepad, ipostpad,
509 IF( fresid.LE.thresh .AND. fresid-fresid.EQ.
510 $ 0.0d+0 .AND. ierr( 1 ).EQ.0 )
THEN
514 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
515 $
WRITE( nout, fmt = 9991 )fresid
523 IF( myrow.EQ.0 .AND. mycol.EQ.0 .AND. ierr( 1 ).NE.0 )
524 $
WRITE( nout, fmt = * )
'D or E copies incorrect ...'
530 fresid = fresid - fresid
536 CALL slcombine( ictxt,
'All',
'>',
'W', 50, 1, wtime )
537 CALL slcombine( ictxt,
'All',
'>',
'C', 50, 1, ctime )
541 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
546 nops = ( 16.0d+0 / 3.0d+0 )*nops**3
551 IF( wtime( 1 ).GT.0.0d+0 )
THEN
552 tmflops = nops / wtime( 1 )
556 IF( wtime( 1 ).GE.0.0d+0 )
557 $
WRITE( nout, fmt = 9992 )
'WALL', n, interleave,
558 $ twogemms, balanced, anb, pnb, nprow*npcol,
559 $ wtime( 1 ), tmflops, fresid, passed
563 IF( ctime( 1 ).GT.0.0d+0 )
THEN
564 tmflops = nops / ctime( 1 )
568 IF( ctime( 1 ).GE.0.0d+0 )
569 $
WRITE( nout, fmt = 9992 )
'CPU ', n, interleave,
570 $ twogemms, balanced, anb, pnb, nprow*npcol,
571 $ ctime( 1 ), tmflops, fresid, passed
577 IF( wtime( 13 )+wtime( 15 )+wtime( 16 ).GT.0.0d+0 .OR.
578 $ ctime( 13 )+ctime( 15 )+ctime( 16 ).GT.0.0d+0 )
582 IF( splitstimed.EQ.1 )
THEN
583 WRITE( nout, fmt = 9990 )wtime( 10 ), wtime( 11 ),
584 $ wtime( 12 ), wtime( 13 ), wtime( 14 ),
586 WRITE( nout, fmt = 9989 )wtime( 16 ), wtime( 17 ),
587 $ wtime( 18 ), wtime( 19 ), wtime( 20 ),
590 WRITE( nout, fmt = 9988 )ctime( 10 ), ctime( 11 ),
591 $ ctime( 12 ), ctime( 13 ), ctime( 14 ),
593 WRITE( nout, fmt = 9987 )ctime( 16 ), ctime( 17 ),
594 $ ctime( 18 ), ctime( 19 ), ctime( 20 ),
596 WRITE( nout, fmt = 9986 )n, nprow*npcol, pnb, anb,
597 $ interleave, balanced, twogemms, timeinternals
603 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
604 IF( splitstimed.EQ.1 )
THEN
605 WRITE( nout, fmt = 9985 )
606 WRITE( nout, fmt = 9984 )
607 WRITE( nout, fmt = 9983 )
608 WRITE( nout, fmt = 9982 )
609 WRITE( nout, fmt = 9981 )
610 WRITE( nout, fmt = 9980 )
611 WRITE( nout, fmt = 9979 )
612 WRITE( nout, fmt = 9978 )
613 WRITE( nout, fmt = 9977 )
614 WRITE( nout, fmt = 9976 )
615 WRITE( nout, fmt = 9975 )
616 WRITE( nout, fmt = 9974 )
617 WRITE( nout, fmt = 9973 )
622 CALL blacs_gridexit( ictxt )
626 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
627 $
'; It should be at least 1' )
628 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
630 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
631 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
634 9995
FORMAT(
'PZHETTRD, tailored reduction to tridiagonal form, test.'
636 9994
FORMAT(
'TIME N int 2gm bal anb pnb prcs TRD Time ',
637 $
' MFLOPS Residual CHECK' )
638 9993
FORMAT(
'---- ---- --- --- --- --- --- ---- -------- ',
639 $
'----------- -------- ------' )
640 9992
FORMAT( a4, 1x, i5, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x,
641 $ i5, 1x, f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
642 9991
FORMAT(
'||A - Q*T*Q''|| / (||A|| * N * eps) = ', g25.7 )
643 9990
FORMAT(
'wsplit1=[wsplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
644 $ 1x, f9.2, 1x, f9.2,
' ];' )
645 9989
FORMAT(
'wsplit2=[wsplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
646 $ 1x, f9.2, 1x, f9.2,
' ];' )
647 9988
FORMAT(
'csplit1=[csplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
648 $ 1x, f9.2, 1x, f9.2,
' ];' )
649 9987
FORMAT(
'csplit2=[csplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
650 $ 1x, f9.2, 1x, f9.2,
' ];' )
651 9986
FORMAT(
'size_opts=[size_opts;', i4, 1x, i4, 1x, i4, 1x, i4, 1x,
652 $ i4, 1x, i4, 1x, i4, 1x, i4, 1x,
' ];' )
653 9985
FORMAT(
'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;',
654 $
' TWOGEMMS=7; TIMEINTERNALS=8;' )
655 9984
FORMAT(
'S1_OVERHEAD = 1; % Should be mainly cost of barrier' )
656 9983
FORMAT(
'S1_BARRIER = 2; % Cost of barrier' )
657 9982
FORMAT(
'S1_UPDCURCOL = 3; % Update the current column' )
658 9981
FORMAT(
'S1_HOUSE = 4; % Compute the householder vector' )
659 9980
FORMAT(
'S1_SPREAD = 5; % Spread across' )
660 9979
FORMAT(
'S1_TRANSPOSE = 6; % Transpose' )
661 9978
FORMAT(
'S2_UPDCURBLK = 1; % Update the current block column' )
662 9977
FORMAT(
'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' )
663 9976
FORMAT(
'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' )
664 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt'' ' )
665 9974
FORMAT(
'S2_DOT = 5; % c = v'' * h ' )
666 9973
FORMAT(
'S2_R2K = 6; % A = A - v * h'' - h * v'' ' )