1 SUBROUTINE pcttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
76 INTEGER REALSZ, CPLXSZ
78 parameter( realsz = 4, cplxsz = 8,
79 $ padval = ( -9923.0e+0, -9924.0e+0 ) )
81 parameter( timetests = 11 )
83 parameter( tests = 8 )
85 parameter( mintimen = 8 )
91 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
92 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
93 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
94 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
95 $ nps, nq, splitstimed, worksiz, worktrd
97 DOUBLE PRECISION NOPS, TMFLOPS
100 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
101 $ baltest( tests ), baltime( timetests ),
102 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
103 $ intertest( tests ), intertime( timetests ),
104 $ pnbtest( tests ), pnbtime( timetests ),
105 $ twogemmtest( tests ), twogemmtime( timetests )
106 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
109 EXTERNAL blacs_barrier, blacs_get, blacs_gridexit,
110 $ blacs_gridinfo, blacs_gridinit,
descinit,
117 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
119 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pclanhe
122 INTRINSIC dble, int,
max, real, sqrt
126 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
127 $ lltblock, minsz, pnb, timeinternals, timing,
131 COMMON / blocksizes / gstblock, lltblock, bckblock,
133 COMMON / minsize / minsz
134 COMMON / pjlaenvtiming / timing
135 COMMON / tailoredopts / pnb, anb, interleave,
137 COMMON / timecontrol / timeinternals
140 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
141 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
142 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
143 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
145 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
147 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
148 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
149 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
150 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
151 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
155 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
163 memsiz = totmem / cplxsz
168 WRITE( nout, fmt = * )
169 WRITE( nout, fmt = 9995 )
170 WRITE( nout, fmt = 9994 )
171 WRITE( nout, fmt = 9993 )
172 WRITE( nout, fmt = * )
177 ngrids = int( sqrt( real( nprocs ) ) )
187 CALL blacs_get( -1, 0, ictxt )
188 CALL blacs_gridinit( ictxt,
'Row-major', nprow, npcol )
189 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
193 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
205 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
211 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
213 IF( ierr( 1 ).GT.0 )
THEN
215 $
WRITE( nout, fmt = 9997 )
'matrix'
222 IF( n.GT.mintimen )
THEN
233 maxtests = timetests + 2
240 DO 10 k = 1, maxtests
243 IF( k.GE.maxtests-1 )
THEN
259 dummy = pjlaenv( ictxt, 3,
'PCHETTRD',
'L', 0, 0,
266 balanced = baltime( k )
267 interleave = intertime( k )
268 twogemms = twogemmtime( k )
275 balanced = baltest( k )
276 interleave = intertest( k )
277 twogemms = twogemmtest( k )
285 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
286 CALL igebs2d( ictxt,
'All',
' ', 1, 1, splitstimed,
289 CALL igebr2d( ictxt,
'All',
' ', 1, 1, splitstimed, 1,
294 IF( splitstimed.EQ.0 .AND. k.EQ.maxtests )
306 np = numroc( n, nb, myrow, 0, nprow )
307 nq = numroc( n, nb, mycol, 0, npcol )
309 iprepad =
max( nb, np )
311 ipostpad =
max( nb, nq )
321 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
322 $
max( 1, np )+imidpad, ierr( 1 ) )
324 CALL descinit( descd, 1, n, nb, nb, 0, 0, ictxt, 1,
329 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
331 IF( ierr( 1 ).LT.0 )
THEN
333 $
WRITE( nout, fmt = 9997 )
'descriptor'
342 IF( lsame( uplo,
'U' ) )
THEN
345 noffd = numroc( n-1, nb, mycol, 0, npcol )
347 ndiag = iceil( realsz*ndiag, cplxsz )
348 noffd = iceil( realsz*noffd, cplxsz )
351 ipd = ipa + desca( lld_ )*nq + ipostpad + iprepad
352 ipe = ipd + ndiag + ipostpad + iprepad
353 ipt = ipe + noffd + ipostpad + iprepad
354 ipw = ipt + nq + ipostpad + iprepad
359 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
360 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
362 worktrd = lwmin + ipostpad
369 IF( nprow.NE.npcol )
THEN
370 lcm = ilcm( nprow, npcol )
371 itemp = nb*iceil( iceil( np, nb ), lcm / nprow ) +
374 itemp =
max( iceil( realsz*itemp, cplxsz ),
376 worksiz =
max( lwmin, itemp ) + ipostpad
382 IF( ipw+worksiz.GT.memsiz )
THEN
384 $
WRITE( nout, fmt = 9996 )
'Tridiagonal reduction',
385 $ ( ipw+worksiz )*cplxsz
391 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
393 IF( ierr( 1 ).GT.0 )
THEN
395 $
WRITE( nout, fmt = 9997 )
'MEMORY'
404 CALL pcmatgen( ictxt,
'Hemm',
'N', desca( m_ ),
405 $ desca( n_ ), desca( mb_ ), desca( nb_ ),
406 $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
407 $ desca( csrc_ ), iaseed, 0, np, 0, nq,
408 $ myrow, mycol, nprow, npcol )
414 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
415 $ desca( lld_ ), iprepad, ipostpad,
417 CALL pcfillpad( ictxt, ndiag, 1, mem( ipd-iprepad ),
418 $ ndiag, iprepad, ipostpad, padval )
419 CALL pcfillpad( ictxt, noffd, 1, mem( ipe-iprepad ),
420 $ noffd, iprepad, ipostpad, padval )
421 CALL pcfillpad( ictxt, nq, 1, mem( ipt-iprepad ), nq,
422 $ iprepad, ipostpad, padval )
423 CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
424 $ mem( ipw-iprepad ), worksiz-ipostpad,
425 $ iprepad, ipostpad, padval )
426 anorm = pclanhe(
'I', uplo, n, mem( ipa ), 1, 1,
427 $ desca, mem( ipw ) )
428 CALL pcchekpad( ictxt,
'PCLANHE', np, nq,
429 $ mem( ipa-iprepad ), desca( lld_ ),
430 $ iprepad, ipostpad, padval )
431 CALL pcchekpad( ictxt,
'PCLANHE', worksiz-ipostpad, 1,
432 $ mem( ipw-iprepad ), worksiz-ipostpad,
433 $ iprepad, ipostpad, padval )
434 CALL pcfillpad( ictxt, worktrd-ipostpad, 1,
435 $ mem( ipw-iprepad ), worktrd-ipostpad,
436 $ iprepad, ipostpad, padval )
440 CALL blacs_barrier( ictxt,
'All' )
445 CALL pchettrd( uplo, n, mem( ipa ), 1, 1, desca,
446 $ mem( ipd ), mem( ipe ), mem( ipt ),
447 $ mem( ipw ), lwmin, info )
455 CALL pcchekpad( ictxt,
'PCHETTRD', np, nq,
456 $ mem( ipa-iprepad ), desca( lld_ ),
457 $ iprepad, ipostpad, padval )
458 CALL pcchekpad( ictxt,
'PCHETTRD', ndiag, 1,
459 $ mem( ipd-iprepad ), ndiag, iprepad,
462 CALL pcchekpad( ictxt,
'PCHETTRDc', noffd, 1,
463 $ mem( ipe-iprepad ), noffd, iprepad,
465 CALL pcchekpad( ictxt,
'PCHETTRDd', nq, 1,
466 $ mem( ipt-iprepad ), nq, iprepad,
468 CALL pcchekpad( ictxt,
'PCHETTRDe', worktrd-ipostpad,
469 $ 1, mem( ipw-iprepad ),
470 $ worktrd-ipostpad, iprepad, ipostpad,
472 CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
473 $ mem( ipw-iprepad ), worksiz-ipostpad,
474 $ iprepad, ipostpad, padval )
478 CALL pchetdrv( uplo, n, mem( ipa ), 1, 1, desca,
479 $ mem( ipd ), mem( ipe ), mem( ipt ),
480 $ mem( ipw ), ierr( 1 ) )
486 CALL pclatran( n, 1, mem( ipa ), 1, 1, desca,
488 CALL pclafchk(
'Hemm',
'No', n, n, mem( ipa ), 1, 1,
489 $ desca, iaseed, anorm, fresid,
494 CALL pcchekpad( ictxt,
'PCHETDRVf', np, nq,
495 $ mem( ipa-iprepad ), desca( lld_ ),
496 $ iprepad, ipostpad, padval )
497 CALL pcchekpad( ictxt,
'PCHETDRVg', ndiag, 1,
498 $ mem( ipd-iprepad ), ndiag, iprepad,
500 CALL pcchekpad( ictxt,
'PCHETDRVh', noffd, 1,
501 $ mem( ipe-iprepad ), noffd, iprepad,
503 CALL pcchekpad( ictxt,
'PCHETDRVi', worksiz-ipostpad,
504 $ 1, mem( ipw-iprepad ),
505 $ worksiz-ipostpad, iprepad, ipostpad,
510 IF( fresid.LE.thresh .AND. fresid-fresid.EQ.
511 $ 0.0e+0 .AND. ierr( 1 ).EQ.0 )
THEN
515 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
516 $
WRITE( nout, fmt = 9991 )fresid
524 IF( myrow.EQ.0 .AND. mycol.EQ.0 .AND. ierr( 1 ).NE.0 )
525 $
WRITE( nout, fmt = * )
'D or E copies incorrect ...'
531 fresid = fresid - fresid
537 CALL slcombine( ictxt,
'All',
'>',
'W', 50, 1, wtime )
538 CALL slcombine( ictxt,
'All',
'>',
'C', 50, 1, ctime )
542 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
547 nops = ( 16.0d+0 / 3.0d+0 )*nops**3
552 IF( wtime( 1 ).GT.0.0d+0 )
THEN
553 tmflops = nops / wtime( 1 )
557 IF( wtime( 1 ).GE.0.0d+0 )
558 $
WRITE( nout, fmt = 9992 )
'WALL', n, interleave,
559 $ twogemms, balanced, anb, pnb, nprow*npcol,
560 $ wtime( 1 ), tmflops, fresid, passed
564 IF( ctime( 1 ).GT.0.0d+0 )
THEN
565 tmflops = nops / ctime( 1 )
569 IF( ctime( 1 ).GE.0.0d+0 )
570 $
WRITE( nout, fmt = 9992 )
'CPU ', n, interleave,
571 $ twogemms, balanced, anb, pnb, nprow*npcol,
572 $ ctime( 1 ), tmflops, fresid, passed
578 IF( wtime( 13 )+wtime( 15 )+wtime( 16 ).GT.0.0d+0 .OR.
579 $ ctime( 13 )+ctime( 15 )+ctime( 16 ).GT.0.0d+0 )
583 IF( splitstimed.EQ.1 )
THEN
584 WRITE( nout, fmt = 9990 )wtime( 10 ), wtime( 11 ),
585 $ wtime( 12 ), wtime( 13 ), wtime( 14 ),
587 WRITE( nout, fmt = 9989 )wtime( 16 ), wtime( 17 ),
588 $ wtime( 18 ), wtime( 19 ), wtime( 20 ),
591 WRITE( nout, fmt = 9988 )ctime( 10 ), ctime( 11 ),
592 $ ctime( 12 ), ctime( 13 ), ctime( 14 ),
594 WRITE( nout, fmt = 9987 )ctime( 16 ), ctime( 17 ),
595 $ ctime( 18 ), ctime( 19 ), ctime( 20 ),
597 WRITE( nout, fmt = 9986 )n, nprow*npcol, pnb, anb,
598 $ interleave, balanced, twogemms, timeinternals
604 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
605 IF( splitstimed.EQ.1 )
THEN
606 WRITE( nout, fmt = 9985 )
607 WRITE( nout, fmt = 9984 )
608 WRITE( nout, fmt = 9983 )
609 WRITE( nout, fmt = 9982 )
610 WRITE( nout, fmt = 9981 )
611 WRITE( nout, fmt = 9980 )
612 WRITE( nout, fmt = 9979 )
613 WRITE( nout, fmt = 9978 )
614 WRITE( nout, fmt = 9977 )
615 WRITE( nout, fmt = 9976 )
616 WRITE( nout, fmt = 9975 )
617 WRITE( nout, fmt = 9974 )
618 WRITE( nout, fmt = 9973 )
623 CALL blacs_gridexit( ictxt )
627 9999
FORMAT(
'ILLEGAL ', a6,
': ', a5,
' = ', i3,
628 $
'; It should be at least 1' )
629 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
631 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
632 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
635 9995
FORMAT(
'PCHETTRD, tailored reduction to tridiagonal form, test.'
637 9994
FORMAT(
'TIME N int 2gm bal anb pnb prcs TRD Time ',
638 $
' MFLOPS Residual CHECK' )
639 9993
FORMAT(
'---- ---- --- --- --- --- --- ---- -------- ',
640 $
'----------- -------- ------' )
641 9992
FORMAT( a4, 1x, i5, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x,
642 $ i5, 1x, f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
643 9991
FORMAT(
'||A - Q*T*Q''|| / (||A|| * N * eps) = ', g25.7 )
644 9990
FORMAT(
'wsplit1=[wsplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
645 $ 1x, f9.2, 1x, f9.2,
' ];' )
646 9989
FORMAT(
'wsplit2=[wsplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
647 $ 1x, f9.2, 1x, f9.2,
' ];' )
648 9988
FORMAT(
'csplit1=[csplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
649 $ 1x, f9.2, 1x, f9.2,
' ];' )
650 9987
FORMAT(
'csplit2=[csplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
651 $ 1x, f9.2, 1x, f9.2,
' ];' )
652 9986
FORMAT(
'size_opts=[size_opts;', i4, 1x, i4, 1x, i4, 1x, i4, 1x,
653 $ i4, 1x, i4, 1x, i4, 1x, i4, 1x,
' ];' )
654 9985
FORMAT(
'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;',
655 $
' TWOGEMMS=7; TIMEINTERNALS=8;' )
656 9984
FORMAT(
'S1_OVERHEAD = 1; % Should be mainly cost of barrier' )
657 9983
FORMAT(
'S1_BARRIER = 2; % Cost of barrier' )
658 9982
FORMAT(
'S1_UPDCURCOL = 3; % Update the current column' )
659 9981
FORMAT(
'S1_HOUSE = 4; % Compute the householder vector' )
660 9980
FORMAT(
'S1_SPREAD = 5; % Spread across' )
661 9979
FORMAT(
'S1_TRANSPOSE = 6; % Transpose' )
662 9978
FORMAT(
'S2_UPDCURBLK = 1; % Update the current block column' )
663 9977
FORMAT(
'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' )
664 9976
FORMAT(
'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' )
665 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt'' ' )
666 9974
FORMAT(
'S2_DOT = 5; % c = v'' * h ' )
667 9973
FORMAT(
'S2_R2K = 6; % A = A - v * h'' - h * v'' ' )