ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdhrddriver.f
Go to the documentation of this file.
1  PROGRAM pdhrddriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * March 13, 2000
7 *
8 * Purpose
9 * =======
10 *
11 * PDHRDDRIVER is the main test program for the DOUBLE PRECISION
12 * ScaLAPACK HRD (Hessenberg Reduction) routines.
13 *
14 * The program must be driven by a short data file. An annotated
15 * example of a data file can be obtained by deleting the first 3
16 * characters from the following 14 lines:
17 * 'ScaLAPACK HRD input file'
18 * 'PVM machine'
19 * 'HRD.out' output file name (if any)
20 * 6 device out
21 * 2 number of problems sizes
22 * 100 101 values of N
23 * 2 1 values of ILO
24 * 99 101 values of IHI
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 7 number of process grids (ordered pairs of P & Q)
28 * 1 2 1 4 2 3 8 values of P
29 * 1 2 4 1 3 2 1 values of Q
30 * 3.0 threshold
31 *
32 * Internal Parameters
33 * ===================
34 *
35 * TOTMEM INTEGER, default = 2000000
36 * TOTMEM is a machine-specific parameter indicating the
37 * maximum amount of available memory in bytes.
38 * The user should customize TOTMEM to his platform. Remember
39 * to leave room in memory for the operating system, the BLACS
40 * buffer, etc. For example, on a system with 8 MB of memory
41 * per process (e.g., one processor on an Intel iPSC/860), the
42 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
43 * code, BLACS buffer, etc). However, for PVM, we usually set
44 * TOTMEM = 2000000. Some experimenting with the maximum value
45 * of TOTMEM may be required.
46 *
47 * INTGSZ INTEGER, default = 4 bytes.
48 * DBLESZ INTEGER, default = 8 bytes.
49 * INTGSZ and DBLESZ indicate the length in bytes on the
50 * given platform for an integer and a double precision real.
51 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
52 *
53 * All arrays used by SCALAPACK routines are allocated from
54 * this array and referenced by pointers. The integer IPA,
55 * for example, is a pointer to the starting element of MEM for
56 * the matrix A.
57 *
58 * =====================================================================
59 *
60 * .. Parameters ..
61  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
62  $ lld_, mb_, m_, nb_, n_, rsrc_
63  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
64  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
65  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
66  INTEGER dblesz, memsiz, ntests, totmem
67  DOUBLE PRECISION padval
68  parameter( dblesz = 8, totmem = 2000000,
69  $ memsiz = totmem / dblesz, ntests = 20,
70  $ padval = -9923.0d+0 )
71 * ..
72 * .. Local Scalars ..
73  LOGICAL check
74  CHARACTER*6 passed
75  CHARACTER*80 outfile
76  INTEGER i, iam, iaseed, ictxt, ihi, ihip, ihlp, ihlq,
77  $ ilcol, ilo, ilrow, info, inlq, imidpad, ipa,
78  $ ipt, ipw, ipostpad, iprepad, itemp, j, k,
79  $ kfail, kpass, kskip, ktests, lcm, lcmq, loff,
80  $ lwork, mycol, myrow, n, nb, ngrids, nmat, nnb,
81  $ nprocs, nout, np, npcol, nprow, nq, workhrd,
82  $ worksiz
83  REAL thresh
84  DOUBLE PRECISION anorm, fresid, nops, tmflops
85 * ..
86 * .. Local Arrays ..
87  INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
88  $ nval( ntests ), nvhi( ntests ), nvlo( ntests ),
89  $ pval( ntests ), qval( ntests )
90  DOUBLE PRECISION ctime( 1 ), mem( memsiz ), wtime( 1 )
91 * ..
92 * .. External Subroutines ..
93  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
94  $ blacs_gridexit, blacs_gridinit, blacs_gridinfo,
95  $ descinit, igsum2d, blacs_pinfo, pdfillpad,
99 * ..
100 * .. External Functions ..
101  INTEGER ilcm, indxg2p, numroc
102  DOUBLE PRECISION pdlange
103  EXTERNAL ilcm, indxg2p, numroc, pdlange
104 * ..
105 * .. Intrinsic Functions ..
106  INTRINSIC dble, max
107 * ..
108 * .. Data statements ..
109  DATA ktests, kpass, kfail, kskip / 4*0 /
110 * ..
111 * .. Executable Statements ..
112 *
113 * Get starting information
114 *
115  CALL blacs_pinfo( iam, nprocs )
116  iaseed = 100
117  CALL pdhrdinfo( outfile, nout, nmat, nval, nvlo, nvhi, ntests,
118  $ nnb, nbval, ntests, ngrids, pval, ntests, qval,
119  $ ntests, thresh, mem, iam, nprocs )
120  check = ( thresh.GE.0.0e+0 )
121 *
122 * Print headings
123 *
124  IF( iam.EQ.0 ) THEN
125  WRITE( nout, fmt = * )
126  WRITE( nout, fmt = 9995 )
127  WRITE( nout, fmt = 9994 )
128  WRITE( nout, fmt = * )
129  END IF
130 *
131 * Loop over different process grids
132 *
133  DO 30 i = 1, ngrids
134 *
135  nprow = pval( i )
136  npcol = qval( i )
137 *
138 * Make sure grid information is correct
139 *
140  ierr( 1 ) = 0
141  IF( nprow.LT.1 ) THEN
142  IF( iam.EQ.0 )
143  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
144  ierr( 1 ) = 1
145  ELSE IF( npcol.LT.1 ) THEN
146  IF( iam.EQ.0 )
147  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
148  ierr( 1 ) = 1
149  ELSE IF( nprow*npcol.GT.nprocs ) THEN
150  IF( iam.EQ.0 )
151  $ WRITE( nout, fmt = 9998 )nprow*npcol, nprocs
152  ierr( 1 ) = 1
153  END IF
154 *
155  IF( ierr( 1 ).GT.0 ) THEN
156  IF( iam.EQ.0 )
157  $ WRITE( nout, fmt = 9997 ) 'grid'
158  kskip = kskip + 1
159  GO TO 30
160  END IF
161 *
162 * Define process grid
163 *
164  CALL blacs_get( -1, 0, ictxt )
165  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
166  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
167 *
168 * Go to bottom of loop if this case doesn't use my process
169 *
170  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
171  $ GOTO 30
172 *
173  DO 20 j = 1, nmat
174 *
175  n = nval( j )
176  ilo = nvlo( j )
177  ihi = nvhi( j )
178 *
179 * Make sure matrix information is correct
180 *
181  ierr( 1 ) = 0
182  IF( n.LT.1 ) THEN
183  IF( iam.EQ.0 )
184  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
185  ierr( 1 ) = 1
186  END IF
187 *
188 * Check all processes for an error
189 *
190  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
191 *
192  IF( ierr( 1 ).GT.0 ) THEN
193  IF( iam.EQ.0 )
194  $ WRITE( nout, fmt = 9997 ) 'matrix'
195  kskip = kskip + 1
196  GO TO 20
197  END IF
198 *
199  DO 10 k = 1, nnb
200  nb = nbval( k )
201 *
202 * Make sure nb is legal
203 *
204  ierr( 1 ) = 0
205  IF( nb.LT.1 ) THEN
206  ierr( 1 ) = 1
207  IF( iam.EQ.0 )
208  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
209  END IF
210 *
211 * Check all processes for an error
212 *
213  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
214 *
215  IF( ierr( 1 ).GT.0 ) THEN
216  IF( iam.EQ.0 )
217  $ WRITE( nout, fmt = 9997 ) 'NB'
218  kskip = kskip + 1
219  GO TO 10
220  END IF
221 *
222  np = numroc( n, nb, myrow, 0, nprow )
223  nq = numroc( n, nb, mycol, 0, npcol )
224  IF( check ) THEN
225  iprepad = max( nb, np )
226  imidpad = nb
227  ipostpad = max( nb, nq )
228  ELSE
229  iprepad = 0
230  imidpad = 0
231  ipostpad = 0
232  END IF
233 *
234 * Initialize the array descriptor for the matrix A
235 *
236  CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
237  $ max( 1, np ) + imidpad, info )
238 *
239 * Check all processes for an error
240 *
241  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
242 *
243  IF( ierr( 1 ).LT.0 ) THEN
244  IF( iam.EQ.0 )
245  $ WRITE( nout, fmt = 9997 ) 'descriptor'
246  kskip = kskip + 1
247  GO TO 10
248  END IF
249 *
250 * Assign pointers into MEM for SCALAPACK arrays, A is
251 * allocated starting at position MEM( IPREPAD+1 )
252 *
253  ipa = iprepad + 1
254  ipt = ipa + desca( lld_ )*nq + ipostpad + iprepad
255  ipw = ipt + nq + ipostpad + iprepad
256 *
257 * Calculate the amount of workspace required for the
258 * reduction
259 *
260  ihip = numroc( ihi, nb, myrow, desca( rsrc_ ), nprow )
261  loff = mod( ilo-1, nb )
262  ilrow = indxg2p( ilo, nb, myrow, desca( rsrc_ ), nprow )
263  ilcol = indxg2p( ilo, nb, mycol, desca( csrc_ ), npcol )
264  ihlp = numroc( ihi-ilo+loff+1, nb, myrow, ilrow, nprow )
265  inlq = numroc( n-ilo+loff+1, nb, mycol, ilcol, npcol )
266  lwork = nb*( nb + max( ihip+1, ihlp+inlq ) )
267  workhrd = lwork + ipostpad
268  worksiz = workhrd
269 *
270 * Figure the amount of workspace required by the check
271 *
272  IF( check ) THEN
273  lcm = ilcm( nprow, npcol )
274  lcmq = lcm / npcol
275  ihlq = numroc( ihi-ilo+loff+1, nb, mycol, ilcol,
276  $ npcol )
277  itemp = nb*max( ihlp+inlq, ihlq+max( ihip,
278  $ ihlp+numroc( numroc( ihi-ilo+loff+1, nb, 0, 0,
279  $ npcol ), nb, 0, 0, lcmq ) ) )
280  worksiz = max( nb*nb + nb*ihlp + itemp, nb * np ) +
281  $ ipostpad
282  END IF
283 *
284 * Check for adequate memory for problem size
285 *
286  ierr( 1 ) = 0
287  IF( ipw+worksiz.GT.memsiz ) THEN
288  IF( iam.EQ.0 )
289  $ WRITE( nout, fmt = 9996 ) 'Hessenberg reduction',
290  $ ( ipw+worksiz )*dblesz
291  ierr( 1 ) = 1
292  END IF
293 *
294 * Check all processes for an error
295 *
296  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
297 *
298  IF( ierr( 1 ).GT.0 ) THEN
299  IF( iam.EQ.0 )
300  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
301  kskip = kskip + 1
302  GO TO 10
303  END IF
304 *
305 * Generate A
306 *
307  CALL pdmatgen( ictxt, 'No', 'No', desca( m_ ),
308  $ desca( n_ ), desca( mb_ ), desca( nb_ ),
309  $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
310  $ desca( csrc_ ),
311  $ iaseed, 0, np, 0, nq, myrow, mycol,
312  $ nprow, npcol )
313 *
314 * Need Infinity-norm of A for checking
315 *
316  IF( check ) THEN
317  CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
318  $ desca( lld_ ), iprepad, ipostpad,
319  $ padval )
320  CALL pdfillpad( ictxt, nq, 1, mem( ipt-iprepad ),
321  $ nq, iprepad, ipostpad, padval )
322  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
323  $ mem( ipw-iprepad ), worksiz-ipostpad,
324  $ iprepad, ipostpad, padval )
325  anorm = pdlange( 'I', n, n, mem( ipa ), 1, 1, desca,
326  $ mem( ipw ) )
327  CALL pdchekpad( ictxt, 'PDLANGE', np, nq,
328  $ mem( ipa-iprepad ), desca( lld_ ),
329  $ iprepad, ipostpad, padval )
330  CALL pdchekpad( ictxt, 'PDLANGE',
331  $ worksiz-ipostpad, 1,
332  $ mem( ipw-iprepad ), worksiz-ipostpad,
333  $ iprepad, ipostpad, padval )
334  CALL pdfillpad( ictxt, workhrd-ipostpad, 1,
335  $ mem( ipw-iprepad ), workhrd-ipostpad,
336  $ iprepad, ipostpad, padval )
337  END IF
338 *
339  CALL slboot()
340  CALL blacs_barrier( ictxt, 'All' )
341  CALL sltimer( 1 )
342 *
343 * Reduce Hessenberg form
344 *
345  CALL pdgehrd( n, ilo, ihi, mem( ipa ), 1, 1, desca,
346  $ mem( ipt ), mem( ipw ), lwork, info )
347  CALL sltimer( 1 )
348 *
349  IF( check ) THEN
350 *
351 * Check for memory overwrite
352 *
353  CALL pdchekpad( ictxt, 'PDGEHRD', np, nq,
354  $ mem( ipa-iprepad ), desca( lld_ ),
355  $ iprepad, ipostpad, padval )
356  CALL pdchekpad( ictxt, 'PDGEHRD', nq, 1,
357  $ mem( ipt-iprepad ), nq, iprepad,
358  $ ipostpad, padval )
359  CALL pdchekpad( ictxt, 'PDGEHRD', workhrd-ipostpad,
360  $ 1, mem( ipw-iprepad ),
361  $ workhrd-ipostpad, iprepad,
362  $ ipostpad, padval )
363  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
364  $ mem( ipw-iprepad ), worksiz-ipostpad,
365  $ iprepad, ipostpad, padval )
366 *
367 * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps)
368 *
369  CALL pdgehdrv( n, ilo, ihi, mem( ipa ), 1, 1, desca,
370  $ mem( ipt ), mem( ipw ) )
371  CALL pdlafchk( 'No', 'No', n, n, mem( ipa ), 1, 1,
372  $ desca, iaseed, anorm, fresid,
373  $ mem( ipw ) )
374 *
375 * Check for memory overwrite
376 *
377  CALL pdchekpad( ictxt, 'PDGEHDRV', np, nq,
378  $ mem( ipa-iprepad ), desca( lld_ ),
379  $ iprepad, ipostpad, padval )
380  CALL pdchekpad( ictxt, 'PDGEHDRV', nq, 1,
381  $ mem( ipt-iprepad ), nq, iprepad,
382  $ ipostpad, padval )
383  CALL pdchekpad( ictxt, 'PDGEHDRV',
384  $ worksiz-ipostpad, 1,
385  $ mem( ipw-iprepad ), worksiz-ipostpad,
386  $ iprepad, ipostpad, padval )
387 *
388 * Test residual and detect NaN result
389 *
390  IF( fresid.LE.thresh .AND. fresid-fresid.EQ.0.0d+0 )
391  $ THEN
392  kpass = kpass + 1
393  passed = 'PASSED'
394  ELSE
395  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
396  $ WRITE( nout, fmt = 9986 ) fresid
397  kfail = kfail + 1
398  passed = 'FAILED'
399  END IF
400  ELSE
401 *
402 * Don't perform the checking, only the timing operation
403 *
404  kpass = kpass + 1
405  fresid = fresid - fresid
406  passed = 'BYPASS'
407  END IF
408 *
409 * Gather max. of all CPU and WALL clock timings
410 *
411  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
412  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
413 *
414 * Print results
415 *
416  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
417 *
418 * HRD requires 10/3 * N^3 floating point ops. (flops)
419 * more precisely,
420 * HRD requires 4/3*(IHI-ILO)^3 + 2*IHI*(IHI-ILO)^2 flops
421 *
422  nops = dble( ihi-ilo )
423  nops = nops * nops *
424  $ ( 2.0d0*dble( ihi ) + (4.0d0/3.0d0)*nops )
425  nops = nops / 1.0d+6
426 *
427 * Print WALL time
428 *
429  IF( wtime( 1 ).GT.0.0d+0 ) THEN
430  tmflops = nops / wtime( 1 )
431  ELSE
432  tmflops = 0.0d+0
433  END IF
434  IF( wtime( 1 ).GE.0.0d+0 )
435  $ WRITE( nout, fmt = 9993 ) 'WALL', n, ilo, ihi, nb,
436  $ nprow, npcol, wtime( 1 ), tmflops, fresid,
437  $ passed
438 *
439 * Print CPU time
440 *
441  IF( ctime( 1 ).GT.0.0d+0 ) THEN
442  tmflops = nops / ctime( 1 )
443  ELSE
444  tmflops = 0.0d+0
445  END IF
446  IF( ctime( 1 ).GE.0.0d+0 )
447  $ WRITE( nout, fmt = 9993 ) 'CPU ', n, ilo, ihi, nb,
448  $ nprow, npcol, ctime( 1 ), tmflops, fresid,
449  $ passed
450  END IF
451  10 CONTINUE
452  20 CONTINUE
453 *
454  CALL blacs_gridexit( ictxt )
455  30 CONTINUE
456 *
457 * Print ending messages and close output file
458 *
459  IF( iam.EQ.0 ) THEN
460  ktests = kpass + kfail + kskip
461  WRITE( nout, fmt = * )
462  WRITE( nout, fmt = 9992 ) ktests
463  IF( check ) THEN
464  WRITE( nout, fmt = 9991 ) kpass
465  WRITE( nout, fmt = 9989 ) kfail
466  ELSE
467  WRITE( nout, fmt = 9990 ) kpass
468  END IF
469  WRITE( nout, fmt = 9988 ) kskip
470  WRITE( nout, fmt = * )
471  WRITE( nout, fmt = * )
472  WRITE( nout, fmt = 9987 )
473  IF( nout.NE.6 .AND. nout.NE.0 )
474  $ CLOSE( nout )
475  END IF
476 *
477  CALL blacs_exit( 0 )
478 *
479  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
480  $ '; It should be at least 1' )
481  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
482  $ i4 )
483  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
484  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
485  $ i11 )
486  9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ',
487  $ ' MFLOPS Residual CHECK' )
488  9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ',
489  $ '----------- -------- ------' )
490  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
491  $ f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
492  9992 FORMAT( 'Finished', i4, ' tests, with the following results:' )
493  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
494  9990 FORMAT( i5, ' tests completed without checking.' )
495  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
496  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
497  9987 FORMAT( 'END OF TESTS.' )
498  9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', g25.7 )
499 *
500  stop
501 *
502 * End of PDHRDDRIVER
503 *
504  END
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pdhrdinfo
subroutine pdhrdinfo(SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pdhrdinfo.f:5
pdgehrd
subroutine pdgehrd(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgehrd.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pdlafchk
subroutine pdlafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pdlafchk.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pdgehdrv
subroutine pdgehdrv(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK)
Definition: pdgehdrv.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pdhrddriver
program pdhrddriver
Definition: pdhrddriver.f:1