ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pchrddriver.f
Go to the documentation of this file.
1  PROGRAM pchrddriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * March 13, 2000
7 *
8 * Purpose
9 * =======
10 *
11 * PCHRDDRIVER is the main test program for the COMPLEX
12 * ScaLAPACK HRD (Hessenberg Reduction) routines.
13 *
14 * The program must be driven by a short data file. An annotated
15 * example of a data file can be obtained by deleting the first 3
16 * characters from the following 14 lines:
17 * 'ScaLAPACK HRD input file'
18 * 'PVM machine'
19 * 'HRD.out' output file name (if any)
20 * 6 device out
21 * 2 number of problems sizes
22 * 100 101 values of N
23 * 2 1 values of ILO
24 * 99 101 values of IHI
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 7 number of process grids (ordered pairs of P & Q)
28 * 1 2 1 4 2 3 8 values of P
29 * 1 2 4 1 3 2 1 values of Q
30 * 3.0 threshold
31 *
32 * Internal Parameters
33 * ===================
34 *
35 * TOTMEM INTEGER, default = 2000000
36 * TOTMEM is a machine-specific parameter indicating the
37 * maximum amount of available memory in bytes.
38 * The user should customize TOTMEM to his platform. Remember
39 * to leave room in memory for the operating system, the BLACS
40 * buffer, etc. For example, on a system with 8 MB of memory
41 * per process (e.g., one processor on an Intel iPSC/860), the
42 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
43 * code, BLACS buffer, etc). However, for PVM, we usually set
44 * TOTMEM = 2000000. Some experimenting with the maximum value
45 * of TOTMEM may be required.
46 *
47 * INTGSZ INTEGER, default = 4 bytes.
48 * CPLXSZ INTEGER, default = 8 bytes.
49 * INTGSZ and CPLXSZ indicate the length in bytes on the
50 * given platform for an integer and a single precision
51 * complex.
52 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
53 *
54 * All arrays used by SCALAPACK routines are allocated from
55 * this array and referenced by pointers. The integer IPA,
56 * for example, is a pointer to the starting element of MEM for
57 * the matrix A.
58 *
59 * =====================================================================
60 *
61 * .. Parameters ..
62  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
63  $ lld_, mb_, m_, nb_, n_, rsrc_
64  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
65  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
66  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
67  INTEGER cplxsz, memsiz, ntests, totmem
68  COMPLEX padval
69  parameter( cplxsz = 8, totmem = 2000000,
70  $ memsiz = totmem / cplxsz, ntests = 20,
71  $ padval = ( -9923.0e+0, -9923.0e+0 ) )
72 * ..
73 * .. Local Scalars ..
74  LOGICAL check
75  CHARACTER*6 passed
76  CHARACTER*80 outfile
77  INTEGER i, iam, iaseed, ictxt, ihi, ihip, ihlp, ihlq,
78  $ ilcol, ilo, ilrow, info, inlq, imidpad, ipa,
79  $ ipt, ipw, ipostpad, iprepad, itemp, j, k,
80  $ kfail, kpass, kskip, ktests, lcm, lcmq, loff,
81  $ lwork, mycol, myrow, n, nb, ngrids, nmat, nnb,
82  $ nprocs, nout, np, npcol, nprow, nq, workhrd,
83  $ worksiz
84  REAL anorm, fresid, thresh
85  DOUBLE PRECISION nops, tmflops
86 * ..
87 * .. Local Arrays ..
88  INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
89  $ nval( ntests ), nvhi( ntests ), nvlo( ntests ),
90  $ pval( ntests ), qval( ntests )
91  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
92  COMPLEX mem( memsiz )
93 * ..
94 * .. External Subroutines ..
95  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
96  $ blacs_gridexit, blacs_gridinit, blacs_gridinfo,
97  $ descinit, igsum2d, blacs_pinfo, pcfillpad,
100  $ slcombine, sltimer
101 * ..
102 * .. External Functions ..
103  INTEGER ilcm, indxg2p, numroc
104  REAL pclange
105  EXTERNAL ilcm, indxg2p, numroc, pclange
106 * ..
107 * .. Intrinsic Functions ..
108  INTRINSIC dble, max
109 * ..
110 * .. Data statements ..
111  DATA ktests, kpass, kfail, kskip / 4*0 /
112 * ..
113 * .. Executable Statements ..
114 *
115 * Get starting information
116 *
117  CALL blacs_pinfo( iam, nprocs )
118  iaseed = 100
119  CALL pchrdinfo( outfile, nout, nmat, nval, nvlo, nvhi, ntests,
120  $ nnb, nbval, ntests, ngrids, pval, ntests, qval,
121  $ ntests, thresh, mem, iam, nprocs )
122  check = ( thresh.GE.0.0e+0 )
123 *
124 * Print headings
125 *
126  IF( iam.EQ.0 ) THEN
127  WRITE( nout, fmt = * )
128  WRITE( nout, fmt = 9995 )
129  WRITE( nout, fmt = 9994 )
130  WRITE( nout, fmt = * )
131  END IF
132 *
133 * Loop over different process grids
134 *
135  DO 30 i = 1, ngrids
136 *
137  nprow = pval( i )
138  npcol = qval( i )
139 *
140 * Make sure grid information is correct
141 *
142  ierr( 1 ) = 0
143  IF( nprow.LT.1 ) THEN
144  IF( iam.EQ.0 )
145  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
146  ierr( 1 ) = 1
147  ELSE IF( npcol.LT.1 ) THEN
148  IF( iam.EQ.0 )
149  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
150  ierr( 1 ) = 1
151  ELSE IF( nprow*npcol.GT.nprocs ) THEN
152  IF( iam.EQ.0 )
153  $ WRITE( nout, fmt = 9998 )nprow*npcol, nprocs
154  ierr( 1 ) = 1
155  END IF
156 *
157  IF( ierr( 1 ).GT.0 ) THEN
158  IF( iam.EQ.0 )
159  $ WRITE( nout, fmt = 9997 ) 'grid'
160  kskip = kskip + 1
161  GO TO 30
162  END IF
163 *
164 * Define process grid
165 *
166  CALL blacs_get( -1, 0, ictxt )
167  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
168  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
169 *
170 * Go to bottom of loop if this case doesn't use my process
171 *
172  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
173  $ GOTO 30
174 *
175  DO 20 j = 1, nmat
176 *
177  n = nval( j )
178  ilo = nvlo( j )
179  ihi = nvhi( j )
180 *
181 * Make sure matrix information is correct
182 *
183  ierr( 1 ) = 0
184  IF( n.LT.1 ) THEN
185  IF( iam.EQ.0 )
186  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
187  ierr( 1 ) = 1
188  END IF
189 *
190 * Check all processes for an error
191 *
192  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
193 *
194  IF( ierr( 1 ).GT.0 ) THEN
195  IF( iam.EQ.0 )
196  $ WRITE( nout, fmt = 9997 ) 'matrix'
197  kskip = kskip + 1
198  GO TO 20
199  END IF
200 *
201  DO 10 k = 1, nnb
202  nb = nbval( k )
203 *
204 * Make sure nb is legal
205 *
206  ierr( 1 ) = 0
207  IF( nb.LT.1 ) THEN
208  ierr( 1 ) = 1
209  IF( iam.EQ.0 )
210  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
211  END IF
212 *
213 * Check all processes for an error
214 *
215  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
216 *
217  IF( ierr( 1 ).GT.0 ) THEN
218  IF( iam.EQ.0 )
219  $ WRITE( nout, fmt = 9997 ) 'NB'
220  kskip = kskip + 1
221  GO TO 10
222  END IF
223 *
224  np = numroc( n, nb, myrow, 0, nprow )
225  nq = numroc( n, nb, mycol, 0, npcol )
226  IF( check ) THEN
227  iprepad = max( nb, np )
228  imidpad = nb
229  ipostpad = max( nb, nq )
230  ELSE
231  iprepad = 0
232  imidpad = 0
233  ipostpad = 0
234  END IF
235 *
236 * Initialize the array descriptor for the matrix A
237 *
238  CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
239  $ max( 1, np ) + imidpad, info )
240 *
241 * Check all processes for an error
242 *
243  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
244 *
245  IF( ierr( 1 ).LT.0 ) THEN
246  IF( iam.EQ.0 )
247  $ WRITE( nout, fmt = 9997 ) 'descriptor'
248  kskip = kskip + 1
249  GO TO 10
250  END IF
251 *
252 * Assign pointers into MEM for SCALAPACK arrays, A is
253 * allocated starting at position MEM( IPREPAD+1 )
254 *
255  ipa = iprepad + 1
256  ipt = ipa + desca( lld_ )*nq + ipostpad + iprepad
257  ipw = ipt + nq + ipostpad + iprepad
258 *
259 * Calculate the amount of workspace required for the
260 * reduction
261 *
262  ihip = numroc( ihi, nb, myrow, desca( rsrc_ ), nprow )
263  loff = mod( ilo-1, nb )
264  ilrow = indxg2p( ilo, nb, myrow, desca( rsrc_ ), nprow )
265  ilcol = indxg2p( ilo, nb, mycol, desca( csrc_ ), npcol )
266  ihlp = numroc( ihi-ilo+loff+1, nb, myrow, ilrow, nprow )
267  inlq = numroc( n-ilo+loff+1, nb, mycol, ilcol, npcol )
268  lwork = nb*( nb + max( ihip+1, ihlp+inlq ) )
269  workhrd = lwork + ipostpad
270  worksiz = workhrd
271 *
272 * Figure the amount of workspace required by the check
273 *
274  IF( check ) THEN
275  lcm = ilcm( nprow, npcol )
276  lcmq = lcm / npcol
277  ihlq = numroc( ihi-ilo+loff+1, nb, mycol, ilcol,
278  $ npcol )
279  itemp = nb*max( ihlp+inlq, ihlq+max( ihip,
280  $ ihlp+numroc( numroc( ihi-ilo+loff+1, nb, 0, 0,
281  $ npcol ), nb, 0, 0, lcmq ) ) )
282  worksiz = max( nb*nb + nb*ihlp + itemp, nb * np ) +
283  $ ipostpad
284  END IF
285 *
286 * Check for adequate memory for problem size
287 *
288  ierr( 1 ) = 0
289  IF( ipw+worksiz.GT.memsiz ) THEN
290  IF( iam.EQ.0 )
291  $ WRITE( nout, fmt = 9996 ) 'Hessenberg reduction',
292  $ ( ipw+worksiz )*cplxsz
293  ierr( 1 ) = 1
294  END IF
295 *
296 * Check all processes for an error
297 *
298  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
299 *
300  IF( ierr( 1 ).GT.0 ) THEN
301  IF( iam.EQ.0 )
302  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
303  kskip = kskip + 1
304  GO TO 10
305  END IF
306 *
307 * Generate A
308 *
309  CALL pcmatgen( ictxt, 'No', 'No', desca( m_ ),
310  $ desca( n_ ), desca( mb_ ), desca( nb_ ),
311  $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
312  $ desca( csrc_ ),
313  $ iaseed, 0, np, 0, nq, myrow, mycol,
314  $ nprow, npcol )
315 *
316 * Need Infinity-norm of A for checking
317 *
318  IF( check ) THEN
319  CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
320  $ desca( lld_ ), iprepad, ipostpad,
321  $ padval )
322  CALL pcfillpad( ictxt, nq, 1, mem( ipt-iprepad ),
323  $ nq, iprepad, ipostpad, padval )
324  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
325  $ mem( ipw-iprepad ), worksiz-ipostpad,
326  $ iprepad, ipostpad, padval )
327  anorm = pclange( 'I', n, n, mem( ipa ), 1, 1, desca,
328  $ mem( ipw ) )
329  CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
330  $ mem( ipa-iprepad ), desca( lld_ ),
331  $ iprepad, ipostpad, padval )
332  CALL pcchekpad( ictxt, 'PCLANGE',
333  $ worksiz-ipostpad, 1,
334  $ mem( ipw-iprepad ), worksiz-ipostpad,
335  $ iprepad, ipostpad, padval )
336  CALL pcfillpad( ictxt, workhrd-ipostpad, 1,
337  $ mem( ipw-iprepad ), workhrd-ipostpad,
338  $ iprepad, ipostpad, padval )
339  END IF
340 *
341  CALL slboot()
342  CALL blacs_barrier( ictxt, 'All' )
343  CALL sltimer( 1 )
344 *
345 * Reduce Hessenberg form
346 *
347  CALL pcgehrd( n, ilo, ihi, mem( ipa ), 1, 1, desca,
348  $ mem( ipt ), mem( ipw ), lwork, info )
349  CALL sltimer( 1 )
350 *
351  IF( check ) THEN
352 *
353 * Check for memory overwrite
354 *
355  CALL pcchekpad( ictxt, 'PCGEHRD', np, nq,
356  $ mem( ipa-iprepad ), desca( lld_ ),
357  $ iprepad, ipostpad, padval )
358  CALL pcchekpad( ictxt, 'PCGEHRD', nq, 1,
359  $ mem( ipt-iprepad ), nq, iprepad,
360  $ ipostpad, padval )
361  CALL pcchekpad( ictxt, 'PCGEHRD', workhrd-ipostpad,
362  $ 1, mem( ipw-iprepad ),
363  $ workhrd-ipostpad, iprepad,
364  $ ipostpad, padval )
365  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
366  $ mem( ipw-iprepad ), worksiz-ipostpad,
367  $ iprepad, ipostpad, padval )
368 *
369 * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps)
370 *
371  CALL pcgehdrv( n, ilo, ihi, mem( ipa ), 1, 1, desca,
372  $ mem( ipt ), mem( ipw ) )
373  CALL pclafchk( 'No', 'No', n, n, mem( ipa ), 1, 1,
374  $ desca, iaseed, anorm, fresid,
375  $ mem( ipw ) )
376 *
377 * Check for memory overwrite
378 *
379  CALL pcchekpad( ictxt, 'PCGEHDRV', np, nq,
380  $ mem( ipa-iprepad ), desca( lld_ ),
381  $ iprepad, ipostpad, padval )
382  CALL pcchekpad( ictxt, 'PCGEHDRV', nq, 1,
383  $ mem( ipt-iprepad ), nq, iprepad,
384  $ ipostpad, padval )
385  CALL pcchekpad( ictxt, 'PCGEHDRV',
386  $ worksiz-ipostpad, 1,
387  $ mem( ipw-iprepad ), worksiz-ipostpad,
388  $ iprepad, ipostpad, padval )
389 *
390 * Test residual and detect NaN result
391 *
392  IF( fresid.LE.thresh .AND. fresid-fresid.EQ.0.0e+0 )
393  $ THEN
394  kpass = kpass + 1
395  passed = 'PASSED'
396  ELSE
397  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
398  $ WRITE( nout, fmt = 9986 ) fresid
399  kfail = kfail + 1
400  passed = 'FAILED'
401  END IF
402  ELSE
403 *
404 * Don't perform the checking, only the timing operation
405 *
406  kpass = kpass + 1
407  fresid = fresid - fresid
408  passed = 'BYPASS'
409  END IF
410 *
411 * Gather max. of all CPU and WALL clock timings
412 *
413  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
414  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
415 *
416 * Print results
417 *
418  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
419 *
420 * HRD requires 40/3 * N^3 floating point ops. (flops)
421 * more precisely,
422 * HRD requires 16/3*(IHI-ILO)^3+8*IHI*(IHI-ILO)^2 flops
423 *
424  nops = dble( ihi-ilo )
425  nops = nops * nops *
426  $ ( 8.0d0*dble( ihi ) + (16.0d0/3.0d0)*nops )
427  nops = nops / 1.0d+6
428 *
429 * Print WALL time
430 *
431  IF( wtime( 1 ).GT.0.0d+0 ) THEN
432  tmflops = nops / wtime( 1 )
433  ELSE
434  tmflops = 0.0d+0
435  END IF
436  IF( wtime( 1 ).GE.0.0d+0 )
437  $ WRITE( nout, fmt = 9993 ) 'WALL', n, ilo, ihi, nb,
438  $ nprow, npcol, wtime( 1 ), tmflops, fresid,
439  $ passed
440 *
441 * Print CPU time
442 *
443  IF( ctime( 1 ).GT.0.0d+0 ) THEN
444  tmflops = nops / ctime( 1 )
445  ELSE
446  tmflops = 0.0d+0
447  END IF
448  IF( ctime( 1 ).GE.0.0d+0 )
449  $ WRITE( nout, fmt = 9993 ) 'CPU ', n, ilo, ihi, nb,
450  $ nprow, npcol, ctime( 1 ), tmflops, fresid,
451  $ passed
452  END IF
453  10 CONTINUE
454  20 CONTINUE
455 *
456  CALL blacs_gridexit( ictxt )
457  30 CONTINUE
458 *
459 * Print ending messages and close output file
460 *
461  IF( iam.EQ.0 ) THEN
462  ktests = kpass + kfail + kskip
463  WRITE( nout, fmt = * )
464  WRITE( nout, fmt = 9992 ) ktests
465  IF( check ) THEN
466  WRITE( nout, fmt = 9991 ) kpass
467  WRITE( nout, fmt = 9989 ) kfail
468  ELSE
469  WRITE( nout, fmt = 9990 ) kpass
470  END IF
471  WRITE( nout, fmt = 9988 ) kskip
472  WRITE( nout, fmt = * )
473  WRITE( nout, fmt = * )
474  WRITE( nout, fmt = 9987 )
475  IF( nout.NE.6 .AND. nout.NE.0 )
476  $ CLOSE( nout )
477  END IF
478 *
479  CALL blacs_exit( 0 )
480 *
481  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
482  $ '; It should be at least 1' )
483  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
484  $ i4 )
485  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
486  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
487  $ i11 )
488  9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ',
489  $ ' MFLOPS Residual CHECK' )
490  9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ',
491  $ '----------- -------- ------' )
492  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
493  $ f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
494  9992 FORMAT( 'Finished', i4, ' tests, with the following results:' )
495  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
496  9990 FORMAT( i5, ' tests completed without checking.' )
497  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
498  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
499  9987 FORMAT( 'END OF TESTS.' )
500  9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', g25.7 )
501 *
502  stop
503 *
504 * End of PCHRDDRIVER
505 *
506  END
pcgehdrv
subroutine pcgehdrv(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK)
Definition: pcgehdrv.f:2
pchrdinfo
subroutine pchrdinfo(SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pchrdinfo.f:5
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
pclafchk
subroutine pclafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pclafchk.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pchrddriver
program pchrddriver
Definition: pchrddriver.f:1
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pcgehrd
subroutine pcgehrd(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgehrd.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267