ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pshrddriver.f
Go to the documentation of this file.
1  PROGRAM pshrddriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * March 13, 2000
7 *
8 * Purpose
9 * =======
10 *
11 * PSHRDDRIVER is the main test program for the REAL
12 * ScaLAPACK HRD (Hessenberg Reduction) routines.
13 *
14 * The program must be driven by a short data file. An annotated
15 * example of a data file can be obtained by deleting the first 3
16 * characters from the following 14 lines:
17 * 'ScaLAPACK HRD input file'
18 * 'PVM machine'
19 * 'HRD.out' output file name (if any)
20 * 6 device out
21 * 2 number of problems sizes
22 * 100 101 values of N
23 * 2 1 values of ILO
24 * 99 101 values of IHI
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 7 number of process grids (ordered pairs of P & Q)
28 * 1 2 1 4 2 3 8 values of P
29 * 1 2 4 1 3 2 1 values of Q
30 * 3.0 threshold
31 *
32 * Internal Parameters
33 * ===================
34 *
35 * TOTMEM INTEGER, default = 2000000
36 * TOTMEM is a machine-specific parameter indicating the
37 * maximum amount of available memory in bytes.
38 * The user should customize TOTMEM to his platform. Remember
39 * to leave room in memory for the operating system, the BLACS
40 * buffer, etc. For example, on a system with 8 MB of memory
41 * per process (e.g., one processor on an Intel iPSC/860), the
42 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
43 * code, BLACS buffer, etc). However, for PVM, we usually set
44 * TOTMEM = 2000000. Some experimenting with the maximum value
45 * of TOTMEM may be required.
46 *
47 * INTGSZ INTEGER, default = 4 bytes.
48 * REALSZ INTEGER, default = 4 bytes.
49 * INTGSZ and REALSZ indicate the length in bytes on the
50 * given platform for an integer and a single precision real.
51 * MEM REAL array, dimension ( TOTMEM / REALSZ )
52 *
53 * All arrays used by SCALAPACK routines are allocated from
54 * this array and referenced by pointers. The integer IPA,
55 * for example, is a pointer to the starting element of MEM for
56 * the matrix A.
57 *
58 * =====================================================================
59 *
60 * .. Parameters ..
61  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
62  $ lld_, mb_, m_, nb_, n_, rsrc_
63  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
64  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
65  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
66  INTEGER memsiz, ntests, realsz, totmem
67  REAL padval
68  parameter( realsz = 4, totmem = 2000000,
69  $ memsiz = totmem / realsz, ntests = 20,
70  $ padval = -9923.0e+0 )
71 * ..
72 * .. Local Scalars ..
73  LOGICAL check
74  CHARACTER*6 passed
75  CHARACTER*80 outfile
76  INTEGER i, iam, iaseed, ictxt, ihi, ihip, ihlp, ihlq,
77  $ ilcol, ilo, ilrow, info, inlq, imidpad, ipa,
78  $ ipt, ipw, ipostpad, iprepad, itemp, j, k,
79  $ kfail, kpass, kskip, ktests, lcm, lcmq, loff,
80  $ lwork, mycol, myrow, n, nb, ngrids, nmat, nnb,
81  $ nprocs, nout, np, npcol, nprow, nq, workhrd,
82  $ worksiz
83  REAL anorm, fresid, thresh
84  DOUBLE PRECISION nops, tmflops
85 * ..
86 * .. Local Arrays ..
87  INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
88  $ nval( ntests ), nvhi( ntests ), nvlo( ntests ),
89  $ pval( ntests ), qval( ntests )
90  REAL mem( memsiz )
91  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
92 * ..
93 * .. External Subroutines ..
94  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
95  $ blacs_gridexit, blacs_gridinit, blacs_gridinfo,
96  $ descinit, igsum2d, blacs_pinfo, psfillpad,
100 * ..
101 * .. External Functions ..
102  INTEGER ilcm, indxg2p, numroc
103  REAL pslange
104  EXTERNAL ilcm, indxg2p, numroc, pslange
105 * ..
106 * .. Intrinsic Functions ..
107  INTRINSIC dble, max
108 * ..
109 * .. Data statements ..
110  DATA ktests, kpass, kfail, kskip / 4*0 /
111 * ..
112 * .. Executable Statements ..
113 *
114 * Get starting information
115 *
116  CALL blacs_pinfo( iam, nprocs )
117  iaseed = 100
118  CALL pshrdinfo( outfile, nout, nmat, nval, nvlo, nvhi, ntests,
119  $ nnb, nbval, ntests, ngrids, pval, ntests, qval,
120  $ ntests, thresh, mem, iam, nprocs )
121  check = ( thresh.GE.0.0e+0 )
122 *
123 * Print headings
124 *
125  IF( iam.EQ.0 ) THEN
126  WRITE( nout, fmt = * )
127  WRITE( nout, fmt = 9995 )
128  WRITE( nout, fmt = 9994 )
129  WRITE( nout, fmt = * )
130  END IF
131 *
132 * Loop over different process grids
133 *
134  DO 30 i = 1, ngrids
135 *
136  nprow = pval( i )
137  npcol = qval( i )
138 *
139 * Make sure grid information is correct
140 *
141  ierr( 1 ) = 0
142  IF( nprow.LT.1 ) THEN
143  IF( iam.EQ.0 )
144  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
145  ierr( 1 ) = 1
146  ELSE IF( npcol.LT.1 ) THEN
147  IF( iam.EQ.0 )
148  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
149  ierr( 1 ) = 1
150  ELSE IF( nprow*npcol.GT.nprocs ) THEN
151  IF( iam.EQ.0 )
152  $ WRITE( nout, fmt = 9998 )nprow*npcol, nprocs
153  ierr( 1 ) = 1
154  END IF
155 *
156  IF( ierr( 1 ).GT.0 ) THEN
157  IF( iam.EQ.0 )
158  $ WRITE( nout, fmt = 9997 ) 'grid'
159  kskip = kskip + 1
160  GO TO 30
161  END IF
162 *
163 * Define process grid
164 *
165  CALL blacs_get( -1, 0, ictxt )
166  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
167  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
168 *
169 * Go to bottom of loop if this case doesn't use my process
170 *
171  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
172  $ GOTO 30
173 *
174  DO 20 j = 1, nmat
175 *
176  n = nval( j )
177  ilo = nvlo( j )
178  ihi = nvhi( j )
179 *
180 * Make sure matrix information is correct
181 *
182  ierr( 1 ) = 0
183  IF( n.LT.1 ) THEN
184  IF( iam.EQ.0 )
185  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
186  ierr( 1 ) = 1
187  END IF
188 *
189 * Check all processes for an error
190 *
191  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
192 *
193  IF( ierr( 1 ).GT.0 ) THEN
194  IF( iam.EQ.0 )
195  $ WRITE( nout, fmt = 9997 ) 'matrix'
196  kskip = kskip + 1
197  GO TO 20
198  END IF
199 *
200  DO 10 k = 1, nnb
201  nb = nbval( k )
202 *
203 * Make sure nb is legal
204 *
205  ierr( 1 ) = 0
206  IF( nb.LT.1 ) THEN
207  ierr( 1 ) = 1
208  IF( iam.EQ.0 )
209  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
210  END IF
211 *
212 * Check all processes for an error
213 *
214  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
215 *
216  IF( ierr( 1 ).GT.0 ) THEN
217  IF( iam.EQ.0 )
218  $ WRITE( nout, fmt = 9997 ) 'NB'
219  kskip = kskip + 1
220  GO TO 10
221  END IF
222 *
223  np = numroc( n, nb, myrow, 0, nprow )
224  nq = numroc( n, nb, mycol, 0, npcol )
225  IF( check ) THEN
226  iprepad = max( nb, np )
227  imidpad = nb
228  ipostpad = max( nb, nq )
229  ELSE
230  iprepad = 0
231  imidpad = 0
232  ipostpad = 0
233  END IF
234 *
235 * Initialize the array descriptor for the matrix A
236 *
237  CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
238  $ max( 1, np ) + imidpad, info )
239 *
240 * Check all processes for an error
241 *
242  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
243 *
244  IF( ierr( 1 ).LT.0 ) THEN
245  IF( iam.EQ.0 )
246  $ WRITE( nout, fmt = 9997 ) 'descriptor'
247  kskip = kskip + 1
248  GO TO 10
249  END IF
250 *
251 * Assign pointers into MEM for SCALAPACK arrays, A is
252 * allocated starting at position MEM( IPREPAD+1 )
253 *
254  ipa = iprepad + 1
255  ipt = ipa + desca( lld_ )*nq + ipostpad + iprepad
256  ipw = ipt + nq + ipostpad + iprepad
257 *
258 * Calculate the amount of workspace required for the
259 * reduction
260 *
261  ihip = numroc( ihi, nb, myrow, desca( rsrc_ ), nprow )
262  loff = mod( ilo-1, nb )
263  ilrow = indxg2p( ilo, nb, myrow, desca( rsrc_ ), nprow )
264  ilcol = indxg2p( ilo, nb, mycol, desca( csrc_ ), npcol )
265  ihlp = numroc( ihi-ilo+loff+1, nb, myrow, ilrow, nprow )
266  inlq = numroc( n-ilo+loff+1, nb, mycol, ilcol, npcol )
267  lwork = nb*( nb + max( ihip+1, ihlp+inlq ) )
268  workhrd = lwork + ipostpad
269  worksiz = workhrd
270 *
271 * Figure the amount of workspace required by the check
272 *
273  IF( check ) THEN
274  lcm = ilcm( nprow, npcol )
275  lcmq = lcm / npcol
276  ihlq = numroc( ihi-ilo+loff+1, nb, mycol, ilcol,
277  $ npcol )
278  itemp = nb*max( ihlp+inlq, ihlq+max( ihip,
279  $ ihlp+numroc( numroc( ihi-ilo+loff+1, nb, 0, 0,
280  $ npcol ), nb, 0, 0, lcmq ) ) )
281  worksiz = max( nb*nb + nb*ihlp + itemp, nb * np ) +
282  $ ipostpad
283  END IF
284 *
285 * Check for adequate memory for problem size
286 *
287  ierr( 1 ) = 0
288  IF( ipw+worksiz.GT.memsiz ) THEN
289  IF( iam.EQ.0 )
290  $ WRITE( nout, fmt = 9996 ) 'Hessenberg reduction',
291  $ ( ipw+worksiz )*realsz
292  ierr( 1 ) = 1
293  END IF
294 *
295 * Check all processes for an error
296 *
297  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
298 *
299  IF( ierr( 1 ).GT.0 ) THEN
300  IF( iam.EQ.0 )
301  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
302  kskip = kskip + 1
303  GO TO 10
304  END IF
305 *
306 * Generate A
307 *
308  CALL psmatgen( ictxt, 'No', 'No', desca( m_ ),
309  $ desca( n_ ), desca( mb_ ), desca( nb_ ),
310  $ mem( ipa ), desca( lld_ ), desca( rsrc_ ),
311  $ desca( csrc_ ),
312  $ iaseed, 0, np, 0, nq, myrow, mycol,
313  $ nprow, npcol )
314 *
315 * Need Infinity-norm of A for checking
316 *
317  IF( check ) THEN
318  CALL psfillpad( ictxt, np, nq, mem( ipa-iprepad ),
319  $ desca( lld_ ), iprepad, ipostpad,
320  $ padval )
321  CALL psfillpad( ictxt, nq, 1, mem( ipt-iprepad ),
322  $ nq, iprepad, ipostpad, padval )
323  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
324  $ mem( ipw-iprepad ), worksiz-ipostpad,
325  $ iprepad, ipostpad, padval )
326  anorm = pslange( 'I', n, n, mem( ipa ), 1, 1, desca,
327  $ mem( ipw ) )
328  CALL pschekpad( ictxt, 'PSLANGE', np, nq,
329  $ mem( ipa-iprepad ), desca( lld_ ),
330  $ iprepad, ipostpad, padval )
331  CALL pschekpad( ictxt, 'PSLANGE',
332  $ worksiz-ipostpad, 1,
333  $ mem( ipw-iprepad ), worksiz-ipostpad,
334  $ iprepad, ipostpad, padval )
335  CALL psfillpad( ictxt, workhrd-ipostpad, 1,
336  $ mem( ipw-iprepad ), workhrd-ipostpad,
337  $ iprepad, ipostpad, padval )
338  END IF
339 *
340  CALL slboot()
341  CALL blacs_barrier( ictxt, 'All' )
342  CALL sltimer( 1 )
343 *
344 * Reduce Hessenberg form
345 *
346  CALL psgehrd( n, ilo, ihi, mem( ipa ), 1, 1, desca,
347  $ mem( ipt ), mem( ipw ), lwork, info )
348  CALL sltimer( 1 )
349 *
350  IF( check ) THEN
351 *
352 * Check for memory overwrite
353 *
354  CALL pschekpad( ictxt, 'PSGEHRD', np, nq,
355  $ mem( ipa-iprepad ), desca( lld_ ),
356  $ iprepad, ipostpad, padval )
357  CALL pschekpad( ictxt, 'PSGEHRD', nq, 1,
358  $ mem( ipt-iprepad ), nq, iprepad,
359  $ ipostpad, padval )
360  CALL pschekpad( ictxt, 'PSGEHRD', workhrd-ipostpad,
361  $ 1, mem( ipw-iprepad ),
362  $ workhrd-ipostpad, iprepad,
363  $ ipostpad, padval )
364  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
365  $ mem( ipw-iprepad ), worksiz-ipostpad,
366  $ iprepad, ipostpad, padval )
367 *
368 * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps)
369 *
370  CALL psgehdrv( n, ilo, ihi, mem( ipa ), 1, 1, desca,
371  $ mem( ipt ), mem( ipw ) )
372  CALL pslafchk( 'No', 'No', n, n, mem( ipa ), 1, 1,
373  $ desca, iaseed, anorm, fresid,
374  $ mem( ipw ) )
375 *
376 * Check for memory overwrite
377 *
378  CALL pschekpad( ictxt, 'PSGEHDRV', np, nq,
379  $ mem( ipa-iprepad ), desca( lld_ ),
380  $ iprepad, ipostpad, padval )
381  CALL pschekpad( ictxt, 'PSGEHDRV', nq, 1,
382  $ mem( ipt-iprepad ), nq, iprepad,
383  $ ipostpad, padval )
384  CALL pschekpad( ictxt, 'PSGEHDRV',
385  $ worksiz-ipostpad, 1,
386  $ mem( ipw-iprepad ), worksiz-ipostpad,
387  $ iprepad, ipostpad, padval )
388 *
389 * Test residual and detect NaN result
390 *
391  IF( fresid.LE.thresh .AND. fresid-fresid.EQ.0.0e+0 )
392  $ THEN
393  kpass = kpass + 1
394  passed = 'PASSED'
395  ELSE
396  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
397  $ WRITE( nout, fmt = 9986 ) fresid
398  kfail = kfail + 1
399  passed = 'FAILED'
400  END IF
401  ELSE
402 *
403 * Don't perform the checking, only the timing operation
404 *
405  kpass = kpass + 1
406  fresid = fresid - fresid
407  passed = 'BYPASS'
408  END IF
409 *
410 * Gather max. of all CPU and WALL clock timings
411 *
412  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
413  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
414 *
415 * Print results
416 *
417  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
418 *
419 * HRD requires 10/3 * N^3 floating point ops. (flops)
420 * more precisely,
421 * HRD requires 4/3*(IHI-ILO)^3 + 2*IHI*(IHI-ILO)^2 flops
422 *
423  nops = dble( ihi-ilo )
424  nops = nops * nops *
425  $ ( 2.0d0*dble( ihi ) + (4.0d0/3.0d0)*nops )
426  nops = nops / 1.0d+6
427 *
428 * Print WALL time
429 *
430  IF( wtime( 1 ).GT.0.0d+0 ) THEN
431  tmflops = nops / wtime( 1 )
432  ELSE
433  tmflops = 0.0d+0
434  END IF
435  IF( wtime( 1 ).GE.0.0d+0 )
436  $ WRITE( nout, fmt = 9993 ) 'WALL', n, ilo, ihi, nb,
437  $ nprow, npcol, wtime( 1 ), tmflops, fresid,
438  $ passed
439 *
440 * Print CPU time
441 *
442  IF( ctime( 1 ).GT.0.0d+0 ) THEN
443  tmflops = nops / ctime( 1 )
444  ELSE
445  tmflops = 0.0d+0
446  END IF
447  IF( ctime( 1 ).GE.0.0d+0 )
448  $ WRITE( nout, fmt = 9993 ) 'CPU ', n, ilo, ihi, nb,
449  $ nprow, npcol, ctime( 1 ), tmflops, fresid,
450  $ passed
451  END IF
452  10 CONTINUE
453  20 CONTINUE
454 *
455  CALL blacs_gridexit( ictxt )
456  30 CONTINUE
457 *
458 * Print ending messages and close output file
459 *
460  IF( iam.EQ.0 ) THEN
461  ktests = kpass + kfail + kskip
462  WRITE( nout, fmt = * )
463  WRITE( nout, fmt = 9992 ) ktests
464  IF( check ) THEN
465  WRITE( nout, fmt = 9991 ) kpass
466  WRITE( nout, fmt = 9989 ) kfail
467  ELSE
468  WRITE( nout, fmt = 9990 ) kpass
469  END IF
470  WRITE( nout, fmt = 9988 ) kskip
471  WRITE( nout, fmt = * )
472  WRITE( nout, fmt = * )
473  WRITE( nout, fmt = 9987 )
474  IF( nout.NE.6 .AND. nout.NE.0 )
475  $ CLOSE( nout )
476  END IF
477 *
478  CALL blacs_exit( 0 )
479 *
480  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
481  $ '; It should be at least 1' )
482  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
483  $ i4 )
484  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
485  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
486  $ i11 )
487  9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ',
488  $ ' MFLOPS Residual CHECK' )
489  9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ',
490  $ '----------- -------- ------' )
491  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
492  $ f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
493  9992 FORMAT( 'Finished', i4, ' tests, with the following results:' )
494  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
495  9990 FORMAT( i5, ' tests completed without checking.' )
496  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
497  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
498  9987 FORMAT( 'END OF TESTS.' )
499  9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', g25.7 )
500 *
501  stop
502 *
503 * End of PSHRDDRIVER
504 *
505  END
psgehrd
subroutine psgehrd(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgehrd.f:3
pslafchk
subroutine pslafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pslafchk.f:3
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
pshrddriver
program pshrddriver
Definition: pshrddriver.f:1
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psgehdrv
subroutine psgehdrv(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK)
Definition: psgehdrv.f:2
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pshrdinfo
subroutine pshrdinfo(SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pshrdinfo.f:5
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267