ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcludriver.f
Go to the documentation of this file.
1  PROGRAM pcludriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * ========
10 *
11 * PCLUDRIVER is the main test program for the COMPLEX
12 * SCALAPACK LU routines. This test driver performs an LU factorization
13 * and solve. If the input matrix is non-square, only the factorization
14 * is performed. Condition estimation and iterative refinement are
15 * optionally performed.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 18 lines:
20 * 'SCALAPACK, Version 2.0, LU factorization input file'
21 * 'Intel iPSC/860 hypercube, gamma model.'
22 * 'LU.out' output file name (if any)
23 * 6 device out
24 * 1 number of problems sizes
25 * 31 201 values of M
26 * 31 201 values of N
27 * 1 number of NB's
28 * 2 values of NB
29 * 1 number of NRHS's
30 * 1 values of NRHS
31 * 1 number of NBRHS's
32 * 1 values of NBRHS
33 * 1 number of process grids (ordered pairs of P & Q)
34 * 2 1 4 2 3 8 values of P
35 * 2 4 1 3 2 1 values of Q
36 * 1.0 threshold
37 * T (T or F) Test Cond. Est. and Iter. Ref. Routines
38 *
39 *
40 * Internal Parameters
41 * ===================
42 *
43 * TOTMEM INTEGER, default = 2000000
44 * TOTMEM is a machine-specific parameter indicating the
45 * maximum amount of available memory in bytes.
46 * The user should customize TOTMEM to his platform. Remember
47 * to leave room in memory for the operating system, the BLACS
48 * buffer, etc. For example, on a system with 8 MB of memory
49 * per process (e.g., one processor on an Intel iPSC/860), the
50 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51 * code, BLACS buffer, etc). However, for PVM, we usually set
52 * TOTMEM = 2000000. Some experimenting with the maximum value
53 * of TOTMEM may be required.
54 *
55 * INTGSZ INTEGER, default = 4 bytes.
56 * CPLXSZ INTEGER, default = 8 bytes.
57 * INTGSZ and CPLXSZ indicate the length in bytes on the
58 * given platform for an integer and a single precision
59 * complex.
60 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
61 *
62 * All arrays used by SCALAPACK routines are allocated from
63 * this array and referenced by pointers. The integer IPA,
64 * for example, is a pointer to the starting element of MEM for
65 * the matrix A.
66 *
67 * =====================================================================
68 *
69 * .. Parameters ..
70  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
71  $ lld_, mb_, m_, nb_, n_, rsrc_
72  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
73  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
74  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
75  INTEGER cplxsz, intgsz, memsiz, ntests, realsz, totmem
76  REAL zero
77  COMPLEX padval
78  parameter( cplxsz = 8, intgsz = 4, realsz = 4,
79  $ totmem = 4000000,
80  $ memsiz = totmem / cplxsz, ntests = 20,
81  $ padval = ( -9923.0e+0, -9923.0e+0 ),
82  $ zero = 0.0e+0 )
83 * ..
84 * .. Local Scalars ..
85  LOGICAL check, est
86  CHARACTER*6 passed
87  CHARACTER*80 outfile
88  INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
89  $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
90  $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
91  $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
92  $ lipiv, lrwork, lwork, lw2, m, maxmn,
93  $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
94  $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
95  $ nprocs, nprow, nq, nrhs, worksiz
96  REAL anorm, anorm1, fresid, rcond, sresid, sresid2,
97  $ thresh
98  DOUBLE PRECISION nops, tmflops
99 * ..
100 * .. Local Arrays ..
101  INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
102  $ mval( ntests ), nbrval( ntests ),
103  $ nbval( ntests ), nrval( ntests ),
104  $ nval( ntests ), pval( ntests ),
105  $ qval( ntests )
106  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
107  COMPLEX mem( memsiz )
108 * ..
109 * .. External Subroutines ..
110  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
111  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
112  $ blacs_pinfo, descinit, igsum2d, pcchekpad,
117 * ..
118 * .. External Functions ..
119  INTEGER iceil, ilcm, numroc
120  REAL pclange
121  EXTERNAL iceil, ilcm, numroc, pclange
122 * ..
123 * .. Intrinsic Functions ..
124  INTRINSIC dble, max, min
125 * ..
126 * .. Data Statements ..
127  DATA kfail, kpass, kskip, ktests / 4*0 /
128 * ..
129 * .. Executable Statements ..
130 *
131 * Get starting information
132 *
133  CALL blacs_pinfo( iam, nprocs )
134  iaseed = 100
135  ibseed = 200
136  CALL pcluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
137  $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
138  $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
139  $ est, mem, iam, nprocs )
140  check = ( thresh.GE.0.0e+0 )
141 *
142 * Print headings
143 *
144  IF( iam.EQ.0 ) THEN
145  WRITE( nout, fmt = * )
146  WRITE( nout, fmt = 9995 )
147  WRITE( nout, fmt = 9994 )
148  WRITE( nout, fmt = * )
149  END IF
150 *
151 * Loop over different process grids
152 *
153  DO 50 i = 1, ngrids
154 *
155  nprow = pval( i )
156  npcol = qval( i )
157 *
158 * Make sure grid information is correct
159 *
160  ierr( 1 ) = 0
161  IF( nprow.LT.1 ) THEN
162  IF( iam.EQ.0 )
163  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
164  ierr( 1 ) = 1
165  ELSE IF( npcol.LT.1 ) THEN
166  IF( iam.EQ.0 )
167  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
168  ierr( 1 ) = 1
169  ELSE IF( nprow*npcol.GT.nprocs ) THEN
170  IF( iam.EQ.0 )
171  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
172  ierr( 1 ) = 1
173  END IF
174 *
175  IF( ierr( 1 ).GT.0 ) THEN
176  IF( iam.EQ.0 )
177  $ WRITE( nout, fmt = 9997 ) 'grid'
178  kskip = kskip + 1
179  GO TO 50
180  END IF
181 *
182 * Define process grid
183 *
184  CALL blacs_get( -1, 0, ictxt )
185  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
186  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187 *
188 * Go to bottom of process grid loop if this case doesn't use my
189 * process
190 *
191  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
192  $ GO TO 50
193 *
194  DO 40 j = 1, nmat
195 *
196  m = mval( j )
197  n = nval( j )
198 *
199 * Make sure matrix information is correct
200 *
201  ierr( 1 ) = 0
202  IF( m.LT.1 ) THEN
203  IF( iam.EQ.0 )
204  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
205  ierr( 1 ) = 1
206  ELSE IF( n.LT.1 ) THEN
207  IF( iam.EQ.0 )
208  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
209  ierr( 1 ) = 1
210  END IF
211 *
212 * Check all processes for an error
213 *
214  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
215 *
216  IF( ierr( 1 ).GT.0 ) THEN
217  IF( iam.EQ.0 )
218  $ WRITE( nout, fmt = 9997 ) 'matrix'
219  kskip = kskip + 1
220  GO TO 40
221  END IF
222 *
223  DO 30 k = 1, nnb
224 *
225  nb = nbval( k )
226 *
227 * Make sure nb is legal
228 *
229  ierr( 1 ) = 0
230  IF( nb.LT.1 ) THEN
231  ierr( 1 ) = 1
232  IF( iam.EQ.0 )
233  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
234  END IF
235 *
236 * Check all processes for an error
237 *
238  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
239 *
240  IF( ierr( 1 ).GT.0 ) THEN
241  IF( iam.EQ.0 )
242  $ WRITE( nout, fmt = 9997 ) 'NB'
243  kskip = kskip + 1
244  GO TO 30
245  END IF
246 *
247 * Padding constants
248 *
249  mp = numroc( m, nb, myrow, 0, nprow )
250  np = numroc( n, nb, myrow, 0, nprow )
251  nq = numroc( n, nb, mycol, 0, npcol )
252  IF( check ) THEN
253  iprepad = max( nb, mp )
254  imidpad = nb
255  ipostpad = max( nb, nq )
256  ELSE
257  iprepad = 0
258  imidpad = 0
259  ipostpad = 0
260  END IF
261 *
262 * Initialize the array descriptor for the matrix A
263 *
264  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
265  $ max( 1, mp )+imidpad, ierr( 1 ) )
266 *
267 * Check all processes for an error
268 *
269  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
270 *
271  IF( ierr( 1 ).LT.0 ) THEN
272  IF( iam.EQ.0 )
273  $ WRITE( nout, fmt = 9997 ) 'descriptor'
274  kskip = kskip + 1
275  GO TO 30
276  END IF
277 *
278 * Assign pointers into MEM for SCALAPACK arrays, A is
279 * allocated starting at position MEM( IPREPAD+1 )
280 *
281  ipa = iprepad+1
282  IF( est .AND. m.EQ.n ) THEN
283  ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
284  ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
285  ELSE
286  ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
287  END IF
288  lipiv = iceil( intgsz*( mp+nb ), cplxsz )
289  ipw = ippiv + lipiv + ipostpad + iprepad
290 *
291  IF( check ) THEN
292 *
293 * Calculate the amount of workspace required by the
294 * checking routines PCLANGE, PCGETRRV, and
295 * PCLAFCHK
296 *
297  worksiz = max( 2, nq )
298 *
299  worksiz = max( worksiz, mp*desca( nb_ )+
300  $ nq*desca( mb_ ) )
301 *
302  worksiz = max( worksiz, mp * desca( nb_ ) )
303 *
304  worksiz = worksiz + ipostpad
305 *
306  ELSE
307 *
308  worksiz = ipostpad
309 *
310  END IF
311 *
312 * Check for adequate memory for problem size
313 *
314  ierr( 1 ) = 0
315  IF( ipw+worksiz.GT.memsiz ) THEN
316  IF( iam.EQ.0 )
317  $ WRITE( nout, fmt = 9996 ) 'factorization',
318  $ ( ipw+worksiz )*cplxsz
319  ierr( 1 ) = 1
320  END IF
321 *
322 * Check all processes for an error
323 *
324  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
325 *
326  IF( ierr( 1 ).GT.0 ) THEN
327  IF( iam.EQ.0 )
328  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
329  kskip = kskip + 1
330  GO TO 30
331  END IF
332 *
333 * Generate matrix A of Ax = b
334 *
335  CALL pcmatgen( ictxt, 'No transpose', 'No transpose',
336  $ desca( m_ ), desca( n_ ), desca( mb_ ),
337  $ desca( nb_ ), mem( ipa ), desca( lld_ ),
338  $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
339  $ mp, 0, nq, myrow, mycol, nprow, npcol )
340 *
341 * Calculate inf-norm of A for residual error-checking
342 *
343  IF( check ) THEN
344  CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
345  $ desca( lld_ ), iprepad, ipostpad,
346  $ padval )
347  CALL pcfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
348  $ lipiv, iprepad, ipostpad, padval )
349  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
350  $ mem( ipw-iprepad ), worksiz-ipostpad,
351  $ iprepad, ipostpad, padval )
352  anorm = pclange( 'I', m, n, mem( ipa ), 1, 1, desca,
353  $ mem( ipw ) )
354  anorm1 = pclange( '1', m, n, mem( ipa ), 1, 1, desca,
355  $ mem( ipw ) )
356  CALL pcchekpad( ictxt, 'PCLANGE', mp, nq,
357  $ mem( ipa-iprepad ), desca( lld_ ),
358  $ iprepad, ipostpad, padval )
359  CALL pcchekpad( ictxt, 'PCLANGE', worksiz-ipostpad,
360  $ 1, mem( ipw-iprepad ),
361  $ worksiz-ipostpad, iprepad, ipostpad,
362  $ padval )
363  END IF
364 *
365  IF( est .AND. m.EQ.n ) THEN
366  CALL pcmatgen( ictxt, 'No transpose', 'No transpose',
367  $ desca( m_ ), desca( n_ ), desca( mb_ ),
368  $ desca( nb_ ), mem( ipa0 ),
369  $ desca( lld_ ), desca( rsrc_ ),
370  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
371  $ myrow, mycol, nprow, npcol )
372  IF( check )
373  $ CALL pcfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
374  $ desca( lld_ ), iprepad, ipostpad,
375  $ padval )
376  END IF
377 *
378  CALL slboot()
379  CALL blacs_barrier( ictxt, 'All' )
380  CALL sltimer( 1 )
381 *
382 * Perform LU factorization
383 *
384  CALL pcgetrf( m, n, mem( ipa ), 1, 1, desca,
385  $ mem( ippiv ), info )
386 *
387  CALL sltimer( 1 )
388 *
389  IF( info.NE.0 ) THEN
390  IF( iam.EQ.0 )
391  $ WRITE( nout, fmt = * ) 'PCGETRF INFO=', info
392  kfail = kfail + 1
393  rcond = zero
394  GO TO 30
395  END IF
396 *
397  IF( check ) THEN
398 *
399 * Check for memory overwrite in LU factorization
400 *
401  CALL pcchekpad( ictxt, 'PCGETRF', mp, nq,
402  $ mem( ipa-iprepad ), desca( lld_ ),
403  $ iprepad, ipostpad, padval )
404  CALL pcchekpad( ictxt, 'PCGETRF', lipiv, 1,
405  $ mem( ippiv-iprepad ), lipiv, iprepad,
406  $ ipostpad, padval )
407  END IF
408 *
409  IF( m.NE.n ) THEN
410 *
411 * For non-square matrices, factorization only
412 *
413  nrhs = 0
414  nbrhs = 0
415 *
416  IF( check ) THEN
417 *
418 * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
419 *
420  CALL pcgetrrv( m, n, mem( ipa ), 1, 1, desca,
421  $ mem( ippiv ), mem( ipw ) )
422  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
423  $ desca, iaseed, anorm, fresid,
424  $ mem( ipw ) )
425 *
426 * Check for memory overwrite
427 *
428  CALL pcchekpad( ictxt, 'PCGETRRV', mp, nq,
429  $ mem( ipa-iprepad ), desca( lld_ ),
430  $ iprepad, ipostpad, padval )
431  CALL pcchekpad( ictxt, 'PCGETRRV', lipiv, 1,
432  $ mem( ippiv-iprepad ), lipiv,
433  $ iprepad, ipostpad, padval )
434  CALL pcchekpad( ictxt, 'PCGETRRV',
435  $ worksiz-ipostpad, 1,
436  $ mem( ipw-iprepad ),
437  $ worksiz-ipostpad, iprepad,
438  $ ipostpad, padval )
439 *
440 * Test residual and detect NaN result
441 *
442  IF( ( fresid.LE.thresh ) .AND.
443  $ ( (fresid-fresid).EQ.0.0e+0 ) ) THEN
444  kpass = kpass + 1
445  passed = 'PASSED'
446  ELSE
447  kfail = kfail + 1
448  passed = 'FAILED'
449  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
450  $ WRITE( nout, fmt = 9986 ) fresid
451  END IF
452 *
453  ELSE
454 *
455 * Don't perform the checking, only timing
456 *
457  kpass = kpass + 1
458  fresid = fresid - fresid
459  passed = 'BYPASS'
460 *
461  END IF
462 *
463 * Gather maximum of all CPU and WALL clock timings
464 *
465  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
466  $ wtime )
467  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
468  $ ctime )
469 *
470 * Print results
471 *
472  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
473 *
474  maxmn = max( m, n )
475  minmn = min( m, n )
476 *
477 * 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU
478 * factorization M >= N
479 *
480  nops = 4.0d+0*dble(maxmn)*(dble(minmn)**2) -
481  $ (4.0d+0 / 3.0d+0)*( dble( minmn )**3 ) +
482  $ (2.0d+0)*dble( maxmn )*dble( minmn ) -
483  $ (3.0d+0)*( dble( minmn )**2 )
484 *
485 * Calculate total megaflops -- factorization only,
486 * -- for WALL and CPU time, and print output
487 *
488 * Print WALL time if machine supports it
489 *
490  IF( wtime( 1 ).GT.0.0d+0 ) THEN
491  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
492  ELSE
493  tmflops = 0.0d+0
494  END IF
495 *
496  wtime( 2 ) = 0.0d+0
497  IF( wtime( 1 ).GE.0.0d+0 )
498  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
499  $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
500  $ wtime( 2 ), tmflops, passed
501 *
502 * Print CPU time if machine supports it
503 *
504  IF( ctime( 1 ).GT.0.0d+0 ) THEN
505  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
506  ELSE
507  tmflops = 0.0d+0
508  END IF
509 *
510  ctime( 2 ) = 0.0d+0
511  IF( ctime( 1 ).GE.0.0d+0 )
512  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
513  $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
514  $ ctime( 2 ), tmflops, passed
515  END IF
516 *
517  ELSE
518 *
519 * If M = N
520 *
521  IF( est ) THEN
522 *
523 * Calculate workspace required for PCGECON
524 *
525  lwork = max( 1, 2*np ) +
526  $ max( 2, desca( nb_ )*
527  $ max( 1, iceil( nprow-1, npcol ) ),
528  $ nq + desca( nb_ )*
529  $ max( 1, iceil( npcol-1, nprow ) ) )
530  ipw2 = ipw + lwork + ipostpad + iprepad
531  lrwork = max( 1, 2*nq )
532  lw2 = iceil( lrwork*realsz, cplxsz ) + ipostpad
533 *
534  ierr( 1 ) = 0
535  IF( ipw2+lw2.GT.memsiz ) THEN
536  IF( iam.EQ.0 )
537  $ WRITE( nout, fmt = 9996 )'cond est',
538  $ ( ipw2+lw2 )*cplxsz
539  ierr( 1 ) = 1
540  END IF
541 *
542 * Check all processes for an error
543 *
544  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
545  $ -1, 0 )
546 *
547  IF( ierr( 1 ).GT.0 ) THEN
548  IF( iam.EQ.0 )
549  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
550  kskip = kskip + 1
551  GO TO 30
552  END IF
553 *
554  IF( check ) THEN
555  CALL pcfillpad( ictxt, lwork, 1,
556  $ mem( ipw-iprepad ), lwork,
557  $ iprepad, ipostpad, padval )
558  CALL pcfillpad( ictxt, lw2-ipostpad, 1,
559  $ mem( ipw2-iprepad ),
560  $ lw2-ipostpad, iprepad,
561  $ ipostpad, padval )
562  END IF
563 *
564 * Compute condition number of the matrix
565 *
566  CALL pcgecon( '1', n, mem( ipa ), 1, 1, desca,
567  $ anorm1, rcond, mem( ipw ), lwork,
568  $ mem( ipw2 ), lrwork, info )
569 *
570  IF( check ) THEN
571  CALL pcchekpad( ictxt, 'PCGECON', np, nq,
572  $ mem( ipa-iprepad ),
573  $ desca( lld_ ), iprepad,
574  $ ipostpad, padval )
575  CALL pcchekpad( ictxt, 'PCGECON', lwork, 1,
576  $ mem( ipw-iprepad ), lwork,
577  $ iprepad, ipostpad, padval )
578  CALL pcchekpad( ictxt, 'PCGECON',
579  $ lw2-ipostpad, 1,
580  $ mem( ipw2-iprepad ),
581  $ lw2-ipostpad, iprepad,
582  $ ipostpad, padval )
583  END IF
584  END IF
585 *
586 * Loop over the different values for NRHS
587 *
588  DO 20 hh = 1, nnr
589 *
590  nrhs = nrval( hh )
591 *
592  DO 10 kk = 1, nnbr
593 *
594  nbrhs = nbrval( kk )
595 *
596 * Initialize Array Descriptor for rhs
597 *
598  CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
599  $ ictxt, max( 1, np )+imidpad,
600  $ ierr( 1 ) )
601 *
602 * Check all processes for an error
603 *
604  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
605  $ -1, 0 )
606 *
607  IF( ierr( 1 ).LT.0 ) THEN
608  IF( iam.EQ.0 )
609  $ WRITE( nout, fmt = 9997 ) 'descriptor'
610  kskip = kskip + 1
611  GO TO 10
612  END IF
613 *
614 * move IPW to allow room for RHS
615 *
616  myrhs = numroc( descb( n_ ), descb( nb_ ),
617  $ mycol, descb( csrc_ ), npcol )
618  ipb = ipw
619 *
620  IF( est ) THEN
621  ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
622  $ iprepad
623  ipferr = ipb0 + descb( lld_ )*myrhs +
624  $ ipostpad + iprepad
625  ipberr = myrhs + ipferr + ipostpad + iprepad
626  ipw = myrhs + ipberr + ipostpad + iprepad
627  ELSE
628  ipw = ipb + descb( lld_ )*myrhs + ipostpad +
629  $ iprepad
630  END IF
631 *
632 * Set worksiz: routines requiring most workspace
633 * is PCLASCHK
634 *
635  IF( check ) THEN
636  lcm = ilcm( nprow, npcol )
637  lcmq = lcm / npcol
638  worksiz = max( worksiz-ipostpad,
639  $ nq * nbrhs + np * nbrhs +
640  $ max( max( nq*nb, 2*nbrhs ),
641  $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
642  $ 0,0,lcmq ) ) )
643  worksiz = ipostpad + worksiz
644  ELSE
645  worksiz = ipostpad
646  END IF
647 *
648  ierr( 1 ) = 0
649  IF( ipw+worksiz.GT.memsiz ) THEN
650  IF( iam.EQ.0 )
651  $ WRITE( nout, fmt = 9996 )'solve',
652  $ ( ipw+worksiz )*cplxsz
653  ierr( 1 ) = 1
654  END IF
655 *
656 * Check all processes for an error
657 *
658  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
659  $ -1, 0 )
660 *
661  IF( ierr( 1 ).GT.0 ) THEN
662  IF( iam.EQ.0 )
663  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
664  kskip = kskip + 1
665  GO TO 10
666  END IF
667 *
668 * Generate RHS
669 *
670  CALL pcmatgen( ictxt, 'No', 'No', descb( m_ ),
671  $ descb( n_ ), descb( mb_ ),
672  $ descb( nb_ ), mem( ipb ),
673  $ descb( lld_ ), descb( rsrc_ ),
674  $ descb( csrc_ ), ibseed, 0, np, 0,
675  $ myrhs, myrow, mycol, nprow,
676  $ npcol )
677 *
678  IF( check )
679  $ CALL pcfillpad( ictxt, np, myrhs,
680  $ mem( ipb-iprepad ),
681  $ descb( lld_ ), iprepad,
682  $ ipostpad, padval )
683 *
684  IF( est ) THEN
685  CALL pcmatgen( ictxt, 'No', 'No',
686  $ descb( m_ ), descb( n_ ),
687  $ descb( mb_ ), descb( nb_ ),
688  $ mem( ipb0 ), descb( lld_ ),
689  $ descb( rsrc_ ),
690  $ descb( csrc_ ), ibseed, 0, np,
691  $ 0, myrhs, myrow, mycol, nprow,
692  $ npcol )
693  IF( check ) THEN
694  CALL pcfillpad( ictxt, np, myrhs,
695  $ mem( ipb0-iprepad ),
696  $ descb( lld_ ), iprepad,
697  $ ipostpad, padval )
698  CALL pcfillpad( ictxt, 1, myrhs,
699  $ mem( ipferr-iprepad ), 1,
700  $ iprepad, ipostpad,
701  $ padval )
702  CALL pcfillpad( ictxt, 1, myrhs,
703  $ mem( ipberr-iprepad ), 1,
704  $ iprepad, ipostpad,
705  $ padval )
706  END IF
707  END IF
708 *
709  CALL blacs_barrier( ictxt, 'All' )
710  CALL sltimer( 2 )
711 *
712 * Solve linear sytem via LU factorization
713 *
714  CALL pcgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
715  $ desca, mem( ippiv ), mem( ipb ),
716  $ 1, 1, descb, info )
717 *
718  CALL sltimer( 2 )
719 *
720  IF( check ) THEN
721 *
722 * check for memory overwrite
723 *
724  CALL pcchekpad( ictxt, 'PCGETRS', np, nq,
725  $ mem( ipa-iprepad ),
726  $ desca( lld_ ), iprepad,
727  $ ipostpad, padval )
728  CALL pcchekpad( ictxt, 'PCGETRS', lipiv, 1,
729  $ mem( ippiv-iprepad ), lipiv,
730  $ iprepad, ipostpad, padval )
731  CALL pcchekpad( ictxt, 'PCGETRS', np,
732  $ myrhs, mem( ipb-iprepad ),
733  $ descb( lld_ ), iprepad,
734  $ ipostpad, padval )
735 *
736  CALL pcfillpad( ictxt, worksiz-ipostpad,
737  $ 1, mem( ipw-iprepad ),
738  $ worksiz-ipostpad, iprepad,
739  $ ipostpad, padval )
740 *
741 * check the solution to rhs
742 *
743  CALL pclaschk( 'No', 'N', n, nrhs,
744  $ mem( ipb ), 1, 1, descb,
745  $ iaseed, 1, 1, desca, ibseed,
746  $ anorm, sresid, mem( ipw ) )
747 *
748  IF( iam.EQ.0 .AND. sresid.GT.thresh )
749  $ WRITE( nout, fmt = 9985 ) sresid
750 *
751 * check for memory overwrite
752 *
753  CALL pcchekpad( ictxt, 'PCLASCHK', np,
754  $ myrhs, mem( ipb-iprepad ),
755  $ descb( lld_ ), iprepad,
756  $ ipostpad, padval )
757  CALL pcchekpad( ictxt, 'PCLASCHK',
758  $ worksiz-ipostpad, 1,
759  $ mem( ipw-iprepad ),
760  $ worksiz-ipostpad,
761  $ iprepad, ipostpad, padval )
762 *
763 * The second test is a NaN trap
764 *
765  IF( sresid.LE.thresh .AND.
766  $ ( sresid-sresid ).EQ.0.0e+0 ) THEN
767  kpass = kpass + 1
768  passed = 'PASSED'
769  ELSE
770  kfail = kfail + 1
771  passed = 'FAILED'
772  END IF
773  ELSE
774  kpass = kpass + 1
775  sresid = sresid - sresid
776  passed = 'BYPASS'
777  END IF
778 *
779  IF( est ) THEN
780 *
781 * Calculate workspace required for PCGERFS
782 *
783  lwork = max( 1, 2*np )
784  ipw2 = ipw + lwork + ipostpad + iprepad
785  lrwork = max( 1, np )
786  lw2 = iceil( lrwork*realsz, cplxsz ) +
787  $ ipostpad
788 *
789  ierr( 1 ) = 0
790  IF( ipw2+lw2.GT.memsiz ) THEN
791  IF( iam.EQ.0 )
792  $ WRITE( nout, fmt = 9996 )
793  $ 'iter ref', ( ipw2+lw2 )*cplxsz
794  ierr( 1 ) = 1
795  END IF
796 *
797 * Check all processes for an error
798 *
799  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
800  $ ierr, 1, -1, 0 )
801 *
802  IF( ierr( 1 ).GT.0 ) THEN
803  IF( iam.EQ.0 )
804  $ WRITE( nout, fmt = 9997 )
805  $ 'MEMORY'
806  kskip = kskip + 1
807  GO TO 10
808  END IF
809 *
810  IF( check ) THEN
811  CALL pcfillpad( ictxt, lwork, 1,
812  $ mem( ipw-iprepad ),
813  $ lwork, iprepad, ipostpad,
814  $ padval )
815  CALL pcfillpad( ictxt, lw2-ipostpad, 1,
816  $ mem( ipw2-iprepad ),
817  $ lw2-ipostpad, iprepad,
818  $ ipostpad, padval )
819  END IF
820 *
821 * Use iterative refinement to improve the
822 * computed solution
823 *
824  CALL pcgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
825  $ 1, desca, mem( ipa ), 1, 1,
826  $ desca, mem( ippiv ),
827  $ mem( ipb0 ), 1, 1, descb,
828  $ mem( ipb ), 1, 1, descb,
829  $ mem( ipferr ), mem( ipberr ),
830  $ mem( ipw ), lwork, mem( ipw2 ),
831  $ lrwork, info )
832 *
833  IF( check ) THEN
834  CALL pcchekpad( ictxt, 'PCGERFS', np,
835  $ nq, mem( ipa0-iprepad ),
836  $ desca( lld_ ), iprepad,
837  $ ipostpad, padval )
838  CALL pcchekpad( ictxt, 'PCGERFS', np,
839  $ nq, mem( ipa-iprepad ),
840  $ desca( lld_ ), iprepad,
841  $ ipostpad, padval )
842  CALL pcchekpad( ictxt, 'PCGERFS', lipiv,
843  $ 1, mem( ippiv-iprepad ),
844  $ lipiv, iprepad,
845  $ ipostpad, padval )
846  CALL pcchekpad( ictxt, 'PCGERFS', np,
847  $ myrhs, mem( ipb-iprepad ),
848  $ descb( lld_ ), iprepad,
849  $ ipostpad, padval )
850  CALL pcchekpad( ictxt, 'PCGERFS', np,
851  $ myrhs,
852  $ mem( ipb0-iprepad ),
853  $ descb( lld_ ), iprepad,
854  $ ipostpad, padval )
855  CALL pcchekpad( ictxt, 'PCGERFS', 1,
856  $ myrhs,
857  $ mem( ipferr-iprepad ), 1,
858  $ iprepad, ipostpad,
859  $ padval )
860  CALL pcchekpad( ictxt, 'PCGERFS', 1,
861  $ myrhs,
862  $ mem( ipberr-iprepad ), 1,
863  $ iprepad, ipostpad,
864  $ padval )
865  CALL pcchekpad( ictxt, 'PCGERFS', lwork,
866  $ 1, mem( ipw-iprepad ),
867  $ lwork, iprepad, ipostpad,
868  $ padval )
869  CALL pcchekpad( ictxt, 'PCGERFS',
870  $ lw2-ipostpad, 1,
871  $ mem( ipw2-iprepad ),
872  $ lw2-ipostpad, iprepad,
873  $ ipostpad, padval )
874 *
875  CALL pcfillpad( ictxt, worksiz-ipostpad,
876  $ 1, mem( ipw-iprepad ),
877  $ worksiz-ipostpad, iprepad,
878  $ ipostpad, padval )
879 *
880 * check the solution to rhs
881 *
882  CALL pclaschk( 'No', 'N', n, nrhs,
883  $ mem( ipb ), 1, 1, descb,
884  $ iaseed, 1, 1, desca,
885  $ ibseed, anorm, sresid2,
886  $ mem( ipw ) )
887 *
888  IF( iam.EQ.0 .AND. sresid2.GT.thresh )
889  $ WRITE( nout, fmt = 9985 ) sresid2
890 *
891 * check for memory overwrite
892 *
893  CALL pcchekpad( ictxt, 'PCLASCHK', np,
894  $ myrhs, mem( ipb-iprepad ),
895  $ descb( lld_ ), iprepad,
896  $ ipostpad, padval )
897  CALL pcchekpad( ictxt, 'PCLASCHK',
898  $ worksiz-ipostpad, 1,
899  $ mem( ipw-iprepad ),
900  $ worksiz-ipostpad, iprepad,
901  $ ipostpad, padval )
902  END IF
903  END IF
904 *
905 * Gather max. of all CPU and WALL clock timings
906 *
907  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
908  $ wtime )
909  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
910  $ ctime )
911 *
912 * Print results
913 *
914  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
915 *
916 * 8/3 N^3 - N^2 flops for LU factorization
917 *
918  nops = (8.0d+0/3.0d+0)*( dble(n)**3 ) -
919  $ dble(n)**2
920 *
921 * nrhs * 8 N^2 flops for LU solve.
922 *
923  nops = nops + 8.0d+0*(dble(n)**2)*dble(nrhs)
924 *
925 * Calculate total megaflops -- factorization
926 * and solve -- for WALL and CPU time, and print
927 * output
928 *
929 * Print WALL time if machine supports it
930 *
931  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
932  $ THEN
933  tmflops = nops /
934  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
935  ELSE
936  tmflops = 0.0d+0
937  END IF
938 *
939 * Print WALL time if supported
940 *
941  IF( wtime( 2 ).GE.0.0d+0 )
942  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
943  $ nb, nrhs, nbrhs, nprow, npcol,
944  $ wtime( 1 ), wtime( 2 ), tmflops,
945  $ passed
946 *
947 * Print CPU time if supported
948 *
949  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
950  $ THEN
951  tmflops = nops /
952  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
953  ELSE
954  tmflops = 0.0d+0
955  END IF
956 *
957  IF( ctime( 2 ).GE.0.0d+0 )
958  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
959  $ nb, nrhs, nbrhs, nprow, npcol,
960  $ ctime( 1 ), ctime( 2 ), tmflops,
961  $ passed
962  END IF
963  10 CONTINUE
964  20 CONTINUE
965 *
966  IF( check.AND.( sresid.GT.thresh ) ) THEN
967 *
968 * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
969 *
970  CALL pcgetrrv( m, n, mem( ipa ), 1, 1, desca,
971  $ mem( ippiv ), mem( ipw ) )
972  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
973  $ 1, desca, iaseed, anorm, fresid,
974  $ mem( ipw ) )
975 *
976 * Check for memory overwrite
977 *
978  CALL pcchekpad( ictxt, 'PCGETRRV', np, nq,
979  $ mem( ipa-iprepad ), desca( lld_ ),
980  $ iprepad, ipostpad, padval )
981  CALL pcchekpad( ictxt, 'PCGETRRV', lipiv,
982  $ 1, mem( ippiv-iprepad ), lipiv,
983  $ iprepad, ipostpad, padval )
984  CALL pcchekpad( ictxt, 'PCGETRRV',
985  $ worksiz-ipostpad, 1,
986  $ mem( ipw-iprepad ),
987  $ worksiz-ipostpad, iprepad,
988  $ ipostpad, padval )
989 *
990  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
991  $ WRITE( nout, fmt = 9986 ) fresid
992  END IF
993  END IF
994  30 CONTINUE
995  40 CONTINUE
996  CALL blacs_gridexit( ictxt )
997  50 CONTINUE
998 *
999 * Print ending messages and close output file
1000 *
1001  60 CONTINUE
1002  IF( iam.EQ.0 ) THEN
1003  ktests = kpass + kfail + kskip
1004  WRITE( nout, fmt = * )
1005  WRITE( nout, fmt = 9992 ) ktests
1006  IF( check ) THEN
1007  WRITE( nout, fmt = 9991 ) kpass
1008  WRITE( nout, fmt = 9989 ) kfail
1009  ELSE
1010  WRITE( nout, fmt = 9990 ) kpass
1011  END IF
1012  WRITE( nout, fmt = 9988 ) kskip
1013  WRITE( nout, fmt = * )
1014  WRITE( nout, fmt = * )
1015  WRITE( nout, fmt = 9987 )
1016  IF( nout.NE.6 .AND. nout.NE.0 )
1017  $ CLOSE( nout )
1018  END IF
1019 *
1020  CALL blacs_exit( 0 )
1021 *
1022  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1023  $ '; It should be at least 1' )
1024  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1025  $ i4 )
1026  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1027  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1028  $ i11 )
1029  9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1030  $ 'Sol Time MFLOPS CHECK' )
1031  9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1032  $ '-------- -------- ------' )
1033  9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1034  $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1035  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1036  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1037  9990 FORMAT( i5, ' tests completed without checking.' )
1038  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1039  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1040  9987 FORMAT( 'END OF TESTS.' )
1041  9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1042  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1043 *
1044  stop
1045 *
1046 * End of PCLUDRIVER
1047 *
1048  END
pclafchk
subroutine pclafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pclafchk.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pcgerfs
subroutine pcgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pcgerfs.f:5
pcgetrf
subroutine pcgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: pcgetrf.f:2
pcluinfo
subroutine pcluinfo(SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, EST, WORK, IAM, NPROCS)
Definition: pcluinfo.f:5
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pcgetrs
subroutine pcgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: pcgetrs.f:3
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
pcgetrrv
subroutine pcgetrrv(M, N, A, IA, JA, DESCA, IPIV, WORK)
Definition: pcgetrrv.f:2
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pclaschk
subroutine pclaschk(SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK)
Definition: pclaschk.f:4
pcludriver
program pcludriver
Definition: pcludriver.f:1
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pcgecon
subroutine pcgecon(NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pcgecon.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2