ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzludriver.f
Go to the documentation of this file.
1  PROGRAM pzludriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * ========
10 *
11 * PZLUDRIVER is the main test program for the COMPLEX*16
12 * SCALAPACK LU routines. This test driver performs an LU factorization
13 * and solve. If the input matrix is non-square, only the factorization
14 * is performed. Condition estimation and iterative refinement are
15 * optionally performed.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 18 lines:
20 * 'SCALAPACK, Version 2.0, LU factorization input file'
21 * 'Intel iPSC/860 hypercube, gamma model.'
22 * 'LU.out' output file name (if any)
23 * 6 device out
24 * 1 number of problems sizes
25 * 31 201 values of M
26 * 31 201 values of N
27 * 1 number of NB's
28 * 2 values of NB
29 * 1 number of NRHS's
30 * 1 values of NRHS
31 * 1 number of NBRHS's
32 * 1 values of NBRHS
33 * 1 number of process grids (ordered pairs of P & Q)
34 * 2 1 4 2 3 8 values of P
35 * 2 4 1 3 2 1 values of Q
36 * 1.0 threshold
37 * T (T or F) Test Cond. Est. and Iter. Ref. Routines
38 *
39 *
40 * Internal Parameters
41 * ===================
42 *
43 * TOTMEM INTEGER, default = 2000000
44 * TOTMEM is a machine-specific parameter indicating the
45 * maximum amount of available memory in bytes.
46 * The user should customize TOTMEM to his platform. Remember
47 * to leave room in memory for the operating system, the BLACS
48 * buffer, etc. For example, on a system with 8 MB of memory
49 * per process (e.g., one processor on an Intel iPSC/860), the
50 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51 * code, BLACS buffer, etc). However, for PVM, we usually set
52 * TOTMEM = 2000000. Some experimenting with the maximum value
53 * of TOTMEM may be required.
54 *
55 * INTGSZ INTEGER, default = 4 bytes.
56 * ZPLXSZ INTEGER, default = 16 bytes.
57 * INTGSZ and ZPLXSZ indicate the length in bytes on the
58 * given platform for an integer and a double precision
59 * complex.
60 * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ )
61 *
62 * All arrays used by SCALAPACK routines are allocated from
63 * this array and referenced by pointers. The integer IPA,
64 * for example, is a pointer to the starting element of MEM for
65 * the matrix A.
66 *
67 * =====================================================================
68 *
69 * .. Parameters ..
70  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
71  $ lld_, mb_, m_, nb_, n_, rsrc_
72  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
73  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
74  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
75  INTEGER intgsz, dblesz, memsiz, ntests, totmem, zplxsz
76  DOUBLE PRECISION zero
77  COMPLEX*16 padval
78  parameter( intgsz = 4, dblesz = 8, totmem = 8000000,
79  $ zplxsz = 16, memsiz = totmem / zplxsz,
80  $ ntests = 20,
81  $ padval = ( -9923.0d+0, -9923.0d+0 ),
82  $ zero = 0.0d+0 )
83 * ..
84 * .. Local Scalars ..
85  LOGICAL check, est
86  CHARACTER*6 passed
87  CHARACTER*80 outfile
88  INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
89  $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
90  $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
91  $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
92  $ lipiv, lrwork, lwork, lw2, m, maxmn,
93  $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
94  $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
95  $ nprocs, nprow, nq, nrhs, worksiz
96  REAL thresh
97  DOUBLE PRECISION anorm, anorm1, fresid, nops, rcond,
98  $ sresid, sresid2, tmflops
99 * ..
100 * .. Local Arrays ..
101  INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
102  $ mval( ntests ), nbrval( ntests ),
103  $ nbval( ntests ), nrval( ntests ),
104  $ nval( ntests ), pval( ntests ),
105  $ qval( ntests )
106  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
107  COMPLEX*16 mem( memsiz )
108 * ..
109 * .. External Subroutines ..
110  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
111  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
112  $ blacs_pinfo, descinit, igsum2d, pzchekpad,
117 * ..
118 * .. External Functions ..
119  INTEGER iceil, ilcm, numroc
120  DOUBLE PRECISION pzlange
121  EXTERNAL iceil, ilcm, numroc, pzlange
122 * ..
123 * .. Intrinsic Functions ..
124  INTRINSIC dble, max, min
125 * ..
126 * .. Data Statements ..
127  DATA kfail, kpass, kskip, ktests / 4*0 /
128 * ..
129 * .. Executable Statements ..
130 *
131 * Get starting information
132 *
133  CALL blacs_pinfo( iam, nprocs )
134  iaseed = 100
135  ibseed = 200
136  CALL pzluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
137  $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
138  $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
139  $ est, mem, iam, nprocs )
140  check = ( thresh.GE.0.0e+0 )
141 *
142 * Print headings
143 *
144  IF( iam.EQ.0 ) THEN
145  WRITE( nout, fmt = * )
146  WRITE( nout, fmt = 9995 )
147  WRITE( nout, fmt = 9994 )
148  WRITE( nout, fmt = * )
149  END IF
150 *
151 * Loop over different process grids
152 *
153  DO 50 i = 1, ngrids
154 *
155  nprow = pval( i )
156  npcol = qval( i )
157 *
158 * Make sure grid information is correct
159 *
160  ierr( 1 ) = 0
161  IF( nprow.LT.1 ) THEN
162  IF( iam.EQ.0 )
163  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
164  ierr( 1 ) = 1
165  ELSE IF( npcol.LT.1 ) THEN
166  IF( iam.EQ.0 )
167  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
168  ierr( 1 ) = 1
169  ELSE IF( nprow*npcol.GT.nprocs ) THEN
170  IF( iam.EQ.0 )
171  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
172  ierr( 1 ) = 1
173  END IF
174 *
175  IF( ierr( 1 ).GT.0 ) THEN
176  IF( iam.EQ.0 )
177  $ WRITE( nout, fmt = 9997 ) 'grid'
178  kskip = kskip + 1
179  GO TO 50
180  END IF
181 *
182 * Define process grid
183 *
184  CALL blacs_get( -1, 0, ictxt )
185  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
186  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187 *
188 * Go to bottom of process grid loop if this case doesn't use my
189 * process
190 *
191  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
192  $ GO TO 50
193 *
194  DO 40 j = 1, nmat
195 *
196  m = mval( j )
197  n = nval( j )
198 *
199 * Make sure matrix information is correct
200 *
201  ierr( 1 ) = 0
202  IF( m.LT.1 ) THEN
203  IF( iam.EQ.0 )
204  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
205  ierr( 1 ) = 1
206  ELSE IF( n.LT.1 ) THEN
207  IF( iam.EQ.0 )
208  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
209  ierr( 1 ) = 1
210  END IF
211 *
212 * Check all processes for an error
213 *
214  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
215 *
216  IF( ierr( 1 ).GT.0 ) THEN
217  IF( iam.EQ.0 )
218  $ WRITE( nout, fmt = 9997 ) 'matrix'
219  kskip = kskip + 1
220  GO TO 40
221  END IF
222 *
223  DO 30 k = 1, nnb
224 *
225  nb = nbval( k )
226 *
227 * Make sure nb is legal
228 *
229  ierr( 1 ) = 0
230  IF( nb.LT.1 ) THEN
231  ierr( 1 ) = 1
232  IF( iam.EQ.0 )
233  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
234  END IF
235 *
236 * Check all processes for an error
237 *
238  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
239 *
240  IF( ierr( 1 ).GT.0 ) THEN
241  IF( iam.EQ.0 )
242  $ WRITE( nout, fmt = 9997 ) 'NB'
243  kskip = kskip + 1
244  GO TO 30
245  END IF
246 *
247 * Padding constants
248 *
249  mp = numroc( m, nb, myrow, 0, nprow )
250  np = numroc( n, nb, myrow, 0, nprow )
251  nq = numroc( n, nb, mycol, 0, npcol )
252  IF( check ) THEN
253  iprepad = max( nb, mp )
254  imidpad = nb
255  ipostpad = max( nb, nq )
256  ELSE
257  iprepad = 0
258  imidpad = 0
259  ipostpad = 0
260  END IF
261 *
262 * Initialize the array descriptor for the matrix A
263 *
264  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
265  $ max( 1, mp )+imidpad, ierr( 1 ) )
266 *
267 * Check all processes for an error
268 *
269  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
270 *
271  IF( ierr( 1 ).LT.0 ) THEN
272  IF( iam.EQ.0 )
273  $ WRITE( nout, fmt = 9997 ) 'descriptor'
274  kskip = kskip + 1
275  GO TO 30
276  END IF
277 *
278 * Assign pointers into MEM for SCALAPACK arrays, A is
279 * allocated starting at position MEM( IPREPAD+1 )
280 *
281  ipa = iprepad+1
282  IF( est .AND. m.EQ.n ) THEN
283  ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
284  ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
285  ELSE
286  ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
287  END IF
288  lipiv = iceil( intgsz*( mp+nb ), zplxsz )
289  ipw = ippiv + lipiv + ipostpad + iprepad
290 *
291  IF( check ) THEN
292 *
293 * Calculate the amount of workspace required by the
294 * checking routines PZLANGE, PZGETRRV, and
295 * PZLAFCHK
296 *
297  worksiz = max( 2, nq )
298 *
299  worksiz = max( worksiz, mp*desca( nb_ )+
300  $ nq*desca( mb_ ) )
301 *
302  worksiz = max( worksiz, mp * desca( nb_ ) )
303 *
304  worksiz = worksiz + ipostpad
305 *
306  ELSE
307 *
308  worksiz = ipostpad
309 *
310  END IF
311 *
312 * Check for adequate memory for problem size
313 *
314  ierr( 1 ) = 0
315  IF( ipw+worksiz.GT.memsiz ) THEN
316  IF( iam.EQ.0 )
317  $ WRITE( nout, fmt = 9996 ) 'factorization',
318  $ ( ipw+worksiz )*zplxsz
319  ierr( 1 ) = 1
320  END IF
321 *
322 * Check all processes for an error
323 *
324  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
325 *
326  IF( ierr( 1 ).GT.0 ) THEN
327  IF( iam.EQ.0 )
328  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
329  kskip = kskip + 1
330  GO TO 30
331  END IF
332 *
333 * Generate matrix A of Ax = b
334 *
335  CALL pzmatgen( ictxt, 'No transpose', 'No transpose',
336  $ desca( m_ ), desca( n_ ), desca( mb_ ),
337  $ desca( nb_ ), mem( ipa ), desca( lld_ ),
338  $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
339  $ mp, 0, nq, myrow, mycol, nprow, npcol )
340 *
341 * Calculate inf-norm of A for residual error-checking
342 *
343  IF( check ) THEN
344  CALL pzfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
345  $ desca( lld_ ), iprepad, ipostpad,
346  $ padval )
347  CALL pzfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
348  $ lipiv, iprepad, ipostpad, padval )
349  CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
350  $ mem( ipw-iprepad ), worksiz-ipostpad,
351  $ iprepad, ipostpad, padval )
352  anorm = pzlange( 'I', m, n, mem( ipa ), 1, 1, desca,
353  $ mem( ipw ) )
354  anorm1 = pzlange( '1', m, n, mem( ipa ), 1, 1, desca,
355  $ mem( ipw ) )
356  CALL pzchekpad( ictxt, 'PZLANGE', mp, nq,
357  $ mem( ipa-iprepad ), desca( lld_ ),
358  $ iprepad, ipostpad, padval )
359  CALL pzchekpad( ictxt, 'PZLANGE', worksiz-ipostpad,
360  $ 1, mem( ipw-iprepad ),
361  $ worksiz-ipostpad, iprepad, ipostpad,
362  $ padval )
363  END IF
364 *
365  IF( est .AND. m.EQ.n ) THEN
366  CALL pzmatgen( ictxt, 'No transpose', 'No transpose',
367  $ desca( m_ ), desca( n_ ), desca( mb_ ),
368  $ desca( nb_ ), mem( ipa0 ),
369  $ desca( lld_ ), desca( rsrc_ ),
370  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
371  $ myrow, mycol, nprow, npcol )
372  IF( check )
373  $ CALL pzfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
374  $ desca( lld_ ), iprepad, ipostpad,
375  $ padval )
376  END IF
377 *
378  CALL slboot()
379  CALL blacs_barrier( ictxt, 'All' )
380  CALL sltimer( 1 )
381 *
382 * Perform LU factorization
383 *
384  CALL pzgetrf( m, n, mem( ipa ), 1, 1, desca,
385  $ mem( ippiv ), info )
386 *
387  CALL sltimer( 1 )
388 *
389  IF( info.NE.0 ) THEN
390  IF( iam.EQ.0 )
391  $ WRITE( nout, fmt = * ) 'PZGETRF INFO=', info
392  kfail = kfail + 1
393  rcond = zero
394  GO TO 30
395  END IF
396 *
397  IF( check ) THEN
398 *
399 * Check for memory overwrite in LU factorization
400 *
401  CALL pzchekpad( ictxt, 'PZGETRF', mp, nq,
402  $ mem( ipa-iprepad ), desca( lld_ ),
403  $ iprepad, ipostpad, padval )
404  CALL pzchekpad( ictxt, 'PZGETRF', lipiv, 1,
405  $ mem( ippiv-iprepad ), lipiv, iprepad,
406  $ ipostpad, padval )
407  END IF
408 *
409  IF( m.NE.n ) THEN
410 *
411 * For non-square matrices, factorization only
412 *
413  nrhs = 0
414  nbrhs = 0
415 *
416  IF( check ) THEN
417 *
418 * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
419 *
420  CALL pzgetrrv( m, n, mem( ipa ), 1, 1, desca,
421  $ mem( ippiv ), mem( ipw ) )
422  CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
423  $ desca, iaseed, anorm, fresid,
424  $ mem( ipw ) )
425 *
426 * Check for memory overwrite
427 *
428  CALL pzchekpad( ictxt, 'PZGETRRV', mp, nq,
429  $ mem( ipa-iprepad ), desca( lld_ ),
430  $ iprepad, ipostpad, padval )
431  CALL pzchekpad( ictxt, 'PZGETRRV', lipiv, 1,
432  $ mem( ippiv-iprepad ), lipiv,
433  $ iprepad, ipostpad, padval )
434  CALL pzchekpad( ictxt, 'PZGETRRV',
435  $ worksiz-ipostpad, 1,
436  $ mem( ipw-iprepad ),
437  $ worksiz-ipostpad, iprepad,
438  $ ipostpad, padval )
439 *
440 * Test residual and detect NaN result
441 *
442  IF( ( fresid.LE.thresh ) .AND.
443  $ ( (fresid-fresid).EQ.0.0d+0 ) ) THEN
444  kpass = kpass + 1
445  passed = 'PASSED'
446  ELSE
447  kfail = kfail + 1
448  passed = 'FAILED'
449  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
450  $ WRITE( nout, fmt = 9986 ) fresid
451  END IF
452 *
453  ELSE
454 *
455 * Don't perform the checking, only timing
456 *
457  kpass = kpass + 1
458  fresid = fresid - fresid
459  passed = 'BYPASS'
460 *
461  END IF
462 *
463 * Gather maximum of all CPU and WALL clock timings
464 *
465  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
466  $ wtime )
467  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
468  $ ctime )
469 *
470 * Print results
471 *
472  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
473 *
474  maxmn = max( m, n )
475  minmn = min( m, n )
476 *
477 * 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU
478 * factorization M >= N
479 *
480  nops = 4.0d+0*dble(maxmn)*(dble(minmn)**2) -
481  $ (4.0d+0 / 3.0d+0)*( dble( minmn )**3 ) +
482  $ (2.0d+0)*dble( maxmn )*dble( minmn ) -
483  $ (3.0d+0)*( dble( minmn )**2 )
484 *
485 * Calculate total megaflops -- factorization only,
486 * -- for WALL and CPU time, and print output
487 *
488 * Print WALL time if machine supports it
489 *
490  IF( wtime( 1 ).GT.0.0d+0 ) THEN
491  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
492  ELSE
493  tmflops = 0.0d+0
494  END IF
495 *
496  wtime( 2 ) = 0.0d+0
497  IF( wtime( 1 ).GE.0.0d+0 )
498  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
499  $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
500  $ wtime( 2 ), tmflops, passed
501 *
502 * Print CPU time if machine supports it
503 *
504  IF( ctime( 1 ).GT.0.0d+0 ) THEN
505  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
506  ELSE
507  tmflops = 0.0d+0
508  END IF
509 *
510  ctime( 2 ) = 0.0d+0
511  IF( ctime( 1 ).GE.0.0d+0 )
512  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
513  $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
514  $ ctime( 2 ), tmflops, passed
515  END IF
516 *
517  ELSE
518 *
519 * If M = N
520 *
521  IF( est ) THEN
522 *
523 * Calculate workspace required for PZGECON
524 *
525  lwork = max( 1, 2*np ) +
526  $ max( 2, desca( nb_ )*
527  $ max( 1, iceil( nprow-1, npcol ) ),
528  $ nq + desca( nb_ )*
529  $ max( 1, iceil( npcol-1, nprow ) ) )
530  ipw2 = ipw + lwork + ipostpad + iprepad
531  lrwork = max( 1, 2*nq )
532  lw2 = iceil( lrwork*dblesz, zplxsz ) + ipostpad
533 *
534  ierr( 1 ) = 0
535  IF( ipw2+lw2.GT.memsiz ) THEN
536  IF( iam.EQ.0 )
537  $ WRITE( nout, fmt = 9996 )'cond est',
538  $ ( ipw2+lw2 )*zplxsz
539  ierr( 1 ) = 1
540  END IF
541 *
542 * Check all processes for an error
543 *
544  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
545  $ -1, 0 )
546 *
547  IF( ierr( 1 ).GT.0 ) THEN
548  IF( iam.EQ.0 )
549  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
550  kskip = kskip + 1
551  GO TO 30
552  END IF
553 *
554  IF( check ) THEN
555  CALL pzfillpad( ictxt, lwork, 1,
556  $ mem( ipw-iprepad ), lwork,
557  $ iprepad, ipostpad, padval )
558  CALL pzfillpad( ictxt, lw2-ipostpad, 1,
559  $ mem( ipw2-iprepad ),
560  $ lw2-ipostpad, iprepad,
561  $ ipostpad, padval )
562  END IF
563 *
564 * Compute condition number of the matrix
565 *
566  CALL pzgecon( '1', n, mem( ipa ), 1, 1, desca,
567  $ anorm1, rcond, mem( ipw ), lwork,
568  $ mem( ipw2 ), lrwork, info )
569 *
570  IF( check ) THEN
571  CALL pzchekpad( ictxt, 'PZGECON', np, nq,
572  $ mem( ipa-iprepad ),
573  $ desca( lld_ ), iprepad,
574  $ ipostpad, padval )
575  CALL pzchekpad( ictxt, 'PZGECON', lwork, 1,
576  $ mem( ipw-iprepad ), lwork,
577  $ iprepad, ipostpad, padval )
578  CALL pzchekpad( ictxt, 'PZGECON',
579  $ lw2-ipostpad, 1,
580  $ mem( ipw2-iprepad ),
581  $ lw2-ipostpad, iprepad,
582  $ ipostpad, padval )
583  END IF
584  END IF
585 *
586 * Loop over the different values for NRHS
587 *
588  DO 20 hh = 1, nnr
589 *
590  nrhs = nrval( hh )
591 *
592  DO 10 kk = 1, nnbr
593 *
594  nbrhs = nbrval( kk )
595 *
596 * Initialize Array Descriptor for rhs
597 *
598  CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
599  $ ictxt, max( 1, np )+imidpad,
600  $ ierr( 1 ) )
601 *
602 * Check all processes for an error
603 *
604  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
605  $ -1, 0 )
606 *
607  IF( ierr( 1 ).LT.0 ) THEN
608  IF( iam.EQ.0 )
609  $ WRITE( nout, fmt = 9997 ) 'descriptor'
610  kskip = kskip + 1
611  GO TO 10
612  END IF
613 *
614 * move IPW to allow room for RHS
615 *
616  myrhs = numroc( descb( n_ ), descb( nb_ ),
617  $ mycol, descb( csrc_ ), npcol )
618  ipb = ipw
619 *
620  IF( est ) THEN
621  ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
622  $ iprepad
623  ipferr = ipb0 + descb( lld_ )*myrhs +
624  $ ipostpad + iprepad
625  ipberr = myrhs + ipferr + ipostpad + iprepad
626  ipw = myrhs + ipberr + ipostpad + iprepad
627  ELSE
628  ipw = ipb + descb( lld_ )*myrhs + ipostpad +
629  $ iprepad
630  END IF
631 *
632 * Set worksiz: routines requiring most workspace
633 * is PZLASCHK
634 *
635  IF( check ) THEN
636  lcm = ilcm( nprow, npcol )
637  lcmq = lcm / npcol
638  worksiz = max( worksiz-ipostpad,
639  $ nq * nbrhs + np * nbrhs +
640  $ max( max( nq*nb, 2*nbrhs ),
641  $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
642  $ 0,0,lcmq ) ) )
643  worksiz = ipostpad + worksiz
644  ELSE
645  worksiz = ipostpad
646  END IF
647 *
648  ierr( 1 ) = 0
649  IF( ipw+worksiz.GT.memsiz ) THEN
650  IF( iam.EQ.0 )
651  $ WRITE( nout, fmt = 9996 )'solve',
652  $ ( ipw+worksiz )*zplxsz
653  ierr( 1 ) = 1
654  END IF
655 *
656 * Check all processes for an error
657 *
658  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
659  $ -1, 0 )
660 *
661  IF( ierr( 1 ).GT.0 ) THEN
662  IF( iam.EQ.0 )
663  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
664  kskip = kskip + 1
665  GO TO 10
666  END IF
667 *
668 * Generate RHS
669 *
670  CALL pzmatgen( ictxt, 'No', 'No', descb( m_ ),
671  $ descb( n_ ), descb( mb_ ),
672  $ descb( nb_ ), mem( ipb ),
673  $ descb( lld_ ), descb( rsrc_ ),
674  $ descb( csrc_ ), ibseed, 0, np, 0,
675  $ myrhs, myrow, mycol, nprow,
676  $ npcol )
677 *
678  IF( check )
679  $ CALL pzfillpad( ictxt, np, myrhs,
680  $ mem( ipb-iprepad ),
681  $ descb( lld_ ), iprepad,
682  $ ipostpad, padval )
683 *
684  IF( est ) THEN
685  CALL pzmatgen( ictxt, 'No', 'No',
686  $ descb( m_ ), descb( n_ ),
687  $ descb( mb_ ), descb( nb_ ),
688  $ mem( ipb0 ), descb( lld_ ),
689  $ descb( rsrc_ ),
690  $ descb( csrc_ ), ibseed, 0, np,
691  $ 0, myrhs, myrow, mycol, nprow,
692  $ npcol )
693  IF( check ) THEN
694  CALL pzfillpad( ictxt, np, myrhs,
695  $ mem( ipb0-iprepad ),
696  $ descb( lld_ ), iprepad,
697  $ ipostpad, padval )
698  CALL pzfillpad( ictxt, 1, myrhs,
699  $ mem( ipferr-iprepad ), 1,
700  $ iprepad, ipostpad,
701  $ padval )
702  CALL pzfillpad( ictxt, 1, myrhs,
703  $ mem( ipberr-iprepad ), 1,
704  $ iprepad, ipostpad,
705  $ padval )
706  END IF
707  END IF
708 *
709  CALL blacs_barrier( ictxt, 'All' )
710  CALL sltimer( 2 )
711 *
712 * Solve linear sytem via LU factorization
713 *
714  CALL pzgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
715  $ desca, mem( ippiv ), mem( ipb ),
716  $ 1, 1, descb, info )
717 *
718  CALL sltimer( 2 )
719 *
720  IF( check ) THEN
721 *
722 * check for memory overwrite
723 *
724  CALL pzchekpad( ictxt, 'PZGETRS', np, nq,
725  $ mem( ipa-iprepad ),
726  $ desca( lld_ ), iprepad,
727  $ ipostpad, padval )
728  CALL pzchekpad( ictxt, 'PZGETRS', lipiv, 1,
729  $ mem( ippiv-iprepad ), lipiv,
730  $ iprepad, ipostpad, padval )
731  CALL pzchekpad( ictxt, 'PZGETRS', np,
732  $ myrhs, mem( ipb-iprepad ),
733  $ descb( lld_ ), iprepad,
734  $ ipostpad, padval )
735 *
736  CALL pzfillpad( ictxt, worksiz-ipostpad,
737  $ 1, mem( ipw-iprepad ),
738  $ worksiz-ipostpad, iprepad,
739  $ ipostpad, padval )
740 *
741 * check the solution to rhs
742 *
743  CALL pzlaschk( 'No', 'N', n, nrhs,
744  $ mem( ipb ), 1, 1, descb,
745  $ iaseed, 1, 1, desca, ibseed,
746  $ anorm, sresid, mem( ipw ) )
747 *
748  IF( iam.EQ.0 .AND. sresid.GT.thresh )
749  $ WRITE( nout, fmt = 9985 ) sresid
750 *
751 * check for memory overwrite
752 *
753  CALL pzchekpad( ictxt, 'PZLASCHK', np,
754  $ myrhs, mem( ipb-iprepad ),
755  $ descb( lld_ ), iprepad,
756  $ ipostpad, padval )
757  CALL pzchekpad( ictxt, 'PZLASCHK',
758  $ worksiz-ipostpad, 1,
759  $ mem( ipw-iprepad ),
760  $ worksiz-ipostpad,
761  $ iprepad, ipostpad, padval )
762 *
763 * The second test is a NaN trap
764 *
765  IF( sresid.LE.thresh .AND.
766  $ ( sresid-sresid ).EQ.0.0d+0 ) THEN
767  kpass = kpass + 1
768  passed = 'PASSED'
769  ELSE
770  kfail = kfail + 1
771  passed = 'FAILED'
772  END IF
773  ELSE
774  kpass = kpass + 1
775  sresid = sresid - sresid
776  passed = 'BYPASS'
777  END IF
778 *
779  IF( est ) THEN
780 *
781 * Calculate workspace required for PZGERFS
782 *
783  lwork = max( 1, 2*np )
784  ipw2 = ipw + lwork + ipostpad + iprepad
785  lrwork = max( 1, np )
786  lw2 = iceil( lrwork*dblesz, zplxsz ) +
787  $ ipostpad
788 *
789  ierr( 1 ) = 0
790  IF( ipw2+lw2.GT.memsiz ) THEN
791  IF( iam.EQ.0 )
792  $ WRITE( nout, fmt = 9996 )
793  $ 'iter ref', ( ipw2+lw2 )*zplxsz
794  ierr( 1 ) = 1
795  END IF
796 *
797 * Check all processes for an error
798 *
799  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
800  $ ierr, 1, -1, 0 )
801 *
802  IF( ierr( 1 ).GT.0 ) THEN
803  IF( iam.EQ.0 )
804  $ WRITE( nout, fmt = 9997 )
805  $ 'MEMORY'
806  kskip = kskip + 1
807  GO TO 10
808  END IF
809 *
810  IF( check ) THEN
811  CALL pzfillpad( ictxt, lwork, 1,
812  $ mem( ipw-iprepad ),
813  $ lwork, iprepad, ipostpad,
814  $ padval )
815  CALL pzfillpad( ictxt, lw2-ipostpad, 1,
816  $ mem( ipw2-iprepad ),
817  $ lw2-ipostpad, iprepad,
818  $ ipostpad, padval )
819  END IF
820 *
821 * Use iterative refinement to improve the
822 * computed solution
823 *
824  CALL pzgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
825  $ 1, desca, mem( ipa ), 1, 1,
826  $ desca, mem( ippiv ),
827  $ mem( ipb0 ), 1, 1, descb,
828  $ mem( ipb ), 1, 1, descb,
829  $ mem( ipferr ), mem( ipberr ),
830  $ mem( ipw ), lwork, mem( ipw2 ),
831  $ lrwork, info )
832 *
833  IF( check ) THEN
834  CALL pzchekpad( ictxt, 'PZGERFS', np,
835  $ nq, mem( ipa0-iprepad ),
836  $ desca( lld_ ), iprepad,
837  $ ipostpad, padval )
838  CALL pzchekpad( ictxt, 'PZGERFS', np,
839  $ nq, mem( ipa-iprepad ),
840  $ desca( lld_ ), iprepad,
841  $ ipostpad, padval )
842  CALL pzchekpad( ictxt, 'PZGERFS', lipiv,
843  $ 1, mem( ippiv-iprepad ),
844  $ lipiv, iprepad,
845  $ ipostpad, padval )
846  CALL pzchekpad( ictxt, 'PZGERFS', np,
847  $ myrhs, mem( ipb-iprepad ),
848  $ descb( lld_ ), iprepad,
849  $ ipostpad, padval )
850  CALL pzchekpad( ictxt, 'PZGERFS', np,
851  $ myrhs,
852  $ mem( ipb0-iprepad ),
853  $ descb( lld_ ), iprepad,
854  $ ipostpad, padval )
855  CALL pzchekpad( ictxt, 'PZGERFS', 1,
856  $ myrhs,
857  $ mem( ipferr-iprepad ), 1,
858  $ iprepad, ipostpad,
859  $ padval )
860  CALL pzchekpad( ictxt, 'PZGERFS', 1,
861  $ myrhs,
862  $ mem( ipberr-iprepad ), 1,
863  $ iprepad, ipostpad,
864  $ padval )
865  CALL pzchekpad( ictxt, 'PZGERFS', lwork,
866  $ 1, mem( ipw-iprepad ),
867  $ lwork, iprepad, ipostpad,
868  $ padval )
869  CALL pzchekpad( ictxt, 'PZGERFS',
870  $ lw2-ipostpad, 1,
871  $ mem( ipw2-iprepad ),
872  $ lw2-ipostpad, iprepad,
873  $ ipostpad, padval )
874 *
875  CALL pzfillpad( ictxt, worksiz-ipostpad,
876  $ 1, mem( ipw-iprepad ),
877  $ worksiz-ipostpad, iprepad,
878  $ ipostpad, padval )
879 *
880 * check the solution to rhs
881 *
882  CALL pzlaschk( 'No', 'N', n, nrhs,
883  $ mem( ipb ), 1, 1, descb,
884  $ iaseed, 1, 1, desca,
885  $ ibseed, anorm, sresid2,
886  $ mem( ipw ) )
887 *
888  IF( iam.EQ.0 .AND. sresid2.GT.thresh )
889  $ WRITE( nout, fmt = 9985 ) sresid2
890 *
891 * check for memory overwrite
892 *
893  CALL pzchekpad( ictxt, 'PZLASCHK', np,
894  $ myrhs, mem( ipb-iprepad ),
895  $ descb( lld_ ), iprepad,
896  $ ipostpad, padval )
897  CALL pzchekpad( ictxt, 'PZLASCHK',
898  $ worksiz-ipostpad, 1,
899  $ mem( ipw-iprepad ),
900  $ worksiz-ipostpad, iprepad,
901  $ ipostpad, padval )
902  END IF
903  END IF
904 *
905 * Gather max. of all CPU and WALL clock timings
906 *
907  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
908  $ wtime )
909  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
910  $ ctime )
911 *
912 * Print results
913 *
914  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
915 *
916 * 8/3 N^3 - N^2 flops for LU factorization
917 *
918  nops = (8.0d+0/3.0d+0)*( dble(n)**3 ) -
919  $ dble(n)**2
920 *
921 * nrhs * 8 N^2 flops for LU solve.
922 *
923  nops = nops + 8.0d+0*(dble(n)**2)*dble(nrhs)
924 *
925 * Calculate total megaflops -- factorization
926 * and solve -- for WALL and CPU time, and print
927 * output
928 *
929 * Print WALL time if machine supports it
930 *
931  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
932  $ THEN
933  tmflops = nops /
934  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
935  ELSE
936  tmflops = 0.0d+0
937  END IF
938 *
939 * Print WALL time if supported
940 *
941  IF( wtime( 2 ).GE.0.0d+0 )
942  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
943  $ nb, nrhs, nbrhs, nprow, npcol,
944  $ wtime( 1 ), wtime( 2 ), tmflops,
945  $ passed
946 *
947 * Print CPU time if supported
948 *
949  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
950  $ THEN
951  tmflops = nops /
952  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
953  ELSE
954  tmflops = 0.0d+0
955  END IF
956 *
957  IF( ctime( 2 ).GE.0.0d+0 )
958  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
959  $ nb, nrhs, nbrhs, nprow, npcol,
960  $ ctime( 1 ), ctime( 2 ), tmflops,
961  $ passed
962  END IF
963  10 CONTINUE
964  20 CONTINUE
965 *
966  IF( check.AND.( sresid.GT.thresh ) ) THEN
967 *
968 * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
969 *
970  CALL pzgetrrv( m, n, mem( ipa ), 1, 1, desca,
971  $ mem( ippiv ), mem( ipw ) )
972  CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
973  $ 1, desca, iaseed, anorm, fresid,
974  $ mem( ipw ) )
975 *
976 * Check for memory overwrite
977 *
978  CALL pzchekpad( ictxt, 'PZGETRRV', np, nq,
979  $ mem( ipa-iprepad ), desca( lld_ ),
980  $ iprepad, ipostpad, padval )
981  CALL pzchekpad( ictxt, 'PZGETRRV', lipiv,
982  $ 1, mem( ippiv-iprepad ), lipiv,
983  $ iprepad, ipostpad, padval )
984  CALL pzchekpad( ictxt, 'PZGETRRV',
985  $ worksiz-ipostpad, 1,
986  $ mem( ipw-iprepad ),
987  $ worksiz-ipostpad, iprepad,
988  $ ipostpad, padval )
989 *
990  IF( myrow.EQ.0 .AND. mycol.EQ.0 )
991  $ WRITE( nout, fmt = 9986 ) fresid
992  END IF
993  END IF
994  30 CONTINUE
995  40 CONTINUE
996  CALL blacs_gridexit( ictxt )
997  50 CONTINUE
998 *
999 * Print ending messages and close output file
1000 *
1001  60 CONTINUE
1002  IF( iam.EQ.0 ) THEN
1003  ktests = kpass + kfail + kskip
1004  WRITE( nout, fmt = * )
1005  WRITE( nout, fmt = 9992 ) ktests
1006  IF( check ) THEN
1007  WRITE( nout, fmt = 9991 ) kpass
1008  WRITE( nout, fmt = 9989 ) kfail
1009  ELSE
1010  WRITE( nout, fmt = 9990 ) kpass
1011  END IF
1012  WRITE( nout, fmt = 9988 ) kskip
1013  WRITE( nout, fmt = * )
1014  WRITE( nout, fmt = * )
1015  WRITE( nout, fmt = 9987 )
1016  IF( nout.NE.6 .AND. nout.NE.0 )
1017  $ CLOSE( nout )
1018  END IF
1019 *
1020  CALL blacs_exit( 0 )
1021 *
1022  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1023  $ '; It should be at least 1' )
1024  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1025  $ i4 )
1026  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1027  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1028  $ i11 )
1029  9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1030  $ 'Sol Time MFLOPS CHECK' )
1031  9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1032  $ '-------- -------- ------' )
1033  9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1034  $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1035  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1036  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1037  9990 FORMAT( i5, ' tests completed without checking.' )
1038  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1039  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1040  9987 FORMAT( 'END OF TESTS.' )
1041  9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1042  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1043 *
1044  stop
1045 *
1046 * End of PZLUDRIVER
1047 *
1048  END
max
#define max(A, B)
Definition: pcgemr.c:180
pzluinfo
subroutine pzluinfo(SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, EST, WORK, IAM, NPROCS)
Definition: pzluinfo.f:5
pzgerfs
subroutine pzgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pzgerfs.f:5
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pzlafchk
subroutine pzlafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pzlafchk.f:3
pzgecon
subroutine pzgecon(NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pzgecon.f:3
pzgetrrv
subroutine pzgetrrv(M, N, A, IA, JA, DESCA, IPIV, WORK)
Definition: pzgetrrv.f:2
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pzlaschk
subroutine pzlaschk(SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK)
Definition: pzlaschk.f:4
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzgetrf
subroutine pzgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: pzgetrf.f:2
pzludriver
program pzludriver
Definition: pzludriver.f:1
pzchekpad
subroutine pzchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzchekpad.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pzfillpad
subroutine pzfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzfillpad.f:2
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2
pzgetrs
subroutine pzgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: pzgetrs.f:3