SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pdludriver.f
Go to the documentation of this file.
1 PROGRAM pdludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PDLUDRIVER is the main test program for the DOUBLE PRECISION
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* DBLESZ INTEGER, default = 8 bytes.
57* INTGSZ and DBLESZ indicate the length in bytes on the
58* given platform for an integer and a double precision real.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
60*
61* All arrays used by SCALAPACK routines are allocated from
62* this array and referenced by pointers. The integer IPA,
63* for example, is a pointer to the starting element of MEM for
64* the matrix A.
65*
66* =====================================================================
67*
68* .. Parameters ..
69 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70 $ lld_, mb_, m_, nb_, n_, rsrc_
71 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74 INTEGER dblesz, intgsz, memsiz, ntests, totmem
75 DOUBLE PRECISION padval, zero
76 parameter( dblesz = 8, intgsz = 4, totmem = 4000000,
77 $ memsiz = totmem / dblesz, ntests = 20,
78 $ padval = -9923.0d+0, zero = 0.0d+0 )
79* ..
80* .. Local Scalars ..
81 LOGICAL check, est
82 CHARACTER*6 passed
83 CHARACTER*80 outfile
84 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88 $ lipiv, liwork, lwork, lw2, m, maxmn,
89 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91 $ nprocs, nprow, nq, nrhs, worksiz
92 REAL thresh
93 DOUBLE PRECISION anorm, anorm1, fresid, nops, rcond,
94 $ sresid, sresid2, tmflops
95* ..
96* .. Local Arrays ..
97 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98 $ mval( ntests ), nbrval( ntests ),
99 $ nbval( ntests ), nrval( ntests ),
100 $ nval( ntests ), pval( ntests ),
101 $ qval( ntests )
102 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
103* ..
104* .. External Subroutines ..
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
107 $ blacs_pinfo, descinit, igsum2d, pdchekpad,
112* ..
113* .. External Functions ..
114 INTEGER iceil, ilcm, numroc
115 DOUBLE PRECISION pdlange
116 EXTERNAL iceil, ilcm, numroc, pdlange
117* ..
118* .. Intrinsic Functions ..
119 INTRINSIC dble, max, min
120* ..
121* .. Data Statements ..
122 DATA kfail, kpass, kskip, ktests / 4*0 /
123* ..
124* .. Executable Statements ..
125*
126* Get starting information
127*
128 CALL blacs_pinfo( iam, nprocs )
129 iaseed = 100
130 ibseed = 200
131 CALL pdluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
132 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
133 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
134 $ est, mem, iam, nprocs )
135 check = ( thresh.GE.0.0e+0 )
136*
137* Print headings
138*
139 IF( iam.EQ.0 ) THEN
140 WRITE( nout, fmt = * )
141 WRITE( nout, fmt = 9995 )
142 WRITE( nout, fmt = 9994 )
143 WRITE( nout, fmt = * )
144 END IF
145*
146* Loop over different process grids
147*
148 DO 50 i = 1, ngrids
149*
150 nprow = pval( i )
151 npcol = qval( i )
152*
153* Make sure grid information is correct
154*
155 ierr( 1 ) = 0
156 IF( nprow.LT.1 ) THEN
157 IF( iam.EQ.0 )
158 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
159 ierr( 1 ) = 1
160 ELSE IF( npcol.LT.1 ) THEN
161 IF( iam.EQ.0 )
162 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
163 ierr( 1 ) = 1
164 ELSE IF( nprow*npcol.GT.nprocs ) THEN
165 IF( iam.EQ.0 )
166 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
167 ierr( 1 ) = 1
168 END IF
169*
170 IF( ierr( 1 ).GT.0 ) THEN
171 IF( iam.EQ.0 )
172 $ WRITE( nout, fmt = 9997 ) 'grid'
173 kskip = kskip + 1
174 GO TO 50
175 END IF
176*
177* Define process grid
178*
179 CALL blacs_get( -1, 0, ictxt )
180 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
181 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182*
183* Go to bottom of process grid loop if this case doesn't use my
184* process
185*
186 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
187 $ GO TO 50
188*
189 DO 40 j = 1, nmat
190*
191 m = mval( j )
192 n = nval( j )
193*
194* Make sure matrix information is correct
195*
196 ierr( 1 ) = 0
197 IF( m.LT.1 ) THEN
198 IF( iam.EQ.0 )
199 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
200 ierr( 1 ) = 1
201 ELSE IF( n.LT.1 ) THEN
202 IF( iam.EQ.0 )
203 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
204 ierr( 1 ) = 1
205 END IF
206*
207* Check all processes for an error
208*
209 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
210*
211 IF( ierr( 1 ).GT.0 ) THEN
212 IF( iam.EQ.0 )
213 $ WRITE( nout, fmt = 9997 ) 'matrix'
214 kskip = kskip + 1
215 GO TO 40
216 END IF
217*
218 DO 30 k = 1, nnb
219*
220 nb = nbval( k )
221*
222* Make sure nb is legal
223*
224 ierr( 1 ) = 0
225 IF( nb.LT.1 ) THEN
226 ierr( 1 ) = 1
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'NB'
238 kskip = kskip + 1
239 GO TO 30
240 END IF
241*
242* Padding constants
243*
244 mp = numroc( m, nb, myrow, 0, nprow )
245 np = numroc( n, nb, myrow, 0, nprow )
246 nq = numroc( n, nb, mycol, 0, npcol )
247 IF( check ) THEN
248 iprepad = max( nb, mp )
249 imidpad = nb
250 ipostpad = max( nb, nq )
251 ELSE
252 iprepad = 0
253 imidpad = 0
254 ipostpad = 0
255 END IF
256*
257* Initialize the array descriptor for the matrix A
258*
259 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
260 $ max( 1, mp )+imidpad, ierr( 1 ) )
261*
262* Check all processes for an error
263*
264 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
265*
266 IF( ierr( 1 ).LT.0 ) THEN
267 IF( iam.EQ.0 )
268 $ WRITE( nout, fmt = 9997 ) 'descriptor'
269 kskip = kskip + 1
270 GO TO 30
271 END IF
272*
273* Assign pointers into MEM for SCALAPACK arrays, A is
274* allocated starting at position MEM( IPREPAD+1 )
275*
276 ipa = iprepad+1
277 IF( est .AND. m.EQ.n ) THEN
278 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
279 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
280 ELSE
281 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
282 END IF
283 lipiv = iceil( intgsz*( mp+nb ), dblesz )
284 ipw = ippiv + lipiv + ipostpad + iprepad
285*
286 IF( check ) THEN
287*
288* Calculate the amount of workspace required by the
289* checking routines PDLANGE, PDGETRRV, and
290* PDLAFCHK
291*
292 worksiz = max( 2, nq )
293*
294 worksiz = max( worksiz, mp*desca( nb_ )+
295 $ nq*desca( mb_ ) )
296*
297 worksiz = max( worksiz, mp * desca( nb_ ) )
298*
299 worksiz = worksiz + ipostpad
300*
301 ELSE
302*
303 worksiz = ipostpad
304*
305 END IF
306*
307* Check for adequate memory for problem size
308*
309 ierr( 1 ) = 0
310 IF( ipw+worksiz.GT.memsiz ) THEN
311 IF( iam.EQ.0 )
312 $ WRITE( nout, fmt = 9996 ) 'factorization',
313 $ ( ipw+worksiz )*dblesz
314 ierr( 1 ) = 1
315 END IF
316*
317* Check all processes for an error
318*
319 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
320*
321 IF( ierr( 1 ).GT.0 ) THEN
322 IF( iam.EQ.0 )
323 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
324 kskip = kskip + 1
325 GO TO 30
326 END IF
327*
328* Generate matrix A of Ax = b
329*
330 CALL pdmatgen( ictxt, 'No transpose', 'No transpose',
331 $ desca( m_ ), desca( n_ ), desca( mb_ ),
332 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
333 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
334 $ mp, 0, nq, myrow, mycol, nprow, npcol )
335*
336* Calculate inf-norm of A for residual error-checking
337*
338 IF( check ) THEN
339 CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
340 $ desca( lld_ ), iprepad, ipostpad,
341 $ padval )
342 CALL pdfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
343 $ lipiv, iprepad, ipostpad, padval )
344 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
345 $ mem( ipw-iprepad ), worksiz-ipostpad,
346 $ iprepad, ipostpad, padval )
347 anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1, desca,
348 $ mem( ipw ) )
349 anorm1 = pdlange( '1', m, n, mem( ipa ), 1, 1, desca,
350 $ mem( ipw ) )
351 CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,
352 $ mem( ipa-iprepad ), desca( lld_ ),
353 $ iprepad, ipostpad, padval )
354 CALL pdchekpad( ictxt, 'PDLANGE', worksiz-ipostpad,
355 $ 1, mem( ipw-iprepad ),
356 $ worksiz-ipostpad, iprepad, ipostpad,
357 $ padval )
358 END IF
359*
360 IF( est .AND. m.EQ.n ) THEN
361 CALL pdmatgen( ictxt, 'No transpose', 'No transpose',
362 $ desca( m_ ), desca( n_ ), desca( mb_ ),
363 $ desca( nb_ ), mem( ipa0 ),
364 $ desca( lld_ ), desca( rsrc_ ),
365 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
366 $ myrow, mycol, nprow, npcol )
367 IF( check )
368 $ CALL pdfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
369 $ desca( lld_ ), iprepad, ipostpad,
370 $ padval )
371 END IF
372*
373 CALL slboot()
374 CALL blacs_barrier( ictxt, 'All' )
375 CALL sltimer( 1 )
376*
377* Perform LU factorization
378*
379 CALL pdgetrf( m, n, mem( ipa ), 1, 1, desca,
380 $ mem( ippiv ), info )
381*
382 CALL sltimer( 1 )
383*
384 IF( info.NE.0 ) THEN
385 IF( iam.EQ.0 )
386 $ WRITE( nout, fmt = * ) 'PDGETRF INFO=', info
387 kfail = kfail + 1
388 rcond = zero
389 GO TO 30
390 END IF
391*
392 IF( check ) THEN
393*
394* Check for memory overwrite in LU factorization
395*
396 CALL pdchekpad( ictxt, 'PDGETRF', mp, nq,
397 $ mem( ipa-iprepad ), desca( lld_ ),
398 $ iprepad, ipostpad, padval )
399 CALL pdchekpad( ictxt, 'PDGETRF', lipiv, 1,
400 $ mem( ippiv-iprepad ), lipiv, iprepad,
401 $ ipostpad, padval )
402 END IF
403*
404 IF( m.NE.n ) THEN
405*
406* For non-square matrices, factorization only
407*
408 nrhs = 0
409 nbrhs = 0
410*
411 IF( check ) THEN
412*
413* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
414*
415 CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
416 $ mem( ippiv ), mem( ipw ) )
417 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
418 $ desca, iaseed, anorm, fresid,
419 $ mem( ipw ) )
420*
421* Check for memory overwrite
422*
423 CALL pdchekpad( ictxt, 'PDGETRRV', mp, nq,
424 $ mem( ipa-iprepad ), desca( lld_ ),
425 $ iprepad, ipostpad, padval )
426 CALL pdchekpad( ictxt, 'PDGETRRV', lipiv, 1,
427 $ mem( ippiv-iprepad ), lipiv,
428 $ iprepad, ipostpad, padval )
429 CALL pdchekpad( ictxt, 'PDGETRRV',
430 $ worksiz-ipostpad, 1,
431 $ mem( ipw-iprepad ),
432 $ worksiz-ipostpad, iprepad,
433 $ ipostpad, padval )
434*
435* Test residual and detect NaN result
436*
437 IF( ( fresid.LE.thresh ) .AND.
438 $ ( (fresid-fresid).EQ.0.0d+0 ) ) THEN
439 kpass = kpass + 1
440 passed = 'PASSED'
441 ELSE
442 kfail = kfail + 1
443 passed = 'FAILED'
444 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
445 $ WRITE( nout, fmt = 9986 ) fresid
446 END IF
447*
448 ELSE
449*
450* Don't perform the checking, only timing
451*
452 kpass = kpass + 1
453 fresid = fresid - fresid
454 passed = 'BYPASS'
455*
456 END IF
457*
458* Gather maximum of all CPU and WALL clock timings
459*
460 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
461 $ wtime )
462 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
463 $ ctime )
464*
465* Print results
466*
467 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
468*
469 maxmn = max( m, n )
470 minmn = min( m, n )
471*
472* M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
473* factorization when M >= N
474*
475 nops = dble( maxmn )*( dble( minmn )**2 ) -
476 $ (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -
477 $ (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )
478*
479* Calculate total megaflops -- factorization only,
480* -- for WALL and CPU time, and print output
481*
482* Print WALL time if machine supports it
483*
484 IF( wtime( 1 ).GT.0.0d+0 ) THEN
485 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
486 ELSE
487 tmflops = 0.0d+0
488 END IF
489*
490 wtime( 2 ) = 0.0d+0
491 IF( wtime( 1 ).GE.0.0d+0 )
492 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,
493 $ nrhs, nbrhs, nprow, npcol, wtime( 1 ),
494 $ wtime( 2 ), tmflops, passed
495*
496* Print CPU time if machine supports it
497*
498 IF( ctime( 1 ).GT.0.0d+0 ) THEN
499 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
500 ELSE
501 tmflops = 0.0d+0
502 END IF
503*
504 ctime( 2 ) = 0.0d+0
505 IF( ctime( 1 ).GE.0.0d+0 )
506 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,
507 $ nrhs, nbrhs, nprow, npcol, ctime( 1 ),
508 $ ctime( 2 ), tmflops, passed
509 END IF
510*
511 ELSE
512*
513* If M = N
514*
515 IF( est ) THEN
516*
517* Calculate workspace required for PDGECON
518*
519 lwork = max( 1, 2*np ) + max( 1, 2*nq ) +
520 $ max( 2, desca( nb_ )*
521 $ max( 1, iceil( nprow-1, npcol ) ),
522 $ nq + desca( nb_ )*
523 $ max( 1, iceil( npcol-1, nprow ) ) )
524 ipw2 = ipw + lwork + ipostpad + iprepad
525 liwork = max( 1, np )
526 lw2 = iceil( liwork*intgsz, dblesz ) + ipostpad
527*
528 ierr( 1 ) = 0
529 IF( ipw2+lw2.GT.memsiz ) THEN
530 IF( iam.EQ.0 )
531 $ WRITE( nout, fmt = 9996 )'cond est',
532 $ ( ipw2+lw2 )*dblesz
533 ierr( 1 ) = 1
534 END IF
535*
536* Check all processes for an error
537*
538 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
539 $ -1, 0 )
540*
541 IF( ierr( 1 ).GT.0 ) THEN
542 IF( iam.EQ.0 )
543 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
544 kskip = kskip + 1
545 GO TO 30
546 END IF
547*
548 IF( check ) THEN
549 CALL pdfillpad( ictxt, lwork, 1,
550 $ mem( ipw-iprepad ), lwork,
551 $ iprepad, ipostpad, padval )
552 CALL pdfillpad( ictxt, lw2-ipostpad, 1,
553 $ mem( ipw2-iprepad ),
554 $ lw2-ipostpad, iprepad,
555 $ ipostpad, padval )
556 END IF
557*
558* Compute condition number of the matrix
559*
560 CALL pdgecon( '1', n, mem( ipa ), 1, 1, desca,
561 $ anorm1, rcond, mem( ipw ), lwork,
562 $ mem( ipw2 ), liwork, info )
563*
564 IF( check ) THEN
565 CALL pdchekpad( ictxt, 'PDGECON', np, nq,
566 $ mem( ipa-iprepad ),
567 $ desca( lld_ ), iprepad,
568 $ ipostpad, padval )
569 CALL pdchekpad( ictxt, 'PDGECON', lwork, 1,
570 $ mem( ipw-iprepad ), lwork,
571 $ iprepad, ipostpad, padval )
572 CALL pdchekpad( ictxt, 'PDGECON',
573 $ lw2-ipostpad, 1,
574 $ mem( ipw2-iprepad ),
575 $ lw2-ipostpad, iprepad,
576 $ ipostpad, padval )
577 END IF
578 END IF
579*
580* Loop over the different values for NRHS
581*
582 DO 20 hh = 1, nnr
583*
584 nrhs = nrval( hh )
585*
586 DO 10 kk = 1, nnbr
587*
588 nbrhs = nbrval( kk )
589*
590* Initialize Array Descriptor for rhs
591*
592 CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,
593 $ ictxt, max( 1, np )+imidpad,
594 $ ierr( 1 ) )
595*
596* Check all processes for an error
597*
598 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
599 $ -1, 0 )
600*
601 IF( ierr( 1 ).LT.0 ) THEN
602 IF( iam.EQ.0 )
603 $ WRITE( nout, fmt = 9997 ) 'descriptor'
604 kskip = kskip + 1
605 GO TO 10
606 END IF
607*
608* move IPW to allow room for RHS
609*
610 myrhs = numroc( descb( n_ ), descb( nb_ ),
611 $ mycol, descb( csrc_ ), npcol )
612 ipb = ipw
613*
614 IF( est ) THEN
615 ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
616 $ iprepad
617 ipferr = ipb0 + descb( lld_ )*myrhs +
618 $ ipostpad + iprepad
619 ipberr = myrhs + ipferr + ipostpad + iprepad
620 ipw = myrhs + ipberr + ipostpad + iprepad
621 ELSE
622 ipw = ipb + descb( lld_ )*myrhs + ipostpad +
623 $ iprepad
624 END IF
625*
626* Set worksiz: routines requiring most workspace
627* is PDLASCHK
628*
629 IF( check ) THEN
630 lcm = ilcm( nprow, npcol )
631 lcmq = lcm / npcol
632 worksiz = max( worksiz-ipostpad,
633 $ nq * nbrhs + np * nbrhs +
634 $ max( max( nq*nb, 2*nbrhs ),
635 $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
636 $ 0,0,lcmq ) ) )
637 worksiz = ipostpad + worksiz
638 ELSE
639 worksiz = ipostpad
640 END IF
641*
642 ierr( 1 ) = 0
643 IF( ipw+worksiz.GT.memsiz ) THEN
644 IF( iam.EQ.0 )
645 $ WRITE( nout, fmt = 9996 )'solve',
646 $ ( ipw+worksiz )*dblesz
647 ierr( 1 ) = 1
648 END IF
649*
650* Check all processes for an error
651*
652 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
653 $ -1, 0 )
654*
655 IF( ierr( 1 ).GT.0 ) THEN
656 IF( iam.EQ.0 )
657 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
658 kskip = kskip + 1
659 GO TO 10
660 END IF
661*
662* Generate RHS
663*
664 CALL pdmatgen( ictxt, 'No', 'No', descb( m_ ),
665 $ descb( n_ ), descb( mb_ ),
666 $ descb( nb_ ), mem( ipb ),
667 $ descb( lld_ ), descb( rsrc_ ),
668 $ descb( csrc_ ), ibseed, 0, np, 0,
669 $ myrhs, myrow, mycol, nprow,
670 $ npcol )
671*
672 IF( check )
673 $ CALL pdfillpad( ictxt, np, myrhs,
674 $ mem( ipb-iprepad ),
675 $ descb( lld_ ), iprepad,
676 $ ipostpad, padval )
677*
678 IF( est ) THEN
679 CALL pdmatgen( ictxt, 'No', 'No',
680 $ descb( m_ ), descb( n_ ),
681 $ descb( mb_ ), descb( nb_ ),
682 $ mem( ipb0 ), descb( lld_ ),
683 $ descb( rsrc_ ),
684 $ descb( csrc_ ), ibseed, 0, np,
685 $ 0, myrhs, myrow, mycol, nprow,
686 $ npcol )
687 IF( check ) THEN
688 CALL pdfillpad( ictxt, np, myrhs,
689 $ mem( ipb0-iprepad ),
690 $ descb( lld_ ), iprepad,
691 $ ipostpad, padval )
692 CALL pdfillpad( ictxt, 1, myrhs,
693 $ mem( ipferr-iprepad ), 1,
694 $ iprepad, ipostpad,
695 $ padval )
696 CALL pdfillpad( ictxt, 1, myrhs,
697 $ mem( ipberr-iprepad ), 1,
698 $ iprepad, ipostpad,
699 $ padval )
700 END IF
701 END IF
702*
703 CALL blacs_barrier( ictxt, 'All' )
704 CALL sltimer( 2 )
705*
706* Solve linear sytem via LU factorization
707*
708 CALL pdgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,
709 $ desca, mem( ippiv ), mem( ipb ),
710 $ 1, 1, descb, info )
711*
712 CALL sltimer( 2 )
713*
714 IF( check ) THEN
715*
716* check for memory overwrite
717*
718 CALL pdchekpad( ictxt, 'PDGETRS', np, nq,
719 $ mem( ipa-iprepad ),
720 $ desca( lld_ ), iprepad,
721 $ ipostpad, padval )
722 CALL pdchekpad( ictxt, 'PDGETRS', lipiv, 1,
723 $ mem( ippiv-iprepad ), lipiv,
724 $ iprepad, ipostpad, padval )
725 CALL pdchekpad( ictxt, 'PDGETRS', np,
726 $ myrhs, mem( ipb-iprepad ),
727 $ descb( lld_ ), iprepad,
728 $ ipostpad, padval )
729*
730 CALL pdfillpad( ictxt, worksiz-ipostpad,
731 $ 1, mem( ipw-iprepad ),
732 $ worksiz-ipostpad, iprepad,
733 $ ipostpad, padval )
734*
735* check the solution to rhs
736*
737 CALL pdlaschk( 'No', 'N', n, nrhs,
738 $ mem( ipb ), 1, 1, descb,
739 $ iaseed, 1, 1, desca, ibseed,
740 $ anorm, sresid, mem( ipw ) )
741*
742 IF( iam.EQ.0 .AND. sresid.GT.thresh )
743 $ WRITE( nout, fmt = 9985 ) sresid
744*
745* check for memory overwrite
746*
747 CALL pdchekpad( ictxt, 'PDLASCHK', np,
748 $ myrhs, mem( ipb-iprepad ),
749 $ descb( lld_ ), iprepad,
750 $ ipostpad, padval )
751 CALL pdchekpad( ictxt, 'PDLASCHK',
752 $ worksiz-ipostpad, 1,
753 $ mem( ipw-iprepad ),
754 $ worksiz-ipostpad,
755 $ iprepad, ipostpad, padval )
756*
757* The second test is a NaN trap
758*
759 IF( sresid.LE.thresh .AND.
760 $ ( sresid-sresid ).EQ.0.0d+0 ) THEN
761 kpass = kpass + 1
762 passed = 'PASSED'
763 ELSE
764 kfail = kfail + 1
765 passed = 'FAILED'
766 END IF
767 ELSE
768 kpass = kpass + 1
769 sresid = sresid - sresid
770 passed = 'BYPASS'
771 END IF
772*
773 IF( est ) THEN
774*
775* Calculate workspace required for PDGERFS
776*
777 lwork = max( 1, 3*np )
778 ipw2 = ipw + lwork + ipostpad + iprepad
779 liwork = max( 1, np )
780 lw2 = iceil( liwork*intgsz, dblesz ) +
781 $ ipostpad
782*
783 ierr( 1 ) = 0
784 IF( ipw2+lw2.GT.memsiz ) THEN
785 IF( iam.EQ.0 )
786 $ WRITE( nout, fmt = 9996 )
787 $ 'iter ref', ( ipw2+lw2 )*dblesz
788 ierr( 1 ) = 1
789 END IF
790*
791* Check all processes for an error
792*
793 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
794 $ ierr, 1, -1, 0 )
795*
796 IF( ierr( 1 ).GT.0 ) THEN
797 IF( iam.EQ.0 )
798 $ WRITE( nout, fmt = 9997 )
799 $ 'MEMORY'
800 kskip = kskip + 1
801 GO TO 10
802 END IF
803*
804 IF( check ) THEN
805 CALL pdfillpad( ictxt, lwork, 1,
806 $ mem( ipw-iprepad ),
807 $ lwork, iprepad, ipostpad,
808 $ padval )
809 CALL pdfillpad( ictxt, lw2-ipostpad, 1,
810 $ mem( ipw2-iprepad ),
811 $ lw2-ipostpad, iprepad,
812 $ ipostpad, padval )
813 END IF
814*
815* Use iterative refinement to improve the
816* computed solution
817*
818 CALL pdgerfs( 'No', n, nrhs, mem( ipa0 ), 1,
819 $ 1, desca, mem( ipa ), 1, 1,
820 $ desca, mem( ippiv ),
821 $ mem( ipb0 ), 1, 1, descb,
822 $ mem( ipb ), 1, 1, descb,
823 $ mem( ipferr ), mem( ipberr ),
824 $ mem( ipw ), lwork, mem( ipw2 ),
825 $ liwork, info )
826*
827 IF( check ) THEN
828 CALL pdchekpad( ictxt, 'PDGERFS', np,
829 $ nq, mem( ipa0-iprepad ),
830 $ desca( lld_ ), iprepad,
831 $ ipostpad, padval )
832 CALL pdchekpad( ictxt, 'PDGERFS', np,
833 $ nq, mem( ipa-iprepad ),
834 $ desca( lld_ ), iprepad,
835 $ ipostpad, padval )
836 CALL pdchekpad( ictxt, 'PDGERFS', lipiv,
837 $ 1, mem( ippiv-iprepad ),
838 $ lipiv, iprepad,
839 $ ipostpad, padval )
840 CALL pdchekpad( ictxt, 'PDGERFS', np,
841 $ myrhs, mem( ipb-iprepad ),
842 $ descb( lld_ ), iprepad,
843 $ ipostpad, padval )
844 CALL pdchekpad( ictxt, 'PDGERFS', np,
845 $ myrhs,
846 $ mem( ipb0-iprepad ),
847 $ descb( lld_ ), iprepad,
848 $ ipostpad, padval )
849 CALL pdchekpad( ictxt, 'PDGERFS', 1,
850 $ myrhs,
851 $ mem( ipferr-iprepad ), 1,
852 $ iprepad, ipostpad,
853 $ padval )
854 CALL pdchekpad( ictxt, 'PDGERFS', 1,
855 $ myrhs,
856 $ mem( ipberr-iprepad ), 1,
857 $ iprepad, ipostpad,
858 $ padval )
859 CALL pdchekpad( ictxt, 'PDGERFS', lwork,
860 $ 1, mem( ipw-iprepad ),
861 $ lwork, iprepad, ipostpad,
862 $ padval )
863 CALL pdchekpad( ictxt, 'PDGERFS',
864 $ lw2-ipostpad, 1,
865 $ mem( ipw2-iprepad ),
866 $ lw2-ipostpad, iprepad,
867 $ ipostpad, padval )
868*
869 CALL pdfillpad( ictxt, worksiz-ipostpad,
870 $ 1, mem( ipw-iprepad ),
871 $ worksiz-ipostpad, iprepad,
872 $ ipostpad, padval )
873*
874* check the solution to rhs
875*
876 CALL pdlaschk( 'No', 'N', n, nrhs,
877 $ mem( ipb ), 1, 1, descb,
878 $ iaseed, 1, 1, desca,
879 $ ibseed, anorm, sresid2,
880 $ mem( ipw ) )
881*
882 IF( iam.EQ.0 .AND. sresid2.GT.thresh )
883 $ WRITE( nout, fmt = 9985 ) sresid2
884*
885* check for memory overwrite
886*
887 CALL pdchekpad( ictxt, 'PDLASCHK', np,
888 $ myrhs, mem( ipb-iprepad ),
889 $ descb( lld_ ), iprepad,
890 $ ipostpad, padval )
891 CALL pdchekpad( ictxt, 'PDLASCHK',
892 $ worksiz-ipostpad, 1,
893 $ mem( ipw-iprepad ),
894 $ worksiz-ipostpad, iprepad,
895 $ ipostpad, padval )
896 END IF
897 END IF
898*
899* Gather max. of all CPU and WALL clock timings
900*
901 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
902 $ wtime )
903 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
904 $ ctime )
905*
906* Print results
907*
908 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
909*
910* 2/3 N^3 - 1/2 N^2 flops for LU factorization
911*
912 nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -
913 $ (1.0d+0/2.0d+0)*( dble(n)**2 )
914*
915* nrhs * 2 N^2 flops for LU solve.
916*
917 nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)
918*
919* Calculate total megaflops -- factorization
920* and solve -- for WALL and CPU time, and print
921* output
922*
923* Print WALL time if machine supports it
924*
925 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )
926 $ THEN
927 tmflops = nops /
928 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
929 ELSE
930 tmflops = 0.0d+0
931 END IF
932*
933* Print WALL time if supported
934*
935 IF( wtime( 2 ).GE.0.0d+0 )
936 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n,
937 $ nb, nrhs, nbrhs, nprow, npcol,
938 $ wtime( 1 ), wtime( 2 ), tmflops,
939 $ passed
940*
941* Print CPU time if supported
942*
943 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )
944 $ THEN
945 tmflops = nops /
946 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
947 ELSE
948 tmflops = 0.0d+0
949 END IF
950*
951 IF( ctime( 2 ).GE.0.0d+0 )
952 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n,
953 $ nb, nrhs, nbrhs, nprow, npcol,
954 $ ctime( 1 ), ctime( 2 ), tmflops,
955 $ passed
956 END IF
957 10 CONTINUE
958 20 CONTINUE
959*
960 IF( check.AND.( sresid.GT.thresh ) ) THEN
961*
962* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
963*
964 CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,
965 $ mem( ippiv ), mem( ipw ) )
966 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
967 $ 1, desca, iaseed, anorm, fresid,
968 $ mem( ipw ) )
969*
970* Check for memory overwrite
971*
972 CALL pdchekpad( ictxt, 'PDGETRRV', np, nq,
973 $ mem( ipa-iprepad ), desca( lld_ ),
974 $ iprepad, ipostpad, padval )
975 CALL pdchekpad( ictxt, 'PDGETRRV', lipiv,
976 $ 1, mem( ippiv-iprepad ), lipiv,
977 $ iprepad, ipostpad, padval )
978 CALL pdchekpad( ictxt, 'PDGETRRV',
979 $ worksiz-ipostpad, 1,
980 $ mem( ipw-iprepad ),
981 $ worksiz-ipostpad, iprepad,
982 $ ipostpad, padval )
983*
984 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
985 $ WRITE( nout, fmt = 9986 ) fresid
986 END IF
987 END IF
988 30 CONTINUE
989 40 CONTINUE
990 CALL blacs_gridexit( ictxt )
991 50 CONTINUE
992*
993* Print ending messages and close output file
994*
995 60 CONTINUE
996 IF( iam.EQ.0 ) THEN
997 ktests = kpass + kfail + kskip
998 WRITE( nout, fmt = * )
999 WRITE( nout, fmt = 9992 ) ktests
1000 IF( check ) THEN
1001 WRITE( nout, fmt = 9991 ) kpass
1002 WRITE( nout, fmt = 9989 ) kfail
1003 ELSE
1004 WRITE( nout, fmt = 9990 ) kpass
1005 END IF
1006 WRITE( nout, fmt = 9988 ) kskip
1007 WRITE( nout, fmt = * )
1008 WRITE( nout, fmt = * )
1009 WRITE( nout, fmt = 9987 )
1010 IF( nout.NE.6 .AND. nout.NE.0 )
1011 $ CLOSE( nout )
1012 END IF
1013*
1014 CALL blacs_exit( 0 )
1015*
1016 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1017 $ '; It should be at least 1' )
1018 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1019 $ i4 )
1020 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1021 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1022 $ i11 )
1023 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1024 $ 'Sol Time MFLOPS CHECK' )
1025 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1026 $ '-------- -------- ------' )
1027 9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1028 $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1029 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1030 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1031 9990 FORMAT( i5, ' tests completed without checking.' )
1032 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1033 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1034 9987 FORMAT( 'END OF TESTS.' )
1035 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1036 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1037*
1038 stop
1039*
1040* End of PDLUDRIVER
1041*
1042 END
subroutine pdlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pdlafchk.f:3
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
subroutine pdgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition pdgecon.f:3
subroutine pdgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition pdgerfs.f:5
subroutine pdgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition pdgetrf.f:2
subroutine pdgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition pdgetrrv.f:2
subroutine pdgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pdgetrs.f:3
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
subroutine pdlaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pdlaschk.f:4
program pdludriver
Definition pdludriver.f:1
subroutine pdluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition pdluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267