SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pddbdriver.f
Go to the documentation of this file.
1 PROGRAM pddbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PDDBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by DDB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* DBLESZ INTEGER, default = 8 bytes.
56* INTGSZ and DBLESZ indicate the length in bytes on the
57* given platform for an integer and a double precision real.
58* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
59* All arrays used by ScaLAPACK routines are allocated from
60* this array and referenced by pointers. The integer IPB,
61* for example, is a pointer to the starting element of MEM for
62* the solution vector(s) B.
63*
64* =====================================================================
65*
66* Code Developer: Andrew J. Cleary, University of Tennessee.
67* Current address: Lawrence Livermore National Labs.
68* This version released: August, 2001.
69*
70* =====================================================================
71*
72* .. Parameters ..
73 INTEGER totmem
74 parameter( totmem = 3000000 )
75 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_, rsrc_
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
80*
81 DOUBLE PRECISION zero
82 INTEGER dblesz, memsiz, ntests
83 DOUBLE PRECISION padval
84 parameter( dblesz = 8,
85 $ memsiz = totmem / dblesz, ntests = 20,
86 $ padval = -9923.0d+0, zero = 0.0d+0 )
87 INTEGER int_one
88 parameter( int_one = 1 )
89* ..
90* .. Local Scalars ..
91 LOGICAL check
92 CHARACTER trans
93 CHARACTER*6 passed
94 CHARACTER*80 outfile
95 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98 $ iprepad, ipw, ipw_size, ipw_solve,
99 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102 $ nnr, nout, np, npcol, nprocs, nprocs_real,
103 $ nprow, nq, nrhs, n_first, n_last, worksiz
104 REAL thresh
105 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
106 $ tmflops2
107* ..
108* .. Local Arrays ..
109 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112 $ nrval( ntests ), nval( ntests ),
113 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
115* ..
116* .. External Subroutines ..
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
119 $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
123* ..
124* .. External Functions ..
125 INTEGER numroc
126 LOGICAL lsame
127 DOUBLE PRECISION pdlange
128 EXTERNAL lsame, numroc, pdlange
129* ..
130* .. Intrinsic Functions ..
131 INTRINSIC dble, max, min, mod
132* ..
133* .. Data Statements ..
134 DATA kfail, kpass, kskip, ktests / 4*0 /
135* ..
136*
137*
138*
139* .. Executable Statements ..
140*
141* Get starting information
142*
143 CALL blacs_pinfo( iam, nprocs )
144 iaseed = 100
145 ibseed = 200
146*
147 CALL pddbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
151*
152 check = ( thresh.GE.0.0d+0 )
153*
154* Print headings
155*
156 IF( iam.EQ.0 ) THEN
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
161 END IF
162*
163* Loop over different process grids
164*
165 DO 60 i = 1, ngrids
166*
167 nprow = pval( i )
168 npcol = qval( i )
169*
170* Make sure grid information is correct
171*
172 ierr( 1 ) = 0
173 IF( nprow.LT.1 ) THEN
174 IF( iam.EQ.0 )
175 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
176 ierr( 1 ) = 1
177 ELSE IF( npcol.LT.1 ) THEN
178 IF( iam.EQ.0 )
179 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
180 ierr( 1 ) = 1
181 ELSE IF( nprow*npcol.GT.nprocs ) THEN
182 IF( iam.EQ.0 )
183 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
184 ierr( 1 ) = 1
185 END IF
186*
187 IF( ierr( 1 ).GT.0 ) THEN
188 IF( iam.EQ.0 )
189 $ WRITE( nout, fmt = 9997 ) 'grid'
190 kskip = kskip + 1
191 GO TO 50
192 END IF
193*
194* Define process grid
195*
196 CALL blacs_get( -1, 0, ictxt )
197 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
198*
199*
200* Define transpose process grid
201*
202 CALL blacs_get( -1, 0, ictxtb )
203 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
204*
205* Go to bottom of process grid loop if this case doesn't use my
206* process
207*
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209*
210 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
211 GO TO 50
212 ENDIF
213*
214 DO 40 j = 1, nmat
215*
216 ierr( 1 ) = 0
217*
218 n = nval( j )
219*
220* Make sure matrix information is correct
221*
222 IF( n.LT.1 ) THEN
223 IF( iam.EQ.0 )
224 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
225 ierr( 1 ) = 1
226 END IF
227*
228* Check all processes for an error
229*
230 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
231 $ -1, 0 )
232*
233 IF( ierr( 1 ).GT.0 ) THEN
234 IF( iam.EQ.0 )
235 $ WRITE( nout, fmt = 9997 ) 'size'
236 kskip = kskip + 1
237 GO TO 40
238 END IF
239*
240*
241 DO 45 bw_num = 1, nbw
242*
243 ierr( 1 ) = 0
244*
245 bwl = bwlval( bw_num )
246 IF( bwl.LT.1 ) THEN
247 IF( iam.EQ.0 )
248 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
249 ierr( 1 ) = 1
250 END IF
251*
252 bwu = bwuval( bw_num )
253 IF( bwu.LT.1 ) THEN
254 IF( iam.EQ.0 )
255 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
256 ierr( 1 ) = 1
257 END IF
258*
259 IF( bwl.GT.n-1 ) THEN
260 IF( iam.EQ.0 ) THEN
261 ierr( 1 ) = 1
262 ENDIF
263 END IF
264*
265 IF( bwu.GT.n-1 ) THEN
266 IF( iam.EQ.0 ) THEN
267 ierr( 1 ) = 1
268 ENDIF
269 END IF
270*
271* Check all processes for an error
272*
273 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
274 $ -1, 0 )
275*
276 IF( ierr( 1 ).GT.0 ) THEN
277 kskip = kskip + 1
278 GO TO 45
279 END IF
280*
281 DO 30 k = 1, nnb
282*
283 ierr( 1 ) = 0
284*
285 nb = nbval( k )
286 IF( nb.LT.0 ) THEN
287 nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
288 $ + max(bwl,bwu)
289 nb = max( nb, 2*max(bwl,bwu) )
290 nb = min( n, nb )
291 END IF
292*
293* Make sure NB is legal
294*
295 ierr( 1 ) = 0
296 IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
297 ierr( 1 ) = 1
298 END IF
299*
300* Check all processes for an error
301*
302 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
303 $ -1, 0 )
304*
305 IF( ierr( 1 ).GT.0 ) THEN
306 kskip = kskip + 1
307 GO TO 30
308 END IF
309*
310* Padding constants
311*
312 np = numroc( (bwl+bwu+1), (bwl+bwu+1),
313 $ myrow, 0, nprow )
314 nq = numroc( n, nb, mycol, 0, npcol )
315*
316 IF( check ) THEN
317 iprepad = ((bwl+bwu+1)+10)
318 imidpad = 10
319 ipostpad = ((bwl+bwu+1)+10)
320 ELSE
321 iprepad = 0
322 imidpad = 0
323 ipostpad = 0
324 END IF
325*
326* Initialize the array descriptor for the matrix A
327*
328 CALL descinit( desca2d, (bwl+bwu+1), n,
329 $ (bwl+bwu+1), nb, 0, 0,
330 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
331*
332* Convert this to 1D descriptor
333*
334 desca( 1 ) = 501
335 desca( 3 ) = n
336 desca( 4 ) = nb
337 desca( 5 ) = 0
338 desca( 2 ) = ictxt
339 desca( 6 ) = ((bwl+bwu+1)+10)
340 desca( 7 ) = 0
341*
342 ierr_temp = ierr( 1 )
343 ierr( 1 ) = 0
344 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
345*
346* Check all processes for an error
347*
348 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
349*
350 IF( ierr( 1 ).LT.0 ) THEN
351 IF( iam.EQ.0 )
352 $ WRITE( nout, fmt = 9997 ) 'descriptor'
353 kskip = kskip + 1
354 GO TO 30
355 END IF
356*
357* Assign pointers into MEM for SCALAPACK arrays, A is
358* allocated starting at position MEM( IPREPAD+1 )
359*
360 free_ptr = 1
361 ipb = 0
362*
363* Save room for prepadding
364 free_ptr = free_ptr + iprepad
365*
366 ipa = free_ptr
367 free_ptr = free_ptr + desca2d( lld_ )*
368 $ desca2d( nb_ )
369 $ + ipostpad
370*
371* Add memory for fillin
372* Fillin space needs to store:
373* Fillin spike:
374* Contribution to previous proc's diagonal block of
375* reduced system:
376* Off-diagonal block of reduced system:
377* Diagonal block of reduced system:
378*
379 fillin_size =
380 $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
381*
382* Claim memory for fillin
383*
384 free_ptr = free_ptr + iprepad
385 ip_fillin = free_ptr
386 free_ptr = free_ptr + fillin_size
387*
388* Workspace needed by computational routines:
389*
390 ipw_size = 0
391*
392* factorization:
393*
394 ipw_size = max(bwl,bwu)*max(bwl,bwu)
395*
396* Claim memory for IPW
397*
398 ipw = free_ptr
399 free_ptr = free_ptr + ipw_size
400*
401* Check for adequate memory for problem size
402*
403 ierr( 1 ) = 0
404 IF( free_ptr.GT.memsiz ) THEN
405 IF( iam.EQ.0 )
406 $ WRITE( nout, fmt = 9996 )
407 $ 'divide and conquer factorization',
408 $ (free_ptr )*dblesz
409 ierr( 1 ) = 1
410 END IF
411*
412* Check all processes for an error
413*
414 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
415 $ 1, -1, 0 )
416*
417 IF( ierr( 1 ).GT.0 ) THEN
418 IF( iam.EQ.0 )
419 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
420 kskip = kskip + 1
421 GO TO 30
422 END IF
423*
424* Worksize needed for LAPRNT
425 worksiz = max( ((bwl+bwu+1)+10), nb )
426*
427 IF( check ) THEN
428*
429* Calculate the amount of workspace required by
430* the checking routines.
431*
432* PDLANGE
433 worksiz = max( worksiz, desca2d( nb_ ) )
434*
435* PDDBLASCHK
436 worksiz = max( worksiz,
437 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
438 END IF
439*
440 free_ptr = free_ptr + iprepad
441 ip_driver_w = free_ptr
442 free_ptr = free_ptr + worksiz + ipostpad
443*
444*
445* Check for adequate memory for problem size
446*
447 ierr( 1 ) = 0
448 IF( free_ptr.GT.memsiz ) THEN
449 IF( iam.EQ.0 )
450 $ WRITE( nout, fmt = 9996 ) 'factorization',
451 $ ( free_ptr )*dblesz
452 ierr( 1 ) = 1
453 END IF
454*
455* Check all processes for an error
456*
457 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
458 $ 1, -1, 0 )
459*
460 IF( ierr( 1 ).GT.0 ) THEN
461 IF( iam.EQ.0 )
462 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
463 kskip = kskip + 1
464 GO TO 30
465 END IF
466*
467 CALL pdbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
468 $ (bwl+bwu+1), nb, mem( ipa ),
469 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
470 $ mycol, nprow, npcol )
471*
472 CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
473 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
474 $ padval )
475*
476 CALL pdfillpad( ictxt, worksiz, 1,
477 $ mem( ip_driver_w-iprepad ), worksiz,
478 $ iprepad, ipostpad, padval )
479*
480* Calculate norm of A for residual error-checking
481*
482 IF( check ) THEN
483*
484 anorm = pdlange( '1', (bwl+bwu+1),
485 $ n, mem( ipa ), 1, 1,
486 $ desca2d, mem( ip_driver_w ) )
487 CALL pdchekpad( ictxt, 'PDLANGE', np, nq,
488 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
489 $ iprepad, ipostpad, padval )
490 CALL pdchekpad( ictxt, 'PDLANGE',
491 $ worksiz, 1,
492 $ mem( ip_driver_w-iprepad ), worksiz,
493 $ iprepad, ipostpad, padval )
494 END IF
495*
496*
497 CALL slboot()
498 CALL blacs_barrier( ictxt, 'All' )
499*
500* Perform factorization
501*
502 CALL sltimer( 1 )
503*
504 CALL pddbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
505 $ mem( ip_fillin ), fillin_size, mem( ipw ),
506 $ ipw_size, info )
507*
508 CALL sltimer( 1 )
509*
510 IF( info.NE.0 ) THEN
511 IF( iam.EQ.0 ) THEN
512 WRITE( nout, fmt = * ) 'PDDBTRF INFO=', info
513 ENDIF
514 kfail = kfail + 1
515 GO TO 30
516 END IF
517*
518 IF( check ) THEN
519*
520* Check for memory overwrite in factorization
521*
522 CALL pdchekpad( ictxt, 'PDDBTRF', np,
523 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
524 $ iprepad, ipostpad, padval )
525 END IF
526*
527*
528* Loop over the different values for NRHS
529*
530 DO 20 hh = 1, nnr
531*
532 ierr( 1 ) = 0
533*
534 nrhs = nrval( hh )
535*
536* Initialize Array Descriptor for rhs
537*
538 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
539 $ ictxtb, nb+10, ierr( 1 ) )
540*
541* Convert this to 1D descriptor
542*
543 descb( 1 ) = 502
544 descb( 3 ) = n
545 descb( 4 ) = nb
546 descb( 5 ) = 0
547 descb( 2 ) = ictxt
548 descb( 6 ) = descb2d( lld_ )
549 descb( 7 ) = 0
550*
551* reset free_ptr to reuse space for right hand sides
552*
553 IF( ipb .GT. 0 ) THEN
554 free_ptr = ipb
555 ENDIF
556*
557 free_ptr = free_ptr + iprepad
558 ipb = free_ptr
559 free_ptr = free_ptr + nrhs*descb2d( lld_ )
560 $ + ipostpad
561*
562* Allocate workspace for workspace in TRS routine:
563*
564 ipw_solve_size = (max(bwl,bwu)*nrhs)
565*
566 ipw_solve = free_ptr
567 free_ptr = free_ptr + ipw_solve_size
568*
569 ierr( 1 ) = 0
570 IF( free_ptr.GT.memsiz ) THEN
571 IF( iam.EQ.0 )
572 $ WRITE( nout, fmt = 9996 )'solve',
573 $ ( free_ptr )*dblesz
574 ierr( 1 ) = 1
575 END IF
576*
577* Check all processes for an error
578*
579 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
580 $ ierr, 1, -1, 0 )
581*
582 IF( ierr( 1 ).GT.0 ) THEN
583 IF( iam.EQ.0 )
584 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
585 kskip = kskip + 1
586 GO TO 15
587 END IF
588*
589 myrhs_size = numroc( n, nb, mycol, 0, npcol )
590*
591* Generate RHS
592*
593 CALL pdmatgen(ictxtb, 'No', 'No',
594 $ descb2d( m_ ), descb2d( n_ ),
595 $ descb2d( mb_ ), descb2d( nb_ ),
596 $ mem( ipb ),
597 $ descb2d( lld_ ), descb2d( rsrc_ ),
598 $ descb2d( csrc_ ),
599 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
600 $ myrow, npcol, nprow )
601*
602 IF( check ) THEN
603 CALL pdfillpad( ictxtb, nb, nrhs,
604 $ mem( ipb-iprepad ),
605 $ descb2d( lld_ ),
606 $ iprepad, ipostpad,
607 $ padval )
608 CALL pdfillpad( ictxt, worksiz, 1,
609 $ mem( ip_driver_w-iprepad ),
610 $ worksiz, iprepad,
611 $ ipostpad, padval )
612 END IF
613*
614*
615 CALL blacs_barrier( ictxt, 'All')
616 CALL sltimer( 2 )
617*
618* Solve linear system via factorization
619*
620 CALL pddbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
621 $ 1, desca, mem( ipb ), 1, descb,
622 $ mem( ip_fillin ), fillin_size,
623 $ mem( ipw_solve ), ipw_solve_size,
624 $ info )
625*
626 CALL sltimer( 2 )
627*
628 IF( info.NE.0 ) THEN
629 IF( iam.EQ.0 )
630 $ WRITE( nout, fmt = * ) 'PDDBTRS INFO=', info
631 kfail = kfail + 1
632 passed = 'FAILED'
633 GO TO 20
634 END IF
635*
636 IF( check ) THEN
637*
638* check for memory overwrite
639*
640 CALL pdchekpad( ictxt, 'PDDBTRS-work',
641 $ worksiz, 1,
642 $ mem( ip_driver_w-iprepad ),
643 $ worksiz, iprepad,
644 $ ipostpad, padval )
645*
646* check the solution to rhs
647*
648 sresid = zero
649*
650 CALL pddblaschk( 'N', 'D', trans,
651 $ n, bwl, bwu, nrhs,
652 $ mem( ipb ), 1, 1, descb2d,
653 $ iaseed, mem( ipa ), 1, 1, desca2d,
654 $ ibseed, anorm, sresid,
655 $ mem( ip_driver_w ), worksiz )
656*
657 IF( iam.EQ.0 ) THEN
658 IF( sresid.GT.thresh )
659 $ WRITE( nout, fmt = 9985 ) sresid
660 END IF
661*
662* The second test is a NaN trap
663*
664 IF( ( sresid.LE.thresh ).AND.
665 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
666 kpass = kpass + 1
667 passed = 'PASSED'
668 ELSE
669 kfail = kfail + 1
670 passed = 'FAILED'
671 END IF
672*
673 END IF
674*
675 15 CONTINUE
676* Skipped tests jump to here to print out "SKIPPED"
677*
678* Gather maximum of all CPU and WALL clock timings
679*
680 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
681 $ wtime )
682 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
683 $ ctime )
684*
685* Print results
686*
687 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
688*
689 nops = 0
690 nops2 = 0
691*
692 n_first = nb
693 nprocs_real = ( n-1 )/nb + 1
694 n_last = mod( n-1, nb ) + 1
695*
696* 2 N bwl bwu + N (bwl) flops
697* for LU factorization
698*
699 nops = 2*(dble(n)*dble(bwl)*
700 $ dble(bwu)) +
701 $ (dble(n)*dble(bwl))
702*
703* nrhs * 2 N*(bwl+bwu) flops for LU solve.
704*
705 nops = nops +
706 $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
707 $ *dble(nrhs))
708*
709* Second calc to represent actual hardware speed
710*
711* 2*N_FIRST bwl*bwu Flops for LU
712* factorization in proc 1
713*
714 nops2 = 2*( (dble(n_first)*
715 $ dble(bwl)*dble(bwu)))
716*
717 IF ( nprocs_real .GT. 1) THEN
718* 8 N_LAST bwl*bwu
719* flops for LU and spike
720* calc in last processor
721*
722 nops2 = nops2 +
723 $ 8*( (dble(n_last)*dble(bwl)
724 $ *dble(bwu)) )
725 ENDIF
726*
727 IF ( nprocs_real .GT. 2) THEN
728* 8 NB bwl*bwu flops for LU and spike
729* calc in other processors
730*
731 nops2 = nops2 + (nprocs_real-2)*
732 $ 8*( (dble(nb)*dble(bwl)
733 $ *dble(bwu)) )
734 ENDIF
735*
736* Reduced system
737*
738 nops2 = nops2 +
739 $ 2*( nprocs_real-1 ) *
740 $ ( bwl*bwu*bwl/3 )
741 IF( nprocs_real .GT. 1 ) THEN
742 nops2 = nops2 +
743 $ 2*( nprocs_real-2 ) *
744 $ (2*bwl*bwu*bwl)
745 ENDIF
746*
747* Solve stage
748*
749* nrhs*2 n_first*
750* (bwl+bwu)
751* flops for L,U solve in proc 1.
752*
753 nops2 = nops2 +
754 $ 2*
755 $ dble(n_first)*
756 $ dble(nrhs) *
757 $ ( dble(bwl)+dble(bwu))
758*
759 IF ( nprocs_real .GT. 1 ) THEN
760*
761* 2*nrhs*2 n_last
762* (bwl+bwu)
763* flops for LU solve in other procs
764*
765 nops2 = nops2 +
766 $ 4*
767 $ (dble(n_last)*(dble(bwl)+
768 $ dble(bwu)))*dble(nrhs)
769 ENDIF
770*
771 IF ( nprocs_real .GT. 2 ) THEN
772*
773* 2*nrhs*2 NB
774* (bwl+bwu)
775* flops for LU solve in other procs
776*
777 nops2 = nops2 +
778 $ ( nprocs_real-2)*2*
779 $ ( (dble(nb)*(dble(bwl)+
780 $ dble(bwu)))*dble(nrhs) )
781 ENDIF
782*
783* Reduced system
784*
785 nops2 = nops2 +
786 $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
787 IF( nprocs_real .GT. 1 ) THEN
788 nops2 = nops2 +
789 $ nrhs*( nprocs_real-2 ) *
790 $ ( 6 * bwl*bwu )
791 ENDIF
792*
793*
794* Calculate total megaflops - factorization and/or
795* solve -- for WALL and CPU time, and print output
796*
797* Print WALL time if machine supports it
798*
799 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
800 tmflops = nops /
801 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
802 ELSE
803 tmflops = 0.0d+0
804 END IF
805*
806 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
807 tmflops2 = nops2 /
808 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
809 ELSE
810 tmflops2 = 0.0d+0
811 END IF
812*
813 IF( wtime( 2 ).GE.0.0d+0 )
814 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
815 $ n,
816 $ bwl, bwu,
817 $ nb, nrhs, nprow, npcol,
818 $ wtime( 1 ), wtime( 2 ), tmflops,
819 $ tmflops2, passed
820*
821* Print CPU time if machine supports it
822*
823 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
824 tmflops = nops /
825 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
826 ELSE
827 tmflops = 0.0d+0
828 END IF
829*
830 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
831 tmflops2 = nops2 /
832 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
833 ELSE
834 tmflops2 = 0.0d+0
835 END IF
836*
837 IF( ctime( 2 ).GE.0.0d+0 )
838 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
839 $ n,
840 $ bwl, bwu,
841 $ nb, nrhs, nprow, npcol,
842 $ ctime( 1 ), ctime( 2 ), tmflops,
843 $ tmflops2, passed
844*
845 END IF
846 20 CONTINUE
847*
848*
849 30 CONTINUE
850* NNB loop
851*
852 45 CONTINUE
853* BW[] loop
854*
855 40 CONTINUE
856* NMAT loop
857*
858 CALL blacs_gridexit( ictxt )
859 CALL blacs_gridexit( ictxtb )
860*
861 50 CONTINUE
862* NGRIDS DROPOUT
863 60 CONTINUE
864* NGRIDS loop
865*
866* Print ending messages and close output file
867*
868 IF( iam.EQ.0 ) THEN
869 ktests = kpass + kfail + kskip
870 WRITE( nout, fmt = * )
871 WRITE( nout, fmt = 9992 ) ktests
872 IF( check ) THEN
873 WRITE( nout, fmt = 9991 ) kpass
874 WRITE( nout, fmt = 9989 ) kfail
875 ELSE
876 WRITE( nout, fmt = 9990 ) kpass
877 END IF
878 WRITE( nout, fmt = 9988 ) kskip
879 WRITE( nout, fmt = * )
880 WRITE( nout, fmt = * )
881 WRITE( nout, fmt = 9987 )
882 IF( nout.NE.6 .AND. nout.NE.0 )
883 $ CLOSE ( nout )
884 END IF
885*
886 CALL blacs_exit( 0 )
887*
888 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
889 $ '; It should be at least 1' )
890 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
891 $ i4 )
892 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
893 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
894 $ i11 )
895 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
896 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
897 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
898 $ '-------- -------- -------- ------' )
899 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
900 $ 1x,i4,1x,i4,1x,f9.3,
901 $ f9.4, f9.2, f9.2, 1x, a6 )
902 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
903 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
904 9990 FORMAT( i5, ' tests completed without checking.' )
905 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
906 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
907 9987 FORMAT( 'END OF TESTS.' )
908 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
909 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
910*
911 stop
912*
913* End of PDDBTRS_DRIVER
914*
915 END
916*
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pdbmatgen.f:5
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
program pddbdriver
Definition pddbdriver.f:1
subroutine pddbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pddbinfo.f:6
subroutine pddblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pddblaschk.f:4
subroutine pddbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
Definition pddbtrf.f:3
subroutine pddbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pddbtrs.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724