SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdgbdriver.f
Go to the documentation of this file.
1 PROGRAM pdgbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PDGBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by DGB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTMEM INTEGER, default = 2048.
55* INTMEM is the size of the integer workspace used in this
56* driver as input as the IPIV vector. It represents an
57* upper bound on NB, the blocksize of the data
58* distribution.
59*
60* INTGSZ INTEGER, default = 4 bytes.
61* DBLESZ INTEGER, default = 8 bytes.
62* INTGSZ and DBLESZ indicate the length in bytes on the
63* given platform for an integer and a double precision real.
64* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
65* All arrays used by ScaLAPACK routines are allocated from
66* this array and referenced by pointers. The integer IPB,
67* for example, is a pointer to the starting element of MEM for
68* the solution vector(s) B.
69*
70* =====================================================================
71*
72* Code Developer: Andrew J. Cleary, University of Tennessee.
73* Current address: Lawrence Livermore National Labs.
74* This version released: August, 2001.
75*
76* =====================================================================
77*
78* .. Parameters ..
79 INTEGER totmem
80 parameter( totmem = 3000000 )
81 INTEGER intmem
82 parameter( intmem = 2048 )
83 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
84 $ lld_, mb_, m_, nb_, n_, rsrc_
85 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
86 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
87 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
88*
89 DOUBLE PRECISION zero
90 INTEGER dblesz, memsiz, ntests
91 DOUBLE PRECISION padval
92 parameter( dblesz = 8,
93 $ memsiz = totmem / dblesz, ntests = 20,
94 $ padval = -9923.0d+0, zero = 0.0d+0 )
95 INTEGER int_one
96 parameter( int_one = 1 )
97* ..
98* .. Local Scalars ..
99 LOGICAL check
100 CHARACTER trans
101 CHARACTER*6 passed
102 CHARACTER*80 outfile
103 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
104 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
105 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
106 $ iprepad, ipw, ipw_size, ipw_solve,
107 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
108 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
109 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
110 $ nnr, nout, np, npcol, nprocs, nprocs_real,
111 $ nprow, nq, nrhs, n_first, n_last, worksiz
112 REAL thresh
113 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
114 $ tmflops2
115* ..
116* .. Local Arrays ..
117 INTEGER ipiv(intmem)
118 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
119 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
120 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
121 $ nrval( ntests ), nval( ntests ),
122 $ pval( ntests ), qval( ntests )
123 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
124* ..
125* .. External Subroutines ..
126 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
127 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
128 $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
131 $ sltimer
132* ..
133* .. External Functions ..
134 INTEGER numroc
135 LOGICAL lsame
136 DOUBLE PRECISION pdlange
137 EXTERNAL lsame, numroc, pdlange
138* ..
139* .. Intrinsic Functions ..
140 INTRINSIC dble, max, min, mod
141* ..
142* .. Data Statements ..
143 DATA kfail, kpass, kskip, ktests / 4*0 /
144* ..
145*
146*
147*
148* .. Executable Statements ..
149*
150* Get starting information
151*
152 CALL blacs_pinfo( iam, nprocs )
153 iaseed = 100
154 ibseed = 200
155*
156 CALL pdgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
157 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
158 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
159 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
160*
161 check = ( thresh.GE.0.0d+0 )
162*
163* Print headings
164*
165 IF( iam.EQ.0 ) THEN
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = * )
170 END IF
171*
172* Loop over different process grids
173*
174 DO 60 i = 1, ngrids
175*
176 nprow = pval( i )
177 npcol = qval( i )
178*
179* Make sure grid information is correct
180*
181 ierr( 1 ) = 0
182 IF( nprow.LT.1 ) THEN
183 IF( iam.EQ.0 )
184 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
185 ierr( 1 ) = 1
186 ELSE IF( npcol.LT.1 ) THEN
187 IF( iam.EQ.0 )
188 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
189 ierr( 1 ) = 1
190 ELSE IF( nprow*npcol.GT.nprocs ) THEN
191 IF( iam.EQ.0 )
192 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
193 ierr( 1 ) = 1
194 END IF
195*
196 IF( ierr( 1 ).GT.0 ) THEN
197 IF( iam.EQ.0 )
198 $ WRITE( nout, fmt = 9997 ) 'grid'
199 kskip = kskip + 1
200 GO TO 50
201 END IF
202*
203* Define process grid
204*
205 CALL blacs_get( -1, 0, ictxt )
206 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
207*
208*
209* Define transpose process grid
210*
211 CALL blacs_get( -1, 0, ictxtb )
212 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
213*
214* Go to bottom of process grid loop if this case doesn't use my
215* process
216*
217 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
218*
219 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
220 GO TO 50
221 ENDIF
222*
223 DO 40 j = 1, nmat
224*
225 ierr( 1 ) = 0
226*
227 n = nval( j )
228*
229* Make sure matrix information is correct
230*
231 IF( n.LT.1 ) THEN
232 IF( iam.EQ.0 )
233 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
234 ierr( 1 ) = 1
235 END IF
236*
237* Check all processes for an error
238*
239 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
240 $ -1, 0 )
241*
242 IF( ierr( 1 ).GT.0 ) THEN
243 IF( iam.EQ.0 )
244 $ WRITE( nout, fmt = 9997 ) 'size'
245 kskip = kskip + 1
246 GO TO 40
247 END IF
248*
249*
250 DO 45 bw_num = 1, nbw
251*
252 ierr( 1 ) = 0
253*
254 bwl = bwlval( bw_num )
255 IF( bwl.LT.1 ) THEN
256 IF( iam.EQ.0 )
257 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
258 ierr( 1 ) = 1
259 END IF
260*
261 bwu = bwuval( bw_num )
262 IF( bwu.LT.1 ) THEN
263 IF( iam.EQ.0 )
264 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
265 ierr( 1 ) = 1
266 END IF
267*
268 IF( bwl.GT.n-1 ) THEN
269 IF( iam.EQ.0 ) THEN
270 ierr( 1 ) = 1
271 ENDIF
272 END IF
273*
274 IF( bwu.GT.n-1 ) THEN
275 IF( iam.EQ.0 ) THEN
276 ierr( 1 ) = 1
277 ENDIF
278 END IF
279*
280* Check all processes for an error
281*
282 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
283 $ -1, 0 )
284*
285 IF( ierr( 1 ).GT.0 ) THEN
286 kskip = kskip + 1
287 GO TO 45
288 END IF
289*
290 DO 30 k = 1, nnb
291*
292 ierr( 1 ) = 0
293*
294 nb = nbval( k )
295 IF( nb.LT.0 ) THEN
296 nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
297 $ + (bwl+bwu)
298 nb = max( nb, 2*(bwl+bwu) )
299 nb = min( n, nb )
300 END IF
301*
302* Make sure NB is legal
303*
304 ierr( 1 ) = 0
305*
306 IF( nb.GT.intmem ) THEN
307 ierr( 1 ) = 1
308 IF( iam.EQ.0 ) THEN
309 WRITE( nout,* )'You have chosen an '
310 $ ,'NB > INTMEM in the driver.'
311 WRITE(nout, *)'Please edit the driver '
312 $ ,'and increase the value of INTMEM'
313 ENDIF
314 END IF
315*
316* Check all processes for an error
317*
318 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
319 $ -1, 0 )
320*
321 IF( ierr( 1 ).GT.0 ) THEN
322 kskip = kskip + 1
323 GO TO 30
324 END IF
325*
326* Padding constants
327*
328 np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
329 $ myrow, 0, nprow )
330 nq = numroc( n, nb, mycol, 0, npcol )
331*
332 IF( check ) THEN
333 iprepad = ((2*bwl+2*bwu+1)+10)
334 imidpad = 10
335 ipostpad = ((2*bwl+2*bwu+1)+10)
336 ELSE
337 iprepad = 0
338 imidpad = 0
339 ipostpad = 0
340 END IF
341*
342* Initialize the array descriptor for the matrix A
343*
344 CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
345 $ (2*bwl+2*bwu+1), nb, 0, 0,
346 $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
347*
348* Convert this to 1D descriptor
349*
350 desca( 1 ) = 501
351 desca( 3 ) = n
352 desca( 4 ) = nb
353 desca( 5 ) = 0
354 desca( 2 ) = ictxt
355 desca( 6 ) = ((2*bwl+2*bwu+1)+10)
356 desca( 7 ) = 0
357*
358 ierr_temp = ierr( 1 )
359 ierr( 1 ) = 0
360 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
361*
362* Check all processes for an error
363*
364 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
365*
366 IF( ierr( 1 ).LT.0 ) THEN
367 IF( iam.EQ.0 )
368 $ WRITE( nout, fmt = 9997 ) 'descriptor'
369 kskip = kskip + 1
370 GO TO 30
371 END IF
372*
373* Assign pointers into MEM for SCALAPACK arrays, A is
374* allocated starting at position MEM( IPREPAD+1 )
375*
376 free_ptr = 1
377 ipb = 0
378*
379* Save room for prepadding
380 free_ptr = free_ptr + iprepad
381*
382 ipa = free_ptr
383 free_ptr = free_ptr + desca2d( lld_ )*
384 $ desca2d( nb_ )
385 $ + ipostpad
386*
387* Add memory for fillin
388* Fillin space needs to store:
389* Fillin spike:
390* Contribution to previous proc's diagonal block of
391* reduced system:
392* Off-diagonal block of reduced system:
393* Diagonal block of reduced system:
394*
395 fillin_size =
396 $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
397*
398* Claim memory for fillin
399*
400 free_ptr = free_ptr + iprepad
401 ip_fillin = free_ptr
402 free_ptr = free_ptr + fillin_size
403*
404* Workspace needed by computational routines:
405*
406 ipw_size = 0
407*
408* factorization:
409*
410 ipw_size = 1
411*
412* Claim memory for IPW
413*
414 ipw = free_ptr
415 free_ptr = free_ptr + ipw_size
416*
417* Check for adequate memory for problem size
418*
419 ierr( 1 ) = 0
420 IF( free_ptr.GT.memsiz ) THEN
421 IF( iam.EQ.0 )
422 $ WRITE( nout, fmt = 9996 )
423 $ 'divide and conquer factorization',
424 $ (free_ptr )*dblesz
425 ierr( 1 ) = 1
426 END IF
427*
428* Check all processes for an error
429*
430 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
431 $ 1, -1, 0 )
432*
433 IF( ierr( 1 ).GT.0 ) THEN
434 IF( iam.EQ.0 )
435 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
436 kskip = kskip + 1
437 GO TO 30
438 END IF
439*
440* Worksize needed for LAPRNT
441 worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
442*
443 IF( check ) THEN
444*
445* Calculate the amount of workspace required by
446* the checking routines.
447*
448* PDLANGE
449 worksiz = max( worksiz, desca2d( nb_ ) )
450*
451* PDDBLASCHK
452 worksiz = max( worksiz,
453 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
454 END IF
455*
456 free_ptr = free_ptr + iprepad
457 ip_driver_w = free_ptr
458 free_ptr = free_ptr + worksiz + ipostpad
459*
460*
461* Check for adequate memory for problem size
462*
463 ierr( 1 ) = 0
464 IF( free_ptr.GT.memsiz ) THEN
465 IF( iam.EQ.0 )
466 $ WRITE( nout, fmt = 9996 ) 'factorization',
467 $ ( free_ptr )*dblesz
468 ierr( 1 ) = 1
469 END IF
470*
471* Check all processes for an error
472*
473 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
474 $ 1, -1, 0 )
475*
476 IF( ierr( 1 ).GT.0 ) THEN
477 IF( iam.EQ.0 )
478 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
479 kskip = kskip + 1
480 GO TO 30
481 END IF
482*
483 CALL pdbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
484 $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
485 $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
486 $ myrow, mycol, nprow, npcol )
487*
488 CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
489 $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
490 $ padval )
491*
492 CALL pdfillpad( ictxt, worksiz, 1,
493 $ mem( ip_driver_w-iprepad ), worksiz,
494 $ iprepad, ipostpad, padval )
495*
496* Calculate norm of A for residual error-checking
497*
498 IF( check ) THEN
499*
500 anorm = pdlange( '1', (2*bwl+2*bwu+1),
501 $ n, mem( ipa ), 1, 1,
502 $ desca2d, mem( ip_driver_w ) )
503 CALL pdchekpad( ictxt, 'PDLANGE', np, nq,
504 $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
505 $ iprepad, ipostpad, padval )
506 CALL pdchekpad( ictxt, 'PDLANGE',
507 $ worksiz, 1,
508 $ mem( ip_driver_w-iprepad ), worksiz,
509 $ iprepad, ipostpad, padval )
510 END IF
511*
512*
513 CALL slboot()
514 CALL blacs_barrier( ictxt, 'All' )
515*
516* Perform factorization
517*
518 CALL sltimer( 1 )
519*
520 CALL pdgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
521 $ mem( ip_fillin ), fillin_size, mem( ipw ),
522 $ ipw_size, info )
523*
524 CALL sltimer( 1 )
525*
526 IF( info.NE.0 ) THEN
527 IF( iam.EQ.0 ) THEN
528 WRITE( nout, fmt = * ) 'PDGBTRF INFO=', info
529 ENDIF
530 kfail = kfail + 1
531 GO TO 30
532 END IF
533*
534 IF( check ) THEN
535*
536* Check for memory overwrite in factorization
537*
538 CALL pdchekpad( ictxt, 'PDGBTRF', np,
539 $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
540 $ iprepad, ipostpad, padval )
541 END IF
542*
543*
544* Loop over the different values for NRHS
545*
546 DO 20 hh = 1, nnr
547*
548 ierr( 1 ) = 0
549*
550 nrhs = nrval( hh )
551*
552* Initialize Array Descriptor for rhs
553*
554 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
555 $ ictxtb, nb+10, ierr( 1 ) )
556*
557* Convert this to 1D descriptor
558*
559 descb( 1 ) = 502
560 descb( 3 ) = n
561 descb( 4 ) = nb
562 descb( 5 ) = 0
563 descb( 2 ) = ictxt
564 descb( 6 ) = descb2d( lld_ )
565 descb( 7 ) = 0
566*
567* reset free_ptr to reuse space for right hand sides
568*
569 IF( ipb .GT. 0 ) THEN
570 free_ptr = ipb
571 ENDIF
572*
573 free_ptr = free_ptr + iprepad
574 ipb = free_ptr
575 free_ptr = free_ptr + nrhs*descb2d( lld_ )
576 $ + ipostpad
577*
578* Allocate workspace for workspace in TRS routine:
579*
580 ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
581*
582 ipw_solve = free_ptr
583 free_ptr = free_ptr + ipw_solve_size
584*
585 ierr( 1 ) = 0
586 IF( free_ptr.GT.memsiz ) THEN
587 IF( iam.EQ.0 )
588 $ WRITE( nout, fmt = 9996 )'solve',
589 $ ( free_ptr )*dblesz
590 ierr( 1 ) = 1
591 END IF
592*
593* Check all processes for an error
594*
595 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
596 $ ierr, 1, -1, 0 )
597*
598 IF( ierr( 1 ).GT.0 ) THEN
599 IF( iam.EQ.0 )
600 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
601 kskip = kskip + 1
602 GO TO 15
603 END IF
604*
605 myrhs_size = numroc( n, nb, mycol, 0, npcol )
606*
607* Generate RHS
608*
609 CALL pdmatgen(ictxtb, 'No', 'No',
610 $ descb2d( m_ ), descb2d( n_ ),
611 $ descb2d( mb_ ), descb2d( nb_ ),
612 $ mem( ipb ),
613 $ descb2d( lld_ ), descb2d( rsrc_ ),
614 $ descb2d( csrc_ ),
615 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
616 $ myrow, npcol, nprow )
617*
618 IF( check ) THEN
619 CALL pdfillpad( ictxtb, nb, nrhs,
620 $ mem( ipb-iprepad ),
621 $ descb2d( lld_ ),
622 $ iprepad, ipostpad,
623 $ padval )
624 CALL pdfillpad( ictxt, worksiz, 1,
625 $ mem( ip_driver_w-iprepad ),
626 $ worksiz, iprepad,
627 $ ipostpad, padval )
628 END IF
629*
630*
631 CALL blacs_barrier( ictxt, 'All')
632 CALL sltimer( 2 )
633*
634* Solve linear system via factorization
635*
636 CALL pdgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
637 $ 1, desca, ipiv, mem( ipb ), 1, descb,
638 $ mem( ip_fillin ), fillin_size,
639 $ mem( ipw_solve ), ipw_solve_size,
640 $ info )
641*
642 CALL sltimer( 2 )
643*
644 IF( info.NE.0 ) THEN
645 IF( iam.EQ.0 )
646 $ WRITE( nout, fmt = * ) 'PDGBTRS INFO=', info
647 kfail = kfail + 1
648 passed = 'FAILED'
649 GO TO 20
650 END IF
651*
652 IF( check ) THEN
653*
654* check for memory overwrite
655*
656 CALL pdchekpad( ictxt, 'PDGBTRS-work',
657 $ worksiz, 1,
658 $ mem( ip_driver_w-iprepad ),
659 $ worksiz, iprepad,
660 $ ipostpad, padval )
661*
662* check the solution to rhs
663*
664 sresid = zero
665*
666 CALL pddblaschk( 'N', 'N', trans,
667 $ n, bwl, bwu, nrhs,
668 $ mem( ipb ), 1, 1, descb2d,
669 $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
670 $ ibseed, anorm, sresid,
671 $ mem( ip_driver_w ), worksiz )
672*
673 IF( iam.EQ.0 ) THEN
674 IF( sresid.GT.thresh )
675 $ WRITE( nout, fmt = 9985 ) sresid
676 END IF
677*
678* The second test is a NaN trap
679*
680 IF( ( sresid.LE.thresh ).AND.
681 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
682 kpass = kpass + 1
683 passed = 'PASSED'
684 ELSE
685 kfail = kfail + 1
686 passed = 'FAILED'
687 END IF
688*
689 END IF
690*
691 15 CONTINUE
692* Skipped tests jump to here to print out "SKIPPED"
693*
694* Gather maximum of all CPU and WALL clock timings
695*
696 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
697 $ wtime )
698 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
699 $ ctime )
700*
701* Print results
702*
703 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
704*
705 nops = 0
706 nops2 = 0
707*
708 n_first = nb
709 nprocs_real = ( n-1 )/nb + 1
710 n_last = mod( n-1, nb ) + 1
711*
712* 2 N bwl (bwl+bwu) + N (bwl) flops
713* for LU factorization
714*
715 nops = 2*(dble(n)*dble(bwl)*
716 $ dble((bwl+bwu))) +
717 $ (dble(n)*dble(bwl))
718*
719* nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
720*
721 nops = nops +
722 $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
723 $ *dble(nrhs))
724*
725* Second calc to represent actual hardware speed
726*
727* 2*N_FIRST (bwl+bwu)*bwu Flops for LU
728* factorization in proc 1
729*
730 nops2 = 2*( (dble(n_first)*
731 $ dble((bwl+bwu))*dble(bwu)))
732*
733 IF ( nprocs_real .GT. 1) THEN
734* 8 N_LAST (bwl+bwu)*(bwl+bwu)
735* flops for LU and spike
736* calc in last processor
737*
738 nops2 = nops2 +
739 $ 8*( (dble(n_last)*dble((bwl+bwu))
740 $ *dble((bwl+bwu))) )
741 ENDIF
742*
743 IF ( nprocs_real .GT. 2) THEN
744* 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
745* calc in other processors
746*
747 nops2 = nops2 + (nprocs_real-2)*
748 $ 8*( (dble(nb)*dble((bwl+bwu))
749 $ *dble((bwl+bwu))) )
750 ENDIF
751*
752* Reduced system
753*
754 nops2 = nops2 +
755 $ 2*( nprocs_real-1 ) *
756 $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
757 IF( nprocs_real .GT. 1 ) THEN
758 nops2 = nops2 +
759 $ 2*( nprocs_real-2 ) *
760 $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
761 ENDIF
762*
763* Solve stage
764*
765* nrhs*2 n_first*
766* (bwl+(bwl+bwu))
767* flops for L,U solve in proc 1.
768*
769 nops2 = nops2 +
770 $ 2*
771 $ dble(n_first)*
772 $ dble(nrhs) *
773 $ ( dble(bwl)+dble((bwl+bwu)))
774*
775 IF ( nprocs_real .GT. 1 ) THEN
776*
777* 2*nrhs*2 n_last
778* ((bwl+bwu)+(bwl+bwu))
779* flops for LU solve in other procs
780*
781 nops2 = nops2 +
782 $ 4*
783 $ (dble(n_last)*(dble((bwl+bwu))+
784 $ dble((bwl+bwu))))*dble(nrhs)
785 ENDIF
786*
787 IF ( nprocs_real .GT. 2 ) THEN
788*
789* 2*nrhs*2 NB
790* ((bwl+bwu)+(bwl+bwu))
791* flops for LU solve in other procs
792*
793 nops2 = nops2 +
794 $ ( nprocs_real-2)*2*
795 $ ( (dble(nb)*(dble((bwl+bwu))+
796 $ dble((bwl+bwu))))*dble(nrhs) )
797 ENDIF
798*
799* Reduced system
800*
801 nops2 = nops2 +
802 $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
803 IF( nprocs_real .GT. 1 ) THEN
804 nops2 = nops2 +
805 $ nrhs*( nprocs_real-2 ) *
806 $ ( 6 * (bwl+bwu)*(bwl+bwu) )
807 ENDIF
808*
809*
810* Calculate total megaflops - factorization and/or
811* solve -- for WALL and CPU time, and print output
812*
813* Print WALL time if machine supports it
814*
815 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
816 tmflops = nops /
817 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
818 ELSE
819 tmflops = 0.0d+0
820 END IF
821*
822 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
823 tmflops2 = nops2 /
824 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
825 ELSE
826 tmflops2 = 0.0d+0
827 END IF
828*
829 IF( wtime( 2 ).GE.0.0d+0 )
830 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
831 $ n,
832 $ bwl, bwu,
833 $ nb, nrhs, nprow, npcol,
834 $ wtime( 1 ), wtime( 2 ), tmflops,
835 $ tmflops2, passed
836*
837* Print CPU time if machine supports it
838*
839 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
840 tmflops = nops /
841 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
842 ELSE
843 tmflops = 0.0d+0
844 END IF
845*
846 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
847 tmflops2 = nops2 /
848 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
849 ELSE
850 tmflops2 = 0.0d+0
851 END IF
852*
853 IF( ctime( 2 ).GE.0.0d+0 )
854 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
855 $ n,
856 $ bwl, bwu,
857 $ nb, nrhs, nprow, npcol,
858 $ ctime( 1 ), ctime( 2 ), tmflops,
859 $ tmflops2, passed
860*
861 END IF
862 20 CONTINUE
863*
864*
865 30 CONTINUE
866* NNB loop
867*
868 45 CONTINUE
869* BW[] loop
870*
871 40 CONTINUE
872* NMAT loop
873*
874 CALL blacs_gridexit( ictxt )
875 CALL blacs_gridexit( ictxtb )
876*
877 50 CONTINUE
878* NGRIDS DROPOUT
879 60 CONTINUE
880* NGRIDS loop
881*
882* Print ending messages and close output file
883*
884 IF( iam.EQ.0 ) THEN
885 ktests = kpass + kfail + kskip
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9992 ) ktests
888 IF( check ) THEN
889 WRITE( nout, fmt = 9991 ) kpass
890 WRITE( nout, fmt = 9989 ) kfail
891 ELSE
892 WRITE( nout, fmt = 9990 ) kpass
893 END IF
894 WRITE( nout, fmt = 9988 ) kskip
895 WRITE( nout, fmt = * )
896 WRITE( nout, fmt = * )
897 WRITE( nout, fmt = 9987 )
898 IF( nout.NE.6 .AND. nout.NE.0 )
899 $ CLOSE ( nout )
900 END IF
901*
902 CALL blacs_exit( 0 )
903*
904 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
905 $ '; It should be at least 1' )
906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
907 $ i4 )
908 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
909 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
910 $ i11 )
911 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
913 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914 $ '-------- -------- -------- ------' )
915 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916 $ 1x,i4,1x,i4,1x,f9.3,
917 $ f9.4, f9.2, f9.2, 1x, a6 )
918 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
919 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
920 9990 FORMAT( i5, ' tests completed without checking.' )
921 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
922 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
923 9987 FORMAT( 'END OF TESTS.' )
924 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
926*
927 stop
928*
929* End of PDGBTRS_DRIVER
930*
931 END
932*
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pdbmatgen.f:5
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
subroutine pddblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pddblaschk.f:4
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
program pdgbdriver
Definition pdgbdriver.f:1
subroutine pdgbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pdgbinfo.f:6
subroutine pdgbtrf(n, bwl, bwu, a, ja, desca, ipiv, af, laf, work, lwork, info)
Definition pdgbtrf.f:3
subroutine pdgbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, ipiv, b, ib, descb, af, laf, work, lwork, info)
Definition pdgbtrs.f:3
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724