SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcdbdriver.f
Go to the documentation of this file.
1 PROGRAM pcdbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PCDBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by CDB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* CPLXSZ INTEGER, default = 8 bytes.
56* INTGSZ and CPLXSZ indicate the length in bytes on the
57* given platform for an integer and a single precision
58* complex.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 REAL zero
83 INTEGER cplxsz, memsiz, ntests
84 COMPLEX padval
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
88 $ zero = 0.0e+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER trans
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108* ..
109* .. Local Arrays ..
110 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
117* ..
118* .. External Subroutines ..
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121 $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
125* ..
126* .. External Functions ..
127 INTEGER numroc
128 LOGICAL lsame
129 REAL pclange
130 EXTERNAL lsame, numroc, pclange
131* ..
132* .. Intrinsic Functions ..
133 INTRINSIC dble, max, min, mod
134* ..
135* .. Data Statements ..
136 DATA kfail, kpass, kskip, ktests / 4*0 /
137* ..
138*
139*
140*
141* .. Executable Statements ..
142*
143* Get starting information
144*
145 CALL blacs_pinfo( iam, nprocs )
146 iaseed = 100
147 ibseed = 200
148*
149 CALL pcdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
153*
154 check = ( thresh.GE.0.0e+0 )
155*
156* Print headings
157*
158 IF( iam.EQ.0 ) THEN
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
163 END IF
164*
165* Loop over different process grids
166*
167 DO 60 i = 1, ngrids
168*
169 nprow = pval( i )
170 npcol = qval( i )
171*
172* Make sure grid information is correct
173*
174 ierr( 1 ) = 0
175 IF( nprow.LT.1 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178 ierr( 1 ) = 1
179 ELSE IF( npcol.LT.1 ) THEN
180 IF( iam.EQ.0 )
181 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182 ierr( 1 ) = 1
183 ELSE IF( nprow*npcol.GT.nprocs ) THEN
184 IF( iam.EQ.0 )
185 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186 ierr( 1 ) = 1
187 END IF
188*
189 IF( ierr( 1 ).GT.0 ) THEN
190 IF( iam.EQ.0 )
191 $ WRITE( nout, fmt = 9997 ) 'grid'
192 kskip = kskip + 1
193 GO TO 50
194 END IF
195*
196* Define process grid
197*
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200*
201*
202* Define transpose process grid
203*
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206*
207* Go to bottom of process grid loop if this case doesn't use my
208* process
209*
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211*
212 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213 GO TO 50
214 ENDIF
215*
216 DO 40 j = 1, nmat
217*
218 ierr( 1 ) = 0
219*
220 n = nval( j )
221*
222* Make sure matrix information is correct
223*
224 IF( n.LT.1 ) THEN
225 IF( iam.EQ.0 )
226 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227 ierr( 1 ) = 1
228 END IF
229*
230* Check all processes for an error
231*
232 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233 $ -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'size'
238 kskip = kskip + 1
239 GO TO 40
240 END IF
241*
242*
243 DO 45 bw_num = 1, nbw
244*
245 ierr( 1 ) = 0
246*
247 bwl = bwlval( bw_num )
248 IF( bwl.LT.1 ) THEN
249 IF( iam.EQ.0 )
250 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
251 ierr( 1 ) = 1
252 END IF
253*
254 bwu = bwuval( bw_num )
255 IF( bwu.LT.1 ) THEN
256 IF( iam.EQ.0 )
257 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
258 ierr( 1 ) = 1
259 END IF
260*
261 IF( bwl.GT.n-1 ) THEN
262 IF( iam.EQ.0 ) THEN
263 ierr( 1 ) = 1
264 ENDIF
265 END IF
266*
267 IF( bwu.GT.n-1 ) THEN
268 IF( iam.EQ.0 ) THEN
269 ierr( 1 ) = 1
270 ENDIF
271 END IF
272*
273* Check all processes for an error
274*
275 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
276 $ -1, 0 )
277*
278 IF( ierr( 1 ).GT.0 ) THEN
279 kskip = kskip + 1
280 GO TO 45
281 END IF
282*
283 DO 30 k = 1, nnb
284*
285 ierr( 1 ) = 0
286*
287 nb = nbval( k )
288 IF( nb.LT.0 ) THEN
289 nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
290 $ + max(bwl,bwu)
291 nb = max( nb, 2*max(bwl,bwu) )
292 nb = min( n, nb )
293 END IF
294*
295* Make sure NB is legal
296*
297 ierr( 1 ) = 0
298 IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
299 ierr( 1 ) = 1
300 END IF
301*
302* Check all processes for an error
303*
304 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
305 $ -1, 0 )
306*
307 IF( ierr( 1 ).GT.0 ) THEN
308 kskip = kskip + 1
309 GO TO 30
310 END IF
311*
312* Padding constants
313*
314 np = numroc( (bwl+bwu+1), (bwl+bwu+1),
315 $ myrow, 0, nprow )
316 nq = numroc( n, nb, mycol, 0, npcol )
317*
318 IF( check ) THEN
319 iprepad = ((bwl+bwu+1)+10)
320 imidpad = 10
321 ipostpad = ((bwl+bwu+1)+10)
322 ELSE
323 iprepad = 0
324 imidpad = 0
325 ipostpad = 0
326 END IF
327*
328* Initialize the array descriptor for the matrix A
329*
330 CALL descinit( desca2d, (bwl+bwu+1), n,
331 $ (bwl+bwu+1), nb, 0, 0,
332 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
333*
334* Convert this to 1D descriptor
335*
336 desca( 1 ) = 501
337 desca( 3 ) = n
338 desca( 4 ) = nb
339 desca( 5 ) = 0
340 desca( 2 ) = ictxt
341 desca( 6 ) = ((bwl+bwu+1)+10)
342 desca( 7 ) = 0
343*
344 ierr_temp = ierr( 1 )
345 ierr( 1 ) = 0
346 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
347*
348* Check all processes for an error
349*
350 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
351*
352 IF( ierr( 1 ).LT.0 ) THEN
353 IF( iam.EQ.0 )
354 $ WRITE( nout, fmt = 9997 ) 'descriptor'
355 kskip = kskip + 1
356 GO TO 30
357 END IF
358*
359* Assign pointers into MEM for SCALAPACK arrays, A is
360* allocated starting at position MEM( IPREPAD+1 )
361*
362 free_ptr = 1
363 ipb = 0
364*
365* Save room for prepadding
366 free_ptr = free_ptr + iprepad
367*
368 ipa = free_ptr
369 free_ptr = free_ptr + desca2d( lld_ )*
370 $ desca2d( nb_ )
371 $ + ipostpad
372*
373* Add memory for fillin
374* Fillin space needs to store:
375* Fillin spike:
376* Contribution to previous proc's diagonal block of
377* reduced system:
378* Off-diagonal block of reduced system:
379* Diagonal block of reduced system:
380*
381 fillin_size =
382 $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
383*
384* Claim memory for fillin
385*
386 free_ptr = free_ptr + iprepad
387 ip_fillin = free_ptr
388 free_ptr = free_ptr + fillin_size
389*
390* Workspace needed by computational routines:
391*
392 ipw_size = 0
393*
394* factorization:
395*
396 ipw_size = max(bwl,bwu)*max(bwl,bwu)
397*
398* Claim memory for IPW
399*
400 ipw = free_ptr
401 free_ptr = free_ptr + ipw_size
402*
403* Check for adequate memory for problem size
404*
405 ierr( 1 ) = 0
406 IF( free_ptr.GT.memsiz ) THEN
407 IF( iam.EQ.0 )
408 $ WRITE( nout, fmt = 9996 )
409 $ 'divide and conquer factorization',
410 $ (free_ptr )*cplxsz
411 ierr( 1 ) = 1
412 END IF
413*
414* Check all processes for an error
415*
416 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417 $ 1, -1, 0 )
418*
419 IF( ierr( 1 ).GT.0 ) THEN
420 IF( iam.EQ.0 )
421 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422 kskip = kskip + 1
423 GO TO 30
424 END IF
425*
426* Worksize needed for LAPRNT
427 worksiz = max( ((bwl+bwu+1)+10), nb )
428*
429 IF( check ) THEN
430*
431* Calculate the amount of workspace required by
432* the checking routines.
433*
434* PCLANGE
435 worksiz = max( worksiz, desca2d( nb_ ) )
436*
437* PCDBLASCHK
438 worksiz = max( worksiz,
439 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
440 END IF
441*
442 free_ptr = free_ptr + iprepad
443 ip_driver_w = free_ptr
444 free_ptr = free_ptr + worksiz + ipostpad
445*
446*
447* Check for adequate memory for problem size
448*
449 ierr( 1 ) = 0
450 IF( free_ptr.GT.memsiz ) THEN
451 IF( iam.EQ.0 )
452 $ WRITE( nout, fmt = 9996 ) 'factorization',
453 $ ( free_ptr )*cplxsz
454 ierr( 1 ) = 1
455 END IF
456*
457* Check all processes for an error
458*
459 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
460 $ 1, -1, 0 )
461*
462 IF( ierr( 1 ).GT.0 ) THEN
463 IF( iam.EQ.0 )
464 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
465 kskip = kskip + 1
466 GO TO 30
467 END IF
468*
469 CALL pcbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
470 $ (bwl+bwu+1), nb, mem( ipa ),
471 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
472 $ mycol, nprow, npcol )
473*
474 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
475 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
476 $ padval )
477*
478 CALL pcfillpad( ictxt, worksiz, 1,
479 $ mem( ip_driver_w-iprepad ), worksiz,
480 $ iprepad, ipostpad, padval )
481*
482* Calculate norm of A for residual error-checking
483*
484 IF( check ) THEN
485*
486 anorm = pclange( '1', (bwl+bwu+1),
487 $ n, mem( ipa ), 1, 1,
488 $ desca2d, mem( ip_driver_w ) )
489 CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
490 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
491 $ iprepad, ipostpad, padval )
492 CALL pcchekpad( ictxt, 'PCLANGE',
493 $ worksiz, 1,
494 $ mem( ip_driver_w-iprepad ), worksiz,
495 $ iprepad, ipostpad, padval )
496 END IF
497*
498*
499 CALL slboot()
500 CALL blacs_barrier( ictxt, 'All' )
501*
502* Perform factorization
503*
504 CALL sltimer( 1 )
505*
506 CALL pcdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
507 $ mem( ip_fillin ), fillin_size, mem( ipw ),
508 $ ipw_size, info )
509*
510 CALL sltimer( 1 )
511*
512 IF( info.NE.0 ) THEN
513 IF( iam.EQ.0 ) THEN
514 WRITE( nout, fmt = * ) 'PCDBTRF INFO=', info
515 ENDIF
516 kfail = kfail + 1
517 GO TO 30
518 END IF
519*
520 IF( check ) THEN
521*
522* Check for memory overwrite in factorization
523*
524 CALL pcchekpad( ictxt, 'PCDBTRF', np,
525 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
526 $ iprepad, ipostpad, padval )
527 END IF
528*
529*
530* Loop over the different values for NRHS
531*
532 DO 20 hh = 1, nnr
533*
534 ierr( 1 ) = 0
535*
536 nrhs = nrval( hh )
537*
538* Initialize Array Descriptor for rhs
539*
540 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
541 $ ictxtb, nb+10, ierr( 1 ) )
542*
543* Convert this to 1D descriptor
544*
545 descb( 1 ) = 502
546 descb( 3 ) = n
547 descb( 4 ) = nb
548 descb( 5 ) = 0
549 descb( 2 ) = ictxt
550 descb( 6 ) = descb2d( lld_ )
551 descb( 7 ) = 0
552*
553* reset free_ptr to reuse space for right hand sides
554*
555 IF( ipb .GT. 0 ) THEN
556 free_ptr = ipb
557 ENDIF
558*
559 free_ptr = free_ptr + iprepad
560 ipb = free_ptr
561 free_ptr = free_ptr + nrhs*descb2d( lld_ )
562 $ + ipostpad
563*
564* Allocate workspace for workspace in TRS routine:
565*
566 ipw_solve_size = (max(bwl,bwu)*nrhs)
567*
568 ipw_solve = free_ptr
569 free_ptr = free_ptr + ipw_solve_size
570*
571 ierr( 1 ) = 0
572 IF( free_ptr.GT.memsiz ) THEN
573 IF( iam.EQ.0 )
574 $ WRITE( nout, fmt = 9996 )'solve',
575 $ ( free_ptr )*cplxsz
576 ierr( 1 ) = 1
577 END IF
578*
579* Check all processes for an error
580*
581 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
582 $ ierr, 1, -1, 0 )
583*
584 IF( ierr( 1 ).GT.0 ) THEN
585 IF( iam.EQ.0 )
586 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
587 kskip = kskip + 1
588 GO TO 15
589 END IF
590*
591 myrhs_size = numroc( n, nb, mycol, 0, npcol )
592*
593* Generate RHS
594*
595 CALL pcmatgen(ictxtb, 'No', 'No',
596 $ descb2d( m_ ), descb2d( n_ ),
597 $ descb2d( mb_ ), descb2d( nb_ ),
598 $ mem( ipb ),
599 $ descb2d( lld_ ), descb2d( rsrc_ ),
600 $ descb2d( csrc_ ),
601 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
602 $ myrow, npcol, nprow )
603*
604 IF( check ) THEN
605 CALL pcfillpad( ictxtb, nb, nrhs,
606 $ mem( ipb-iprepad ),
607 $ descb2d( lld_ ),
608 $ iprepad, ipostpad,
609 $ padval )
610 CALL pcfillpad( ictxt, worksiz, 1,
611 $ mem( ip_driver_w-iprepad ),
612 $ worksiz, iprepad,
613 $ ipostpad, padval )
614 END IF
615*
616*
617 CALL blacs_barrier( ictxt, 'All')
618 CALL sltimer( 2 )
619*
620* Solve linear system via factorization
621*
622 CALL pcdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
623 $ 1, desca, mem( ipb ), 1, descb,
624 $ mem( ip_fillin ), fillin_size,
625 $ mem( ipw_solve ), ipw_solve_size,
626 $ info )
627*
628 CALL sltimer( 2 )
629*
630 IF( info.NE.0 ) THEN
631 IF( iam.EQ.0 )
632 $ WRITE( nout, fmt = * ) 'PCDBTRS INFO=', info
633 kfail = kfail + 1
634 passed = 'FAILED'
635 GO TO 20
636 END IF
637*
638 IF( check ) THEN
639*
640* check for memory overwrite
641*
642 CALL pcchekpad( ictxt, 'PCDBTRS-work',
643 $ worksiz, 1,
644 $ mem( ip_driver_w-iprepad ),
645 $ worksiz, iprepad,
646 $ ipostpad, padval )
647*
648* check the solution to rhs
649*
650 sresid = zero
651*
652 CALL pcdblaschk( 'N', 'D', trans,
653 $ n, bwl, bwu, nrhs,
654 $ mem( ipb ), 1, 1, descb2d,
655 $ iaseed, mem( ipa ), 1, 1, desca2d,
656 $ ibseed, anorm, sresid,
657 $ mem( ip_driver_w ), worksiz )
658*
659 IF( iam.EQ.0 ) THEN
660 IF( sresid.GT.thresh )
661 $ WRITE( nout, fmt = 9985 ) sresid
662 END IF
663*
664* The second test is a NaN trap
665*
666 IF( ( sresid.LE.thresh ).AND.
667 $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
668 kpass = kpass + 1
669 passed = 'PASSED'
670 ELSE
671 kfail = kfail + 1
672 passed = 'FAILED'
673 END IF
674*
675 END IF
676*
677 15 CONTINUE
678* Skipped tests jump to here to print out "SKIPPED"
679*
680* Gather maximum of all CPU and WALL clock timings
681*
682 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
683 $ wtime )
684 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
685 $ ctime )
686*
687* Print results
688*
689 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
690*
691 nops = 0
692 nops2 = 0
693*
694 n_first = nb
695 nprocs_real = ( n-1 )/nb + 1
696 n_last = mod( n-1, nb ) + 1
697*
698* 2 N bwl bwu + N (bwl) flops
699* for LU factorization
700*
701 nops = 2*(dble(n)*dble(bwl)*
702 $ dble(bwu)) +
703 $ (dble(n)*dble(bwl))
704*
705* nrhs * 2 N*(bwl+bwu) flops for LU solve.
706*
707 nops = nops +
708 $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
709 $ *dble(nrhs))
710*
711* Multiply by 4 to get complex count
712*
713 nops = nops * dble(4)
714*
715* Second calc to represent actual hardware speed
716*
717* 2*N_FIRST bwl*bwu Flops for LU
718* factorization in proc 1
719*
720 nops2 = 2*( (dble(n_first)*
721 $ dble(bwl)*dble(bwu)))
722*
723 IF ( nprocs_real .GT. 1) THEN
724* 8 N_LAST bwl*bwu
725* flops for LU and spike
726* calc in last processor
727*
728 nops2 = nops2 +
729 $ 8*( (dble(n_last)*dble(bwl)
730 $ *dble(bwu)) )
731 ENDIF
732*
733 IF ( nprocs_real .GT. 2) THEN
734* 8 NB bwl*bwu flops for LU and spike
735* calc in other processors
736*
737 nops2 = nops2 + (nprocs_real-2)*
738 $ 8*( (dble(nb)*dble(bwl)
739 $ *dble(bwu)) )
740 ENDIF
741*
742* Reduced system
743*
744 nops2 = nops2 +
745 $ 2*( nprocs_real-1 ) *
746 $ ( bwl*bwu*bwl/3 )
747 IF( nprocs_real .GT. 1 ) THEN
748 nops2 = nops2 +
749 $ 2*( nprocs_real-2 ) *
750 $ (2*bwl*bwu*bwl)
751 ENDIF
752*
753* Solve stage
754*
755* nrhs*2 n_first*
756* (bwl+bwu)
757* flops for L,U solve in proc 1.
758*
759 nops2 = nops2 +
760 $ 2*
761 $ dble(n_first)*
762 $ dble(nrhs) *
763 $ ( dble(bwl)+dble(bwu))
764*
765 IF ( nprocs_real .GT. 1 ) THEN
766*
767* 2*nrhs*2 n_last
768* (bwl+bwu)
769* flops for LU solve in other procs
770*
771 nops2 = nops2 +
772 $ 4*
773 $ (dble(n_last)*(dble(bwl)+
774 $ dble(bwu)))*dble(nrhs)
775 ENDIF
776*
777 IF ( nprocs_real .GT. 2 ) THEN
778*
779* 2*nrhs*2 NB
780* (bwl+bwu)
781* flops for LU solve in other procs
782*
783 nops2 = nops2 +
784 $ ( nprocs_real-2)*2*
785 $ ( (dble(nb)*(dble(bwl)+
786 $ dble(bwu)))*dble(nrhs) )
787 ENDIF
788*
789* Reduced system
790*
791 nops2 = nops2 +
792 $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
793 IF( nprocs_real .GT. 1 ) THEN
794 nops2 = nops2 +
795 $ nrhs*( nprocs_real-2 ) *
796 $ ( 6 * bwl*bwu )
797 ENDIF
798*
799*
800* Multiply by 4 to get complex count
801*
802 nops2 = nops2 * dble(4)
803*
804* Calculate total megaflops - factorization and/or
805* solve -- for WALL and CPU time, and print output
806*
807* Print WALL time if machine supports it
808*
809 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
810 tmflops = nops /
811 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
812 ELSE
813 tmflops = 0.0d+0
814 END IF
815*
816 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
817 tmflops2 = nops2 /
818 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
819 ELSE
820 tmflops2 = 0.0d+0
821 END IF
822*
823 IF( wtime( 2 ).GE.0.0d+0 )
824 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
825 $ n,
826 $ bwl, bwu,
827 $ nb, nrhs, nprow, npcol,
828 $ wtime( 1 ), wtime( 2 ), tmflops,
829 $ tmflops2, passed
830*
831* Print CPU time if machine supports it
832*
833 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
834 tmflops = nops /
835 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
836 ELSE
837 tmflops = 0.0d+0
838 END IF
839*
840 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
841 tmflops2 = nops2 /
842 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
843 ELSE
844 tmflops2 = 0.0d+0
845 END IF
846*
847 IF( ctime( 2 ).GE.0.0d+0 )
848 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
849 $ n,
850 $ bwl, bwu,
851 $ nb, nrhs, nprow, npcol,
852 $ ctime( 1 ), ctime( 2 ), tmflops,
853 $ tmflops2, passed
854*
855 END IF
856 20 CONTINUE
857*
858*
859 30 CONTINUE
860* NNB loop
861*
862 45 CONTINUE
863* BW[] loop
864*
865 40 CONTINUE
866* NMAT loop
867*
868 CALL blacs_gridexit( ictxt )
869 CALL blacs_gridexit( ictxtb )
870*
871 50 CONTINUE
872* NGRIDS DROPOUT
873 60 CONTINUE
874* NGRIDS loop
875*
876* Print ending messages and close output file
877*
878 IF( iam.EQ.0 ) THEN
879 ktests = kpass + kfail + kskip
880 WRITE( nout, fmt = * )
881 WRITE( nout, fmt = 9992 ) ktests
882 IF( check ) THEN
883 WRITE( nout, fmt = 9991 ) kpass
884 WRITE( nout, fmt = 9989 ) kfail
885 ELSE
886 WRITE( nout, fmt = 9990 ) kpass
887 END IF
888 WRITE( nout, fmt = 9988 ) kskip
889 WRITE( nout, fmt = * )
890 WRITE( nout, fmt = * )
891 WRITE( nout, fmt = 9987 )
892 IF( nout.NE.6 .AND. nout.NE.0 )
893 $ CLOSE ( nout )
894 END IF
895*
896 CALL blacs_exit( 0 )
897*
898 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
899 $ '; It should be at least 1' )
900 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
901 $ i4 )
902 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
903 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
904 $ i11 )
905 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
906 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
907 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
908 $ '-------- -------- -------- ------' )
909 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
910 $ 1x,i4,1x,i4,1x,f9.3,
911 $ f9.4, f9.2, f9.2, 1x, a6 )
912 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
913 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
914 9990 FORMAT( i5, ' tests completed without checking.' )
915 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
916 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
917 9987 FORMAT( 'END OF TESTS.' )
918 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
919 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
920*
921 stop
922*
923* End of PCDBTRS_DRIVER
924*
925 END
926*
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
program pcdbdriver
Definition pcdbdriver.f:1
subroutine pcdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcdbinfo.f:6
subroutine pcdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcdblaschk.f:4
subroutine pcdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
Definition pcdbtrf.f:3
subroutine pcdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pcdbtrs.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724