SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzgbdriver.f
Go to the documentation of this file.
1 PROGRAM pzgbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PZGBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by ZGB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTMEM INTEGER, default = 2048.
55* INTMEM is the size of the integer workspace used in this
56* driver as input as the IPIV vector. It represents an
57* upper bound on NB, the blocksize of the data
58* distribution.
59*
60* INTGSZ INTEGER, default = 4 bytes.
61* ZPLXSZ INTEGER, default = 16 bytes.
62* INTGSZ and ZPLXSZ indicate the length in bytes on the
63* given platform for an integer and a double precision
64* complex.
65* MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
66* All arrays used by ScaLAPACK routines are allocated from
67* this array and referenced by pointers. The integer IPB,
68* for example, is a pointer to the starting element of MEM for
69* the solution vector(s) B.
70*
71* =====================================================================
72*
73* Code Developer: Andrew J. Cleary, University of Tennessee.
74* Current address: Lawrence Livermore National Labs.
75* This version released: August, 2001.
76*
77* =====================================================================
78*
79* .. Parameters ..
80 INTEGER totmem
81 parameter( totmem = 3000000 )
82 INTEGER intmem
83 parameter( intmem = 2048 )
84 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
85 $ lld_, mb_, m_, nb_, n_, rsrc_
86 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
87 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
88 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
89*
90 DOUBLE PRECISION zero
91 INTEGER memsiz, ntests, zplxsz
92 COMPLEX*16 padval
93 parameter( zplxsz = 16,
94 $ memsiz = totmem / zplxsz, ntests = 20,
95 $ padval = ( -9923.0d+0, -9923.0d+0 ),
96 $ zero = 0.0d+0 )
97 INTEGER int_one
98 parameter( int_one = 1 )
99* ..
100* .. Local Scalars ..
101 LOGICAL check
102 CHARACTER trans
103 CHARACTER*6 passed
104 CHARACTER*80 outfile
105 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
106 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
107 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
108 $ iprepad, ipw, ipw_size, ipw_solve,
109 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
110 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
111 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
112 $ nnr, nout, np, npcol, nprocs, nprocs_real,
113 $ nprow, nq, nrhs, n_first, n_last, worksiz
114 REAL thresh
115 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
116 $ tmflops2
117* ..
118* .. Local Arrays ..
119 INTEGER ipiv(intmem)
120 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
121 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
122 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
123 $ nrval( ntests ), nval( ntests ),
124 $ pval( ntests ), qval( ntests )
125 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
126 COMPLEX*16 mem( memsiz )
127* ..
128* .. External Subroutines ..
129 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
130 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
131 $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
134 $ sltimer
135* ..
136* .. External Functions ..
137 INTEGER numroc
138 LOGICAL lsame
139 DOUBLE PRECISION pzlange
140 EXTERNAL lsame, numroc, pzlange
141* ..
142* .. Intrinsic Functions ..
143 INTRINSIC dble, max, min, mod
144* ..
145* .. Data Statements ..
146 DATA kfail, kpass, kskip, ktests / 4*0 /
147* ..
148*
149*
150*
151* .. Executable Statements ..
152*
153* Get starting information
154*
155 CALL blacs_pinfo( iam, nprocs )
156 iaseed = 100
157 ibseed = 200
158*
159 CALL pzgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
160 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
161 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
162 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
163*
164 check = ( thresh.GE.0.0d+0 )
165*
166* Print headings
167*
168 IF( iam.EQ.0 ) THEN
169 WRITE( nout, fmt = * )
170 WRITE( nout, fmt = 9995 )
171 WRITE( nout, fmt = 9994 )
172 WRITE( nout, fmt = * )
173 END IF
174*
175* Loop over different process grids
176*
177 DO 60 i = 1, ngrids
178*
179 nprow = pval( i )
180 npcol = qval( i )
181*
182* Make sure grid information is correct
183*
184 ierr( 1 ) = 0
185 IF( nprow.LT.1 ) THEN
186 IF( iam.EQ.0 )
187 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
188 ierr( 1 ) = 1
189 ELSE IF( npcol.LT.1 ) THEN
190 IF( iam.EQ.0 )
191 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
192 ierr( 1 ) = 1
193 ELSE IF( nprow*npcol.GT.nprocs ) THEN
194 IF( iam.EQ.0 )
195 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
196 ierr( 1 ) = 1
197 END IF
198*
199 IF( ierr( 1 ).GT.0 ) THEN
200 IF( iam.EQ.0 )
201 $ WRITE( nout, fmt = 9997 ) 'grid'
202 kskip = kskip + 1
203 GO TO 50
204 END IF
205*
206* Define process grid
207*
208 CALL blacs_get( -1, 0, ictxt )
209 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
210*
211*
212* Define transpose process grid
213*
214 CALL blacs_get( -1, 0, ictxtb )
215 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
216*
217* Go to bottom of process grid loop if this case doesn't use my
218* process
219*
220 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
221*
222 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
223 GO TO 50
224 ENDIF
225*
226 DO 40 j = 1, nmat
227*
228 ierr( 1 ) = 0
229*
230 n = nval( j )
231*
232* Make sure matrix information is correct
233*
234 IF( n.LT.1 ) THEN
235 IF( iam.EQ.0 )
236 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
237 ierr( 1 ) = 1
238 END IF
239*
240* Check all processes for an error
241*
242 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
243 $ -1, 0 )
244*
245 IF( ierr( 1 ).GT.0 ) THEN
246 IF( iam.EQ.0 )
247 $ WRITE( nout, fmt = 9997 ) 'size'
248 kskip = kskip + 1
249 GO TO 40
250 END IF
251*
252*
253 DO 45 bw_num = 1, nbw
254*
255 ierr( 1 ) = 0
256*
257 bwl = bwlval( bw_num )
258 IF( bwl.LT.1 ) THEN
259 IF( iam.EQ.0 )
260 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
261 ierr( 1 ) = 1
262 END IF
263*
264 bwu = bwuval( bw_num )
265 IF( bwu.LT.1 ) THEN
266 IF( iam.EQ.0 )
267 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
268 ierr( 1 ) = 1
269 END IF
270*
271 IF( bwl.GT.n-1 ) THEN
272 IF( iam.EQ.0 ) THEN
273 ierr( 1 ) = 1
274 ENDIF
275 END IF
276*
277 IF( bwu.GT.n-1 ) THEN
278 IF( iam.EQ.0 ) THEN
279 ierr( 1 ) = 1
280 ENDIF
281 END IF
282*
283* Check all processes for an error
284*
285 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
286 $ -1, 0 )
287*
288 IF( ierr( 1 ).GT.0 ) THEN
289 kskip = kskip + 1
290 GO TO 45
291 END IF
292*
293 DO 30 k = 1, nnb
294*
295 ierr( 1 ) = 0
296*
297 nb = nbval( k )
298 IF( nb.LT.0 ) THEN
299 nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
300 $ + (bwl+bwu)
301 nb = max( nb, 2*(bwl+bwu) )
302 nb = min( n, nb )
303 END IF
304*
305* Make sure NB is legal
306*
307 ierr( 1 ) = 0
308*
309 IF( nb.GT.intmem ) THEN
310 ierr( 1 ) = 1
311 IF( iam.EQ.0 ) THEN
312 WRITE( nout,* )'You have chosen an '
313 $ ,'NB > INTMEM in the driver.'
314 WRITE(nout, *)'Please edit the driver '
315 $ ,'and increase the value of INTMEM'
316 ENDIF
317 END IF
318*
319* Check all processes for an error
320*
321 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
322 $ -1, 0 )
323*
324 IF( ierr( 1 ).GT.0 ) THEN
325 kskip = kskip + 1
326 GO TO 30
327 END IF
328*
329* Padding constants
330*
331 np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
332 $ myrow, 0, nprow )
333 nq = numroc( n, nb, mycol, 0, npcol )
334*
335 IF( check ) THEN
336 iprepad = ((2*bwl+2*bwu+1)+10)
337 imidpad = 10
338 ipostpad = ((2*bwl+2*bwu+1)+10)
339 ELSE
340 iprepad = 0
341 imidpad = 0
342 ipostpad = 0
343 END IF
344*
345* Initialize the array descriptor for the matrix A
346*
347 CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
348 $ (2*bwl+2*bwu+1), nb, 0, 0,
349 $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
350*
351* Convert this to 1D descriptor
352*
353 desca( 1 ) = 501
354 desca( 3 ) = n
355 desca( 4 ) = nb
356 desca( 5 ) = 0
357 desca( 2 ) = ictxt
358 desca( 6 ) = ((2*bwl+2*bwu+1)+10)
359 desca( 7 ) = 0
360*
361 ierr_temp = ierr( 1 )
362 ierr( 1 ) = 0
363 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
364*
365* Check all processes for an error
366*
367 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
368*
369 IF( ierr( 1 ).LT.0 ) THEN
370 IF( iam.EQ.0 )
371 $ WRITE( nout, fmt = 9997 ) 'descriptor'
372 kskip = kskip + 1
373 GO TO 30
374 END IF
375*
376* Assign pointers into MEM for SCALAPACK arrays, A is
377* allocated starting at position MEM( IPREPAD+1 )
378*
379 free_ptr = 1
380 ipb = 0
381*
382* Save room for prepadding
383 free_ptr = free_ptr + iprepad
384*
385 ipa = free_ptr
386 free_ptr = free_ptr + desca2d( lld_ )*
387 $ desca2d( nb_ )
388 $ + ipostpad
389*
390* Add memory for fillin
391* Fillin space needs to store:
392* Fillin spike:
393* Contribution to previous proc's diagonal block of
394* reduced system:
395* Off-diagonal block of reduced system:
396* Diagonal block of reduced system:
397*
398 fillin_size =
399 $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
400*
401* Claim memory for fillin
402*
403 free_ptr = free_ptr + iprepad
404 ip_fillin = free_ptr
405 free_ptr = free_ptr + fillin_size
406*
407* Workspace needed by computational routines:
408*
409 ipw_size = 0
410*
411* factorization:
412*
413 ipw_size = 1
414*
415* Claim memory for IPW
416*
417 ipw = free_ptr
418 free_ptr = free_ptr + ipw_size
419*
420* Check for adequate memory for problem size
421*
422 ierr( 1 ) = 0
423 IF( free_ptr.GT.memsiz ) THEN
424 IF( iam.EQ.0 )
425 $ WRITE( nout, fmt = 9996 )
426 $ 'divide and conquer factorization',
427 $ (free_ptr )*zplxsz
428 ierr( 1 ) = 1
429 END IF
430*
431* Check all processes for an error
432*
433 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
434 $ 1, -1, 0 )
435*
436 IF( ierr( 1 ).GT.0 ) THEN
437 IF( iam.EQ.0 )
438 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
439 kskip = kskip + 1
440 GO TO 30
441 END IF
442*
443* Worksize needed for LAPRNT
444 worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
445*
446 IF( check ) THEN
447*
448* Calculate the amount of workspace required by
449* the checking routines.
450*
451* PZLANGE
452 worksiz = max( worksiz, desca2d( nb_ ) )
453*
454* PZDBLASCHK
455 worksiz = max( worksiz,
456 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
457 END IF
458*
459 free_ptr = free_ptr + iprepad
460 ip_driver_w = free_ptr
461 free_ptr = free_ptr + worksiz + ipostpad
462*
463*
464* Check for adequate memory for problem size
465*
466 ierr( 1 ) = 0
467 IF( free_ptr.GT.memsiz ) THEN
468 IF( iam.EQ.0 )
469 $ WRITE( nout, fmt = 9996 ) 'factorization',
470 $ ( free_ptr )*zplxsz
471 ierr( 1 ) = 1
472 END IF
473*
474* Check all processes for an error
475*
476 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
477 $ 1, -1, 0 )
478*
479 IF( ierr( 1 ).GT.0 ) THEN
480 IF( iam.EQ.0 )
481 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
482 kskip = kskip + 1
483 GO TO 30
484 END IF
485*
486 CALL pzbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
487 $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
488 $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
489 $ myrow, mycol, nprow, npcol )
490*
491 CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
492 $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
493 $ padval )
494*
495 CALL pzfillpad( ictxt, worksiz, 1,
496 $ mem( ip_driver_w-iprepad ), worksiz,
497 $ iprepad, ipostpad, padval )
498*
499* Calculate norm of A for residual error-checking
500*
501 IF( check ) THEN
502*
503 anorm = pzlange( '1', (2*bwl+2*bwu+1),
504 $ n, mem( ipa ), 1, 1,
505 $ desca2d, mem( ip_driver_w ) )
506 CALL pzchekpad( ictxt, 'PZLANGE', np, nq,
507 $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
508 $ iprepad, ipostpad, padval )
509 CALL pzchekpad( ictxt, 'PZLANGE',
510 $ worksiz, 1,
511 $ mem( ip_driver_w-iprepad ), worksiz,
512 $ iprepad, ipostpad, padval )
513 END IF
514*
515*
516 CALL slboot()
517 CALL blacs_barrier( ictxt, 'All' )
518*
519* Perform factorization
520*
521 CALL sltimer( 1 )
522*
523 CALL pzgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
524 $ mem( ip_fillin ), fillin_size, mem( ipw ),
525 $ ipw_size, info )
526*
527 CALL sltimer( 1 )
528*
529 IF( info.NE.0 ) THEN
530 IF( iam.EQ.0 ) THEN
531 WRITE( nout, fmt = * ) 'PZGBTRF INFO=', info
532 ENDIF
533 kfail = kfail + 1
534 GO TO 30
535 END IF
536*
537 IF( check ) THEN
538*
539* Check for memory overwrite in factorization
540*
541 CALL pzchekpad( ictxt, 'PZGBTRF', np,
542 $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
543 $ iprepad, ipostpad, padval )
544 END IF
545*
546*
547* Loop over the different values for NRHS
548*
549 DO 20 hh = 1, nnr
550*
551 ierr( 1 ) = 0
552*
553 nrhs = nrval( hh )
554*
555* Initialize Array Descriptor for rhs
556*
557 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
558 $ ictxtb, nb+10, ierr( 1 ) )
559*
560* Convert this to 1D descriptor
561*
562 descb( 1 ) = 502
563 descb( 3 ) = n
564 descb( 4 ) = nb
565 descb( 5 ) = 0
566 descb( 2 ) = ictxt
567 descb( 6 ) = descb2d( lld_ )
568 descb( 7 ) = 0
569*
570* reset free_ptr to reuse space for right hand sides
571*
572 IF( ipb .GT. 0 ) THEN
573 free_ptr = ipb
574 ENDIF
575*
576 free_ptr = free_ptr + iprepad
577 ipb = free_ptr
578 free_ptr = free_ptr + nrhs*descb2d( lld_ )
579 $ + ipostpad
580*
581* Allocate workspace for workspace in TRS routine:
582*
583 ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
584*
585 ipw_solve = free_ptr
586 free_ptr = free_ptr + ipw_solve_size
587*
588 ierr( 1 ) = 0
589 IF( free_ptr.GT.memsiz ) THEN
590 IF( iam.EQ.0 )
591 $ WRITE( nout, fmt = 9996 )'solve',
592 $ ( free_ptr )*zplxsz
593 ierr( 1 ) = 1
594 END IF
595*
596* Check all processes for an error
597*
598 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
599 $ ierr, 1, -1, 0 )
600*
601 IF( ierr( 1 ).GT.0 ) THEN
602 IF( iam.EQ.0 )
603 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
604 kskip = kskip + 1
605 GO TO 15
606 END IF
607*
608 myrhs_size = numroc( n, nb, mycol, 0, npcol )
609*
610* Generate RHS
611*
612 CALL pzmatgen(ictxtb, 'No', 'No',
613 $ descb2d( m_ ), descb2d( n_ ),
614 $ descb2d( mb_ ), descb2d( nb_ ),
615 $ mem( ipb ),
616 $ descb2d( lld_ ), descb2d( rsrc_ ),
617 $ descb2d( csrc_ ),
618 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
619 $ myrow, npcol, nprow )
620*
621 IF( check ) THEN
622 CALL pzfillpad( ictxtb, nb, nrhs,
623 $ mem( ipb-iprepad ),
624 $ descb2d( lld_ ),
625 $ iprepad, ipostpad,
626 $ padval )
627 CALL pzfillpad( ictxt, worksiz, 1,
628 $ mem( ip_driver_w-iprepad ),
629 $ worksiz, iprepad,
630 $ ipostpad, padval )
631 END IF
632*
633*
634 CALL blacs_barrier( ictxt, 'All')
635 CALL sltimer( 2 )
636*
637* Solve linear system via factorization
638*
639 CALL pzgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
640 $ 1, desca, ipiv, mem( ipb ), 1, descb,
641 $ mem( ip_fillin ), fillin_size,
642 $ mem( ipw_solve ), ipw_solve_size,
643 $ info )
644*
645 CALL sltimer( 2 )
646*
647 IF( info.NE.0 ) THEN
648 IF( iam.EQ.0 )
649 $ WRITE( nout, fmt = * ) 'PZGBTRS INFO=', info
650 kfail = kfail + 1
651 passed = 'FAILED'
652 GO TO 20
653 END IF
654*
655 IF( check ) THEN
656*
657* check for memory overwrite
658*
659 CALL pzchekpad( ictxt, 'PZGBTRS-work',
660 $ worksiz, 1,
661 $ mem( ip_driver_w-iprepad ),
662 $ worksiz, iprepad,
663 $ ipostpad, padval )
664*
665* check the solution to rhs
666*
667 sresid = zero
668*
669 CALL pzdblaschk( 'N', 'N', trans,
670 $ n, bwl, bwu, nrhs,
671 $ mem( ipb ), 1, 1, descb2d,
672 $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
673 $ ibseed, anorm, sresid,
674 $ mem( ip_driver_w ), worksiz )
675*
676 IF( iam.EQ.0 ) THEN
677 IF( sresid.GT.thresh )
678 $ WRITE( nout, fmt = 9985 ) sresid
679 END IF
680*
681* The second test is a NaN trap
682*
683 IF( ( sresid.LE.thresh ).AND.
684 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
685 kpass = kpass + 1
686 passed = 'PASSED'
687 ELSE
688 kfail = kfail + 1
689 passed = 'FAILED'
690 END IF
691*
692 END IF
693*
694 15 CONTINUE
695* Skipped tests jump to here to print out "SKIPPED"
696*
697* Gather maximum of all CPU and WALL clock timings
698*
699 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
700 $ wtime )
701 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
702 $ ctime )
703*
704* Print results
705*
706 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
707*
708 nops = 0
709 nops2 = 0
710*
711 n_first = nb
712 nprocs_real = ( n-1 )/nb + 1
713 n_last = mod( n-1, nb ) + 1
714*
715* 2 N bwl (bwl+bwu) + N (bwl) flops
716* for LU factorization
717*
718 nops = 2*(dble(n)*dble(bwl)*
719 $ dble((bwl+bwu))) +
720 $ (dble(n)*dble(bwl))
721*
722* nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
723*
724 nops = nops +
725 $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
726 $ *dble(nrhs))
727*
728* Multiply by 4 to get complex count
729*
730 nops = nops * dble(4)
731*
732* Second calc to represent actual hardware speed
733*
734* 2*N_FIRST (bwl+bwu)*bwu Flops for LU
735* factorization in proc 1
736*
737 nops2 = 2*( (dble(n_first)*
738 $ dble((bwl+bwu))*dble(bwu)))
739*
740 IF ( nprocs_real .GT. 1) THEN
741* 8 N_LAST (bwl+bwu)*(bwl+bwu)
742* flops for LU and spike
743* calc in last processor
744*
745 nops2 = nops2 +
746 $ 8*( (dble(n_last)*dble((bwl+bwu))
747 $ *dble((bwl+bwu))) )
748 ENDIF
749*
750 IF ( nprocs_real .GT. 2) THEN
751* 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
752* calc in other processors
753*
754 nops2 = nops2 + (nprocs_real-2)*
755 $ 8*( (dble(nb)*dble((bwl+bwu))
756 $ *dble((bwl+bwu))) )
757 ENDIF
758*
759* Reduced system
760*
761 nops2 = nops2 +
762 $ 2*( nprocs_real-1 ) *
763 $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
764 IF( nprocs_real .GT. 1 ) THEN
765 nops2 = nops2 +
766 $ 2*( nprocs_real-2 ) *
767 $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
768 ENDIF
769*
770* Solve stage
771*
772* nrhs*2 n_first*
773* (bwl+(bwl+bwu))
774* flops for L,U solve in proc 1.
775*
776 nops2 = nops2 +
777 $ 2*
778 $ dble(n_first)*
779 $ dble(nrhs) *
780 $ ( dble(bwl)+dble((bwl+bwu)))
781*
782 IF ( nprocs_real .GT. 1 ) THEN
783*
784* 2*nrhs*2 n_last
785* ((bwl+bwu)+(bwl+bwu))
786* flops for LU solve in other procs
787*
788 nops2 = nops2 +
789 $ 4*
790 $ (dble(n_last)*(dble((bwl+bwu))+
791 $ dble((bwl+bwu))))*dble(nrhs)
792 ENDIF
793*
794 IF ( nprocs_real .GT. 2 ) THEN
795*
796* 2*nrhs*2 NB
797* ((bwl+bwu)+(bwl+bwu))
798* flops for LU solve in other procs
799*
800 nops2 = nops2 +
801 $ ( nprocs_real-2)*2*
802 $ ( (dble(nb)*(dble((bwl+bwu))+
803 $ dble((bwl+bwu))))*dble(nrhs) )
804 ENDIF
805*
806* Reduced system
807*
808 nops2 = nops2 +
809 $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
810 IF( nprocs_real .GT. 1 ) THEN
811 nops2 = nops2 +
812 $ nrhs*( nprocs_real-2 ) *
813 $ ( 6 * (bwl+bwu)*(bwl+bwu) )
814 ENDIF
815*
816*
817* Multiply by 4 to get complex count
818*
819 nops2 = nops2 * dble(4)
820*
821* Calculate total megaflops - factorization and/or
822* solve -- for WALL and CPU time, and print output
823*
824* Print WALL time if machine supports it
825*
826 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
827 tmflops = nops /
828 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
829 ELSE
830 tmflops = 0.0d+0
831 END IF
832*
833 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
834 tmflops2 = nops2 /
835 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
836 ELSE
837 tmflops2 = 0.0d+0
838 END IF
839*
840 IF( wtime( 2 ).GE.0.0d+0 )
841 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
842 $ n,
843 $ bwl, bwu,
844 $ nb, nrhs, nprow, npcol,
845 $ wtime( 1 ), wtime( 2 ), tmflops,
846 $ tmflops2, passed
847*
848* Print CPU time if machine supports it
849*
850 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
851 tmflops = nops /
852 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
853 ELSE
854 tmflops = 0.0d+0
855 END IF
856*
857 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
858 tmflops2 = nops2 /
859 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
860 ELSE
861 tmflops2 = 0.0d+0
862 END IF
863*
864 IF( ctime( 2 ).GE.0.0d+0 )
865 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
866 $ n,
867 $ bwl, bwu,
868 $ nb, nrhs, nprow, npcol,
869 $ ctime( 1 ), ctime( 2 ), tmflops,
870 $ tmflops2, passed
871*
872 END IF
873 20 CONTINUE
874*
875*
876 30 CONTINUE
877* NNB loop
878*
879 45 CONTINUE
880* BW[] loop
881*
882 40 CONTINUE
883* NMAT loop
884*
885 CALL blacs_gridexit( ictxt )
886 CALL blacs_gridexit( ictxtb )
887*
888 50 CONTINUE
889* NGRIDS DROPOUT
890 60 CONTINUE
891* NGRIDS loop
892*
893* Print ending messages and close output file
894*
895 IF( iam.EQ.0 ) THEN
896 ktests = kpass + kfail + kskip
897 WRITE( nout, fmt = * )
898 WRITE( nout, fmt = 9992 ) ktests
899 IF( check ) THEN
900 WRITE( nout, fmt = 9991 ) kpass
901 WRITE( nout, fmt = 9989 ) kfail
902 ELSE
903 WRITE( nout, fmt = 9990 ) kpass
904 END IF
905 WRITE( nout, fmt = 9988 ) kskip
906 WRITE( nout, fmt = * )
907 WRITE( nout, fmt = * )
908 WRITE( nout, fmt = 9987 )
909 IF( nout.NE.6 .AND. nout.NE.0 )
910 $ CLOSE ( nout )
911 END IF
912*
913 CALL blacs_exit( 0 )
914*
915 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
916 $ '; It should be at least 1' )
917 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
918 $ i4 )
919 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
920 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
921 $ i11 )
922 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
923 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
924 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
925 $ '-------- -------- -------- ------' )
926 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
927 $ 1x,i4,1x,i4,1x,f9.3,
928 $ f9.4, f9.2, f9.2, 1x, a6 )
929 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
930 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
931 9990 FORMAT( i5, ' tests completed without checking.' )
932 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
933 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
934 9987 FORMAT( 'END OF TESTS.' )
935 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
936 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
937*
938 stop
939*
940* End of PZGBTRS_DRIVER
941*
942 END
943*
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pzbmatgen.f:5
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
subroutine pzdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pzdblaschk.f:4
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
program pzgbdriver
Definition pzgbdriver.f:1
subroutine pzgbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pzgbinfo.f:6
subroutine pzgbtrf(n, bwl, bwu, a, ja, desca, ipiv, af, laf, work, lwork, info)
Definition pzgbtrf.f:3
subroutine pzgbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, ipiv, b, ib, descb, af, laf, work, lwork, info)
Definition pzgbtrs.f:3
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition pzlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724