SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzdbdriver.f
Go to the documentation of this file.
1 PROGRAM pzdbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PZDBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by ZDB. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* ZPLXSZ INTEGER, default = 16 bytes.
56* INTGSZ and ZPLXSZ indicate the length in bytes on the
57* given platform for an integer and a double precision
58* complex.
59* MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 DOUBLE PRECISION zero
83 INTEGER memsiz, ntests, zplxsz
84 COMPLEX*16 padval
85 parameter( zplxsz = 16,
86 $ memsiz = totmem / zplxsz, ntests = 20,
87 $ padval = ( -9923.0d+0, -9923.0d+0 ),
88 $ zero = 0.0d+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER trans
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL thresh
107 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
108 $ tmflops2
109* ..
110* .. Local Arrays ..
111 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114 $ nrval( ntests ), nval( ntests ),
115 $ pval( ntests ), qval( ntests )
116 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 COMPLEX*16 mem( memsiz )
118* ..
119* .. External Subroutines ..
120 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122 $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
126* ..
127* .. External Functions ..
128 INTEGER numroc
129 LOGICAL lsame
130 DOUBLE PRECISION pzlange
131 EXTERNAL lsame, numroc, pzlange
132* ..
133* .. Intrinsic Functions ..
134 INTRINSIC dble, max, min, mod
135* ..
136* .. Data Statements ..
137 DATA kfail, kpass, kskip, ktests / 4*0 /
138* ..
139*
140*
141*
142* .. Executable Statements ..
143*
144* Get starting information
145*
146 CALL blacs_pinfo( iam, nprocs )
147 iaseed = 100
148 ibseed = 200
149*
150 CALL pzdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154*
155 check = ( thresh.GE.0.0d+0 )
156*
157* Print headings
158*
159 IF( iam.EQ.0 ) THEN
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
164 END IF
165*
166* Loop over different process grids
167*
168 DO 60 i = 1, ngrids
169*
170 nprow = pval( i )
171 npcol = qval( i )
172*
173* Make sure grid information is correct
174*
175 ierr( 1 ) = 0
176 IF( nprow.LT.1 ) THEN
177 IF( iam.EQ.0 )
178 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179 ierr( 1 ) = 1
180 ELSE IF( npcol.LT.1 ) THEN
181 IF( iam.EQ.0 )
182 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183 ierr( 1 ) = 1
184 ELSE IF( nprow*npcol.GT.nprocs ) THEN
185 IF( iam.EQ.0 )
186 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 ierr( 1 ) = 1
188 END IF
189*
190 IF( ierr( 1 ).GT.0 ) THEN
191 IF( iam.EQ.0 )
192 $ WRITE( nout, fmt = 9997 ) 'grid'
193 kskip = kskip + 1
194 GO TO 50
195 END IF
196*
197* Define process grid
198*
199 CALL blacs_get( -1, 0, ictxt )
200 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201*
202*
203* Define transpose process grid
204*
205 CALL blacs_get( -1, 0, ictxtb )
206 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
207*
208* Go to bottom of process grid loop if this case doesn't use my
209* process
210*
211 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212*
213 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
214 GO TO 50
215 ENDIF
216*
217 DO 40 j = 1, nmat
218*
219 ierr( 1 ) = 0
220*
221 n = nval( j )
222*
223* Make sure matrix information is correct
224*
225 IF( n.LT.1 ) THEN
226 IF( iam.EQ.0 )
227 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
228 ierr( 1 ) = 1
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
234 $ -1, 0 )
235*
236 IF( ierr( 1 ).GT.0 ) THEN
237 IF( iam.EQ.0 )
238 $ WRITE( nout, fmt = 9997 ) 'size'
239 kskip = kskip + 1
240 GO TO 40
241 END IF
242*
243*
244 DO 45 bw_num = 1, nbw
245*
246 ierr( 1 ) = 0
247*
248 bwl = bwlval( bw_num )
249 IF( bwl.LT.1 ) THEN
250 IF( iam.EQ.0 )
251 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
252 ierr( 1 ) = 1
253 END IF
254*
255 bwu = bwuval( bw_num )
256 IF( bwu.LT.1 ) THEN
257 IF( iam.EQ.0 )
258 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
259 ierr( 1 ) = 1
260 END IF
261*
262 IF( bwl.GT.n-1 ) THEN
263 IF( iam.EQ.0 ) THEN
264 ierr( 1 ) = 1
265 ENDIF
266 END IF
267*
268 IF( bwu.GT.n-1 ) THEN
269 IF( iam.EQ.0 ) THEN
270 ierr( 1 ) = 1
271 ENDIF
272 END IF
273*
274* Check all processes for an error
275*
276 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
277 $ -1, 0 )
278*
279 IF( ierr( 1 ).GT.0 ) THEN
280 kskip = kskip + 1
281 GO TO 45
282 END IF
283*
284 DO 30 k = 1, nnb
285*
286 ierr( 1 ) = 0
287*
288 nb = nbval( k )
289 IF( nb.LT.0 ) THEN
290 nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
291 $ + max(bwl,bwu)
292 nb = max( nb, 2*max(bwl,bwu) )
293 nb = min( n, nb )
294 END IF
295*
296* Make sure NB is legal
297*
298 ierr( 1 ) = 0
299 IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
300 ierr( 1 ) = 1
301 END IF
302*
303* Check all processes for an error
304*
305 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
306 $ -1, 0 )
307*
308 IF( ierr( 1 ).GT.0 ) THEN
309 kskip = kskip + 1
310 GO TO 30
311 END IF
312*
313* Padding constants
314*
315 np = numroc( (bwl+bwu+1), (bwl+bwu+1),
316 $ myrow, 0, nprow )
317 nq = numroc( n, nb, mycol, 0, npcol )
318*
319 IF( check ) THEN
320 iprepad = ((bwl+bwu+1)+10)
321 imidpad = 10
322 ipostpad = ((bwl+bwu+1)+10)
323 ELSE
324 iprepad = 0
325 imidpad = 0
326 ipostpad = 0
327 END IF
328*
329* Initialize the array descriptor for the matrix A
330*
331 CALL descinit( desca2d, (bwl+bwu+1), n,
332 $ (bwl+bwu+1), nb, 0, 0,
333 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
334*
335* Convert this to 1D descriptor
336*
337 desca( 1 ) = 501
338 desca( 3 ) = n
339 desca( 4 ) = nb
340 desca( 5 ) = 0
341 desca( 2 ) = ictxt
342 desca( 6 ) = ((bwl+bwu+1)+10)
343 desca( 7 ) = 0
344*
345 ierr_temp = ierr( 1 )
346 ierr( 1 ) = 0
347 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
348*
349* Check all processes for an error
350*
351 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
352*
353 IF( ierr( 1 ).LT.0 ) THEN
354 IF( iam.EQ.0 )
355 $ WRITE( nout, fmt = 9997 ) 'descriptor'
356 kskip = kskip + 1
357 GO TO 30
358 END IF
359*
360* Assign pointers into MEM for SCALAPACK arrays, A is
361* allocated starting at position MEM( IPREPAD+1 )
362*
363 free_ptr = 1
364 ipb = 0
365*
366* Save room for prepadding
367 free_ptr = free_ptr + iprepad
368*
369 ipa = free_ptr
370 free_ptr = free_ptr + desca2d( lld_ )*
371 $ desca2d( nb_ )
372 $ + ipostpad
373*
374* Add memory for fillin
375* Fillin space needs to store:
376* Fillin spike:
377* Contribution to previous proc's diagonal block of
378* reduced system:
379* Off-diagonal block of reduced system:
380* Diagonal block of reduced system:
381*
382 fillin_size =
383 $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
384*
385* Claim memory for fillin
386*
387 free_ptr = free_ptr + iprepad
388 ip_fillin = free_ptr
389 free_ptr = free_ptr + fillin_size
390*
391* Workspace needed by computational routines:
392*
393 ipw_size = 0
394*
395* factorization:
396*
397 ipw_size = max(bwl,bwu)*max(bwl,bwu)
398*
399* Claim memory for IPW
400*
401 ipw = free_ptr
402 free_ptr = free_ptr + ipw_size
403*
404* Check for adequate memory for problem size
405*
406 ierr( 1 ) = 0
407 IF( free_ptr.GT.memsiz ) THEN
408 IF( iam.EQ.0 )
409 $ WRITE( nout, fmt = 9996 )
410 $ 'divide and conquer factorization',
411 $ (free_ptr )*zplxsz
412 ierr( 1 ) = 1
413 END IF
414*
415* Check all processes for an error
416*
417 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
418 $ 1, -1, 0 )
419*
420 IF( ierr( 1 ).GT.0 ) THEN
421 IF( iam.EQ.0 )
422 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
423 kskip = kskip + 1
424 GO TO 30
425 END IF
426*
427* Worksize needed for LAPRNT
428 worksiz = max( ((bwl+bwu+1)+10), nb )
429*
430 IF( check ) THEN
431*
432* Calculate the amount of workspace required by
433* the checking routines.
434*
435* PZLANGE
436 worksiz = max( worksiz, desca2d( nb_ ) )
437*
438* PZDBLASCHK
439 worksiz = max( worksiz,
440 $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
441 END IF
442*
443 free_ptr = free_ptr + iprepad
444 ip_driver_w = free_ptr
445 free_ptr = free_ptr + worksiz + ipostpad
446*
447*
448* Check for adequate memory for problem size
449*
450 ierr( 1 ) = 0
451 IF( free_ptr.GT.memsiz ) THEN
452 IF( iam.EQ.0 )
453 $ WRITE( nout, fmt = 9996 ) 'factorization',
454 $ ( free_ptr )*zplxsz
455 ierr( 1 ) = 1
456 END IF
457*
458* Check all processes for an error
459*
460 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
461 $ 1, -1, 0 )
462*
463 IF( ierr( 1 ).GT.0 ) THEN
464 IF( iam.EQ.0 )
465 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
466 kskip = kskip + 1
467 GO TO 30
468 END IF
469*
470 CALL pzbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
471 $ (bwl+bwu+1), nb, mem( ipa ),
472 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
473 $ mycol, nprow, npcol )
474*
475 CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
476 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
477 $ padval )
478*
479 CALL pzfillpad( ictxt, worksiz, 1,
480 $ mem( ip_driver_w-iprepad ), worksiz,
481 $ iprepad, ipostpad, padval )
482*
483* Calculate norm of A for residual error-checking
484*
485 IF( check ) THEN
486*
487 anorm = pzlange( '1', (bwl+bwu+1),
488 $ n, mem( ipa ), 1, 1,
489 $ desca2d, mem( ip_driver_w ) )
490 CALL pzchekpad( ictxt, 'PZLANGE', np, nq,
491 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
492 $ iprepad, ipostpad, padval )
493 CALL pzchekpad( ictxt, 'PZLANGE',
494 $ worksiz, 1,
495 $ mem( ip_driver_w-iprepad ), worksiz,
496 $ iprepad, ipostpad, padval )
497 END IF
498*
499*
500 CALL slboot()
501 CALL blacs_barrier( ictxt, 'All' )
502*
503* Perform factorization
504*
505 CALL sltimer( 1 )
506*
507 CALL pzdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
508 $ mem( ip_fillin ), fillin_size, mem( ipw ),
509 $ ipw_size, info )
510*
511 CALL sltimer( 1 )
512*
513 IF( info.NE.0 ) THEN
514 IF( iam.EQ.0 ) THEN
515 WRITE( nout, fmt = * ) 'PZDBTRF INFO=', info
516 ENDIF
517 kfail = kfail + 1
518 GO TO 30
519 END IF
520*
521 IF( check ) THEN
522*
523* Check for memory overwrite in factorization
524*
525 CALL pzchekpad( ictxt, 'PZDBTRF', np,
526 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
527 $ iprepad, ipostpad, padval )
528 END IF
529*
530*
531* Loop over the different values for NRHS
532*
533 DO 20 hh = 1, nnr
534*
535 ierr( 1 ) = 0
536*
537 nrhs = nrval( hh )
538*
539* Initialize Array Descriptor for rhs
540*
541 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
542 $ ictxtb, nb+10, ierr( 1 ) )
543*
544* Convert this to 1D descriptor
545*
546 descb( 1 ) = 502
547 descb( 3 ) = n
548 descb( 4 ) = nb
549 descb( 5 ) = 0
550 descb( 2 ) = ictxt
551 descb( 6 ) = descb2d( lld_ )
552 descb( 7 ) = 0
553*
554* reset free_ptr to reuse space for right hand sides
555*
556 IF( ipb .GT. 0 ) THEN
557 free_ptr = ipb
558 ENDIF
559*
560 free_ptr = free_ptr + iprepad
561 ipb = free_ptr
562 free_ptr = free_ptr + nrhs*descb2d( lld_ )
563 $ + ipostpad
564*
565* Allocate workspace for workspace in TRS routine:
566*
567 ipw_solve_size = (max(bwl,bwu)*nrhs)
568*
569 ipw_solve = free_ptr
570 free_ptr = free_ptr + ipw_solve_size
571*
572 ierr( 1 ) = 0
573 IF( free_ptr.GT.memsiz ) THEN
574 IF( iam.EQ.0 )
575 $ WRITE( nout, fmt = 9996 )'solve',
576 $ ( free_ptr )*zplxsz
577 ierr( 1 ) = 1
578 END IF
579*
580* Check all processes for an error
581*
582 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
583 $ ierr, 1, -1, 0 )
584*
585 IF( ierr( 1 ).GT.0 ) THEN
586 IF( iam.EQ.0 )
587 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
588 kskip = kskip + 1
589 GO TO 15
590 END IF
591*
592 myrhs_size = numroc( n, nb, mycol, 0, npcol )
593*
594* Generate RHS
595*
596 CALL pzmatgen(ictxtb, 'No', 'No',
597 $ descb2d( m_ ), descb2d( n_ ),
598 $ descb2d( mb_ ), descb2d( nb_ ),
599 $ mem( ipb ),
600 $ descb2d( lld_ ), descb2d( rsrc_ ),
601 $ descb2d( csrc_ ),
602 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
603 $ myrow, npcol, nprow )
604*
605 IF( check ) THEN
606 CALL pzfillpad( ictxtb, nb, nrhs,
607 $ mem( ipb-iprepad ),
608 $ descb2d( lld_ ),
609 $ iprepad, ipostpad,
610 $ padval )
611 CALL pzfillpad( ictxt, worksiz, 1,
612 $ mem( ip_driver_w-iprepad ),
613 $ worksiz, iprepad,
614 $ ipostpad, padval )
615 END IF
616*
617*
618 CALL blacs_barrier( ictxt, 'All')
619 CALL sltimer( 2 )
620*
621* Solve linear system via factorization
622*
623 CALL pzdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
624 $ 1, desca, mem( ipb ), 1, descb,
625 $ mem( ip_fillin ), fillin_size,
626 $ mem( ipw_solve ), ipw_solve_size,
627 $ info )
628*
629 CALL sltimer( 2 )
630*
631 IF( info.NE.0 ) THEN
632 IF( iam.EQ.0 )
633 $ WRITE( nout, fmt = * ) 'PZDBTRS INFO=', info
634 kfail = kfail + 1
635 passed = 'FAILED'
636 GO TO 20
637 END IF
638*
639 IF( check ) THEN
640*
641* check for memory overwrite
642*
643 CALL pzchekpad( ictxt, 'PZDBTRS-work',
644 $ worksiz, 1,
645 $ mem( ip_driver_w-iprepad ),
646 $ worksiz, iprepad,
647 $ ipostpad, padval )
648*
649* check the solution to rhs
650*
651 sresid = zero
652*
653 CALL pzdblaschk( 'N', 'D', trans,
654 $ n, bwl, bwu, nrhs,
655 $ mem( ipb ), 1, 1, descb2d,
656 $ iaseed, mem( ipa ), 1, 1, desca2d,
657 $ ibseed, anorm, sresid,
658 $ mem( ip_driver_w ), worksiz )
659*
660 IF( iam.EQ.0 ) THEN
661 IF( sresid.GT.thresh )
662 $ WRITE( nout, fmt = 9985 ) sresid
663 END IF
664*
665* The second test is a NaN trap
666*
667 IF( ( sresid.LE.thresh ).AND.
668 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
669 kpass = kpass + 1
670 passed = 'PASSED'
671 ELSE
672 kfail = kfail + 1
673 passed = 'FAILED'
674 END IF
675*
676 END IF
677*
678 15 CONTINUE
679* Skipped tests jump to here to print out "SKIPPED"
680*
681* Gather maximum of all CPU and WALL clock timings
682*
683 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
684 $ wtime )
685 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
686 $ ctime )
687*
688* Print results
689*
690 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
691*
692 nops = 0
693 nops2 = 0
694*
695 n_first = nb
696 nprocs_real = ( n-1 )/nb + 1
697 n_last = mod( n-1, nb ) + 1
698*
699* 2 N bwl bwu + N (bwl) flops
700* for LU factorization
701*
702 nops = 2*(dble(n)*dble(bwl)*
703 $ dble(bwu)) +
704 $ (dble(n)*dble(bwl))
705*
706* nrhs * 2 N*(bwl+bwu) flops for LU solve.
707*
708 nops = nops +
709 $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
710 $ *dble(nrhs))
711*
712* Multiply by 4 to get complex count
713*
714 nops = nops * dble(4)
715*
716* Second calc to represent actual hardware speed
717*
718* 2*N_FIRST bwl*bwu Flops for LU
719* factorization in proc 1
720*
721 nops2 = 2*( (dble(n_first)*
722 $ dble(bwl)*dble(bwu)))
723*
724 IF ( nprocs_real .GT. 1) THEN
725* 8 N_LAST bwl*bwu
726* flops for LU and spike
727* calc in last processor
728*
729 nops2 = nops2 +
730 $ 8*( (dble(n_last)*dble(bwl)
731 $ *dble(bwu)) )
732 ENDIF
733*
734 IF ( nprocs_real .GT. 2) THEN
735* 8 NB bwl*bwu flops for LU and spike
736* calc in other processors
737*
738 nops2 = nops2 + (nprocs_real-2)*
739 $ 8*( (dble(nb)*dble(bwl)
740 $ *dble(bwu)) )
741 ENDIF
742*
743* Reduced system
744*
745 nops2 = nops2 +
746 $ 2*( nprocs_real-1 ) *
747 $ ( bwl*bwu*bwl/3 )
748 IF( nprocs_real .GT. 1 ) THEN
749 nops2 = nops2 +
750 $ 2*( nprocs_real-2 ) *
751 $ (2*bwl*bwu*bwl)
752 ENDIF
753*
754* Solve stage
755*
756* nrhs*2 n_first*
757* (bwl+bwu)
758* flops for L,U solve in proc 1.
759*
760 nops2 = nops2 +
761 $ 2*
762 $ dble(n_first)*
763 $ dble(nrhs) *
764 $ ( dble(bwl)+dble(bwu))
765*
766 IF ( nprocs_real .GT. 1 ) THEN
767*
768* 2*nrhs*2 n_last
769* (bwl+bwu)
770* flops for LU solve in other procs
771*
772 nops2 = nops2 +
773 $ 4*
774 $ (dble(n_last)*(dble(bwl)+
775 $ dble(bwu)))*dble(nrhs)
776 ENDIF
777*
778 IF ( nprocs_real .GT. 2 ) THEN
779*
780* 2*nrhs*2 NB
781* (bwl+bwu)
782* flops for LU solve in other procs
783*
784 nops2 = nops2 +
785 $ ( nprocs_real-2)*2*
786 $ ( (dble(nb)*(dble(bwl)+
787 $ dble(bwu)))*dble(nrhs) )
788 ENDIF
789*
790* Reduced system
791*
792 nops2 = nops2 +
793 $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
794 IF( nprocs_real .GT. 1 ) THEN
795 nops2 = nops2 +
796 $ nrhs*( nprocs_real-2 ) *
797 $ ( 6 * bwl*bwu )
798 ENDIF
799*
800*
801* Multiply by 4 to get complex count
802*
803 nops2 = nops2 * dble(4)
804*
805* Calculate total megaflops - factorization and/or
806* solve -- for WALL and CPU time, and print output
807*
808* Print WALL time if machine supports it
809*
810 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
811 tmflops = nops /
812 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
813 ELSE
814 tmflops = 0.0d+0
815 END IF
816*
817 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
818 tmflops2 = nops2 /
819 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
820 ELSE
821 tmflops2 = 0.0d+0
822 END IF
823*
824 IF( wtime( 2 ).GE.0.0d+0 )
825 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
826 $ n,
827 $ bwl, bwu,
828 $ nb, nrhs, nprow, npcol,
829 $ wtime( 1 ), wtime( 2 ), tmflops,
830 $ tmflops2, passed
831*
832* Print CPU time if machine supports it
833*
834 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
835 tmflops = nops /
836 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
837 ELSE
838 tmflops = 0.0d+0
839 END IF
840*
841 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
842 tmflops2 = nops2 /
843 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
844 ELSE
845 tmflops2 = 0.0d+0
846 END IF
847*
848 IF( ctime( 2 ).GE.0.0d+0 )
849 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
850 $ n,
851 $ bwl, bwu,
852 $ nb, nrhs, nprow, npcol,
853 $ ctime( 1 ), ctime( 2 ), tmflops,
854 $ tmflops2, passed
855*
856 END IF
857 20 CONTINUE
858*
859*
860 30 CONTINUE
861* NNB loop
862*
863 45 CONTINUE
864* BW[] loop
865*
866 40 CONTINUE
867* NMAT loop
868*
869 CALL blacs_gridexit( ictxt )
870 CALL blacs_gridexit( ictxtb )
871*
872 50 CONTINUE
873* NGRIDS DROPOUT
874 60 CONTINUE
875* NGRIDS loop
876*
877* Print ending messages and close output file
878*
879 IF( iam.EQ.0 ) THEN
880 ktests = kpass + kfail + kskip
881 WRITE( nout, fmt = * )
882 WRITE( nout, fmt = 9992 ) ktests
883 IF( check ) THEN
884 WRITE( nout, fmt = 9991 ) kpass
885 WRITE( nout, fmt = 9989 ) kfail
886 ELSE
887 WRITE( nout, fmt = 9990 ) kpass
888 END IF
889 WRITE( nout, fmt = 9988 ) kskip
890 WRITE( nout, fmt = * )
891 WRITE( nout, fmt = * )
892 WRITE( nout, fmt = 9987 )
893 IF( nout.NE.6 .AND. nout.NE.0 )
894 $ CLOSE ( nout )
895 END IF
896*
897 CALL blacs_exit( 0 )
898*
899 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
900 $ '; It should be at least 1' )
901 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
902 $ i4 )
903 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
904 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
905 $ i11 )
906 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
907 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
908 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
909 $ '-------- -------- -------- ------' )
910 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
911 $ 1x,i4,1x,i4,1x,f9.3,
912 $ f9.4, f9.2, f9.2, 1x, a6 )
913 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
914 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
915 9990 FORMAT( i5, ' tests completed without checking.' )
916 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
917 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
918 9987 FORMAT( 'END OF TESTS.' )
919 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
920 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
921*
922 stop
923*
924* End of PZDBTRS_DRIVER
925*
926 END
927*
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pzbmatgen.f:5
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
program pzdbdriver
Definition pzdbdriver.f:1
subroutine pzdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pzdbinfo.f:6
subroutine pzdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pzdblaschk.f:4
subroutine pzdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
Definition pzdbtrf.f:3
subroutine pzdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pzdbtrs.f:3
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition pzlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724