SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pddtdriver.f
Go to the documentation of this file.
1 PROGRAM pddtdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PDDTDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by DDT. This test driver performs an
15* A = L*U factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* DBLESZ INTEGER, default = 8 bytes.
56* INTGSZ and DBLESZ indicate the length in bytes on the
57* given platform for an integer and a double precision real.
58* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
59* All arrays used by ScaLAPACK routines are allocated from
60* this array and referenced by pointers. The integer IPB,
61* for example, is a pointer to the starting element of MEM for
62* the solution vector(s) B.
63*
64* =====================================================================
65*
66* Code Developer: Andrew J. Cleary, University of Tennessee.
67* Current address: Lawrence Livermore National Labs.
68* This version released: August, 2001.
69*
70* =====================================================================
71*
72* .. Parameters ..
73 INTEGER totmem
74 parameter( totmem = 3000000 )
75 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_, rsrc_
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
80*
81 DOUBLE PRECISION zero
82 INTEGER dblesz, memsiz, ntests
83 DOUBLE PRECISION padval
84 parameter( dblesz = 8,
85 $ memsiz = totmem / dblesz, ntests = 20,
86 $ padval = -9923.0d+0, zero = 0.0d+0 )
87 INTEGER int_one
88 parameter( int_one = 1 )
89* ..
90* .. Local Scalars ..
91 LOGICAL check
92 CHARACTER trans
93 CHARACTER*6 passed
94 CHARACTER*80 outfile
95 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98 $ iprepad, ipw, ipw_size, ipw_solve,
99 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102 $ nnr, nout, np, npcol, nprocs, nprocs_real,
103 $ nprow, nq, nrhs, n_first, n_last, worksiz
104 REAL thresh
105 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
106 $ tmflops2
107* ..
108* .. Local Arrays ..
109 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112 $ nrval( ntests ), nval( ntests ),
113 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
115* ..
116* .. External Subroutines ..
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
119 $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
123* ..
124* .. External Functions ..
125 INTEGER numroc
126 LOGICAL lsame
127 DOUBLE PRECISION pdlange
128 EXTERNAL lsame, numroc, pdlange
129* ..
130* .. Intrinsic Functions ..
131 INTRINSIC dble, max, min, mod
132* ..
133* .. Data Statements ..
134 DATA kfail, kpass, kskip, ktests / 4*0 /
135* ..
136*
137*
138*
139* .. Executable Statements ..
140*
141* Get starting information
142*
143 CALL blacs_pinfo( iam, nprocs )
144 iaseed = 100
145 ibseed = 200
146*
147 CALL pddtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
151*
152 check = ( thresh.GE.0.0d+0 )
153*
154* Print headings
155*
156 IF( iam.EQ.0 ) THEN
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
161 END IF
162*
163* Loop over different process grids
164*
165 DO 60 i = 1, ngrids
166*
167 nprow = pval( i )
168 npcol = qval( i )
169*
170* Make sure grid information is correct
171*
172 ierr( 1 ) = 0
173 IF( nprow.LT.1 ) THEN
174 IF( iam.EQ.0 )
175 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
176 ierr( 1 ) = 1
177 ELSE IF( npcol.LT.1 ) THEN
178 IF( iam.EQ.0 )
179 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
180 ierr( 1 ) = 1
181 ELSE IF( nprow*npcol.GT.nprocs ) THEN
182 IF( iam.EQ.0 )
183 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
184 ierr( 1 ) = 1
185 END IF
186*
187 IF( ierr( 1 ).GT.0 ) THEN
188 IF( iam.EQ.0 )
189 $ WRITE( nout, fmt = 9997 ) 'grid'
190 kskip = kskip + 1
191 GO TO 50
192 END IF
193*
194* Define process grid
195*
196 CALL blacs_get( -1, 0, ictxt )
197 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
198*
199*
200* Define transpose process grid
201*
202 CALL blacs_get( -1, 0, ictxtb )
203 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
204*
205* Go to bottom of process grid loop if this case doesn't use my
206* process
207*
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209*
210 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
211 GO TO 50
212 ENDIF
213*
214 DO 40 j = 1, nmat
215*
216 ierr( 1 ) = 0
217*
218 n = nval( j )
219*
220* Make sure matrix information is correct
221*
222 IF( n.LT.1 ) THEN
223 IF( iam.EQ.0 )
224 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
225 ierr( 1 ) = 1
226 END IF
227*
228* Check all processes for an error
229*
230 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
231 $ -1, 0 )
232*
233 IF( ierr( 1 ).GT.0 ) THEN
234 IF( iam.EQ.0 )
235 $ WRITE( nout, fmt = 9997 ) 'size'
236 kskip = kskip + 1
237 GO TO 40
238 END IF
239*
240*
241 DO 45 bw_num = 1, nbw
242*
243 ierr( 1 ) = 0
244*
245 bwl = 1
246 IF( bwl.LT.1 ) THEN
247 IF( iam.EQ.0 )
248 $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
249 ierr( 1 ) = 1
250 END IF
251*
252 bwu = 1
253 IF( bwu.LT.1 ) THEN
254 IF( iam.EQ.0 )
255 $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
256 ierr( 1 ) = 1
257 END IF
258*
259 IF( bwl.GT.n-1 ) THEN
260 IF( iam.EQ.0 ) THEN
261 ierr( 1 ) = 1
262 ENDIF
263 END IF
264*
265 IF( bwu.GT.n-1 ) THEN
266 IF( iam.EQ.0 ) THEN
267 ierr( 1 ) = 1
268 ENDIF
269 END IF
270*
271* Check all processes for an error
272*
273 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
274 $ -1, 0 )
275*
276 IF( ierr( 1 ).GT.0 ) THEN
277 kskip = kskip + 1
278 GO TO 45
279 END IF
280*
281 DO 30 k = 1, nnb
282*
283 ierr( 1 ) = 0
284*
285 nb = nbval( k )
286 IF( nb.LT.0 ) THEN
287 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
288 $ + int_one
289 nb = max( nb, 2*int_one )
290 nb = min( n, nb )
291 END IF
292*
293* Make sure NB is legal
294*
295 ierr( 1 ) = 0
296 IF( nb.LT.min( 2*int_one, n ) ) THEN
297 ierr( 1 ) = 1
298 END IF
299*
300* Check all processes for an error
301*
302 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
303 $ -1, 0 )
304*
305 IF( ierr( 1 ).GT.0 ) THEN
306 kskip = kskip + 1
307 GO TO 30
308 END IF
309*
310* Padding constants
311*
312 np = numroc( (3), (3),
313 $ myrow, 0, nprow )
314 nq = numroc( n, nb, mycol, 0, npcol )
315*
316 IF( check ) THEN
317 iprepad = ((3)+10)
318 imidpad = 10
319 ipostpad = ((3)+10)
320 ELSE
321 iprepad = 0
322 imidpad = 0
323 ipostpad = 0
324 END IF
325*
326* Initialize the array descriptor for the matrix A
327*
328 CALL descinit( desca2d, n, (3),
329 $ nb, 1, 0, 0,
330 $ ictxtb, nb+10, ierr( 1 ) )
331*
332* Convert this to 1D descriptor
333*
334 desca( 1 ) = 501
335 desca( 3 ) = n
336 desca( 4 ) = nb
337 desca( 5 ) = 0
338 desca( 2 ) = ictxt
339 desca( 6 ) = ((3)+10)
340 desca( 7 ) = 0
341*
342 ierr_temp = ierr( 1 )
343 ierr( 1 ) = 0
344 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
345*
346* Check all processes for an error
347*
348 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
349*
350 IF( ierr( 1 ).LT.0 ) THEN
351 IF( iam.EQ.0 )
352 $ WRITE( nout, fmt = 9997 ) 'descriptor'
353 kskip = kskip + 1
354 GO TO 30
355 END IF
356*
357* Assign pointers into MEM for SCALAPACK arrays, A is
358* allocated starting at position MEM( IPREPAD+1 )
359*
360 free_ptr = 1
361 ipb = 0
362*
363* Save room for prepadding
364 free_ptr = free_ptr + iprepad
365*
366 ipa = free_ptr
367 free_ptr = free_ptr + (nb+10)*(3)
368 $ + ipostpad
369*
370* Add memory for fillin
371* Fillin space needs to store:
372* Fillin spike:
373* Contribution to previous proc's diagonal block of
374* reduced system:
375* Off-diagonal block of reduced system:
376* Diagonal block of reduced system:
377*
378 fillin_size =
379 $ (12*npcol+3*nb)
380*
381* Claim memory for fillin
382*
383 free_ptr = free_ptr + iprepad
384 ip_fillin = free_ptr
385 free_ptr = free_ptr + fillin_size
386*
387* Workspace needed by computational routines:
388*
389 ipw_size = 0
390*
391* factorization:
392*
393 ipw_size = 8*npcol
394*
395* Claim memory for IPW
396*
397 ipw = free_ptr
398 free_ptr = free_ptr + ipw_size
399*
400* Check for adequate memory for problem size
401*
402 ierr( 1 ) = 0
403 IF( free_ptr.GT.memsiz ) THEN
404 IF( iam.EQ.0 )
405 $ WRITE( nout, fmt = 9996 )
406 $ 'divide and conquer factorization',
407 $ (free_ptr )*dblesz
408 ierr( 1 ) = 1
409 END IF
410*
411* Check all processes for an error
412*
413 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
414 $ 1, -1, 0 )
415*
416 IF( ierr( 1 ).GT.0 ) THEN
417 IF( iam.EQ.0 )
418 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
419 kskip = kskip + 1
420 GO TO 30
421 END IF
422*
423* Worksize needed for LAPRNT
424 worksiz = max( ((3)+10), nb )
425*
426 IF( check ) THEN
427*
428* Calculate the amount of workspace required by
429* the checking routines.
430*
431* PDLANGE
432 worksiz = max( worksiz, desca2d( nb_ ) )
433*
434* PDDTLASCHK
435 worksiz = max( worksiz,
436 $ max(5,nb)+2*nb )
437 END IF
438*
439 free_ptr = free_ptr + iprepad
440 ip_driver_w = free_ptr
441 free_ptr = free_ptr + worksiz + ipostpad
442*
443*
444* Check for adequate memory for problem size
445*
446 ierr( 1 ) = 0
447 IF( free_ptr.GT.memsiz ) THEN
448 IF( iam.EQ.0 )
449 $ WRITE( nout, fmt = 9996 ) 'factorization',
450 $ ( free_ptr )*dblesz
451 ierr( 1 ) = 1
452 END IF
453*
454* Check all processes for an error
455*
456 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
457 $ 1, -1, 0 )
458*
459 IF( ierr( 1 ).GT.0 ) THEN
460 IF( iam.EQ.0 )
461 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
462 kskip = kskip + 1
463 GO TO 30
464 END IF
465*
466 CALL pdbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
467 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
468 $ mycol, nprow, npcol )
469 CALL pdfillpad( ictxt, nq, np, mem( ipa-iprepad ),
470 $ nb+10, iprepad, ipostpad,
471 $ padval )
472*
473 CALL pdfillpad( ictxt, worksiz, 1,
474 $ mem( ip_driver_w-iprepad ), worksiz,
475 $ iprepad, ipostpad, padval )
476*
477* Calculate norm of A for residual error-checking
478*
479 IF( check ) THEN
480*
481 anorm = pdlange( 'I', n,
482 $ (3), mem( ipa ), 1, 1,
483 $ desca2d, mem( ip_driver_w ) )
484 CALL pdchekpad( ictxt, 'PDLANGE', nq, np,
485 $ mem( ipa-iprepad ), nb+10,
486 $ iprepad, ipostpad, padval )
487 CALL pdchekpad( ictxt, 'PDLANGE',
488 $ worksiz, 1,
489 $ mem( ip_driver_w-iprepad ), worksiz,
490 $ iprepad, ipostpad, padval )
491 END IF
492*
493*
494 CALL slboot()
495 CALL blacs_barrier( ictxt, 'All' )
496*
497* Perform factorization
498*
499 CALL sltimer( 1 )
500*
501 CALL pddttrf( n, mem( ipa+2*( nb+10 ) ),
502 $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
503 $ desca, mem( ip_fillin ), fillin_size,
504 $ mem( ipw ), ipw_size, info )
505*
506 CALL sltimer( 1 )
507*
508 IF( info.NE.0 ) THEN
509 IF( iam.EQ.0 ) THEN
510 WRITE( nout, fmt = * ) 'PDDTTRF INFO=', info
511 ENDIF
512 kfail = kfail + 1
513 GO TO 30
514 END IF
515*
516 IF( check ) THEN
517*
518* Check for memory overwrite in factorization
519*
520 CALL pdchekpad( ictxt, 'PDDTTRF', nq,
521 $ np, mem( ipa-iprepad ), nb+10,
522 $ iprepad, ipostpad, padval )
523 END IF
524*
525*
526* Loop over the different values for NRHS
527*
528 DO 20 hh = 1, nnr
529*
530 ierr( 1 ) = 0
531*
532 nrhs = nrval( hh )
533*
534* Initialize Array Descriptor for rhs
535*
536 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
537 $ ictxtb, nb+10, ierr( 1 ) )
538*
539* Convert this to 1D descriptor
540*
541 descb( 1 ) = 502
542 descb( 3 ) = n
543 descb( 4 ) = nb
544 descb( 5 ) = 0
545 descb( 2 ) = ictxt
546 descb( 6 ) = descb2d( lld_ )
547 descb( 7 ) = 0
548*
549* reset free_ptr to reuse space for right hand sides
550*
551 IF( ipb .GT. 0 ) THEN
552 free_ptr = ipb
553 ENDIF
554*
555 free_ptr = free_ptr + iprepad
556 ipb = free_ptr
557 free_ptr = free_ptr + nrhs*descb2d( lld_ )
558 $ + ipostpad
559*
560* Allocate workspace for workspace in TRS routine:
561*
562 ipw_solve_size = 10*npcol+4*nrhs
563*
564 ipw_solve = free_ptr
565 free_ptr = free_ptr + ipw_solve_size
566*
567 ierr( 1 ) = 0
568 IF( free_ptr.GT.memsiz ) THEN
569 IF( iam.EQ.0 )
570 $ WRITE( nout, fmt = 9996 )'solve',
571 $ ( free_ptr )*dblesz
572 ierr( 1 ) = 1
573 END IF
574*
575* Check all processes for an error
576*
577 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
578 $ ierr, 1, -1, 0 )
579*
580 IF( ierr( 1 ).GT.0 ) THEN
581 IF( iam.EQ.0 )
582 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
583 kskip = kskip + 1
584 GO TO 15
585 END IF
586*
587 myrhs_size = numroc( n, nb, mycol, 0, npcol )
588*
589* Generate RHS
590*
591 CALL pdmatgen(ictxtb, 'No', 'No',
592 $ descb2d( m_ ), descb2d( n_ ),
593 $ descb2d( mb_ ), descb2d( nb_ ),
594 $ mem( ipb ),
595 $ descb2d( lld_ ), descb2d( rsrc_ ),
596 $ descb2d( csrc_ ),
597 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
598 $ myrow, npcol, nprow )
599*
600 IF( check ) THEN
601 CALL pdfillpad( ictxtb, nb, nrhs,
602 $ mem( ipb-iprepad ),
603 $ descb2d( lld_ ),
604 $ iprepad, ipostpad,
605 $ padval )
606 CALL pdfillpad( ictxt, worksiz, 1,
607 $ mem( ip_driver_w-iprepad ),
608 $ worksiz, iprepad,
609 $ ipostpad, padval )
610 END IF
611*
612*
613 CALL blacs_barrier( ictxt, 'All')
614 CALL sltimer( 2 )
615*
616* Solve linear system via factorization
617*
618 CALL pddttrs( trans, n, nrhs,
619 $ mem( ipa+2*( nb+10 ) ),
620 $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
621 $ 1, desca, mem( ipb ), 1, descb,
622 $ mem( ip_fillin ), fillin_size,
623 $ mem( ipw_solve ), ipw_solve_size,
624 $ info )
625*
626 CALL sltimer( 2 )
627*
628 IF( info.NE.0 ) THEN
629 IF( iam.EQ.0 )
630 $ WRITE( nout, fmt = * ) 'PDDTTRS INFO=', info
631 kfail = kfail + 1
632 passed = 'FAILED'
633 GO TO 20
634 END IF
635*
636 IF( check ) THEN
637*
638* check for memory overwrite
639*
640 CALL pdchekpad( ictxt, 'PDDTTRS-work',
641 $ worksiz, 1,
642 $ mem( ip_driver_w-iprepad ),
643 $ worksiz, iprepad,
644 $ ipostpad, padval )
645*
646* check the solution to rhs
647*
648 sresid = zero
649*
650* Reset descriptor describing A to 1-by-P grid for
651* use in banded utility routines
652*
653 CALL descinit( desca2d, (3), n,
654 $ (3), nb, 0, 0,
655 $ ictxt, (3), ierr( 1 ) )
656 CALL pddtlaschk( 'N', 'D', trans,
657 $ n, bwl, bwu, nrhs,
658 $ mem( ipb ), 1, 1, descb2d,
659 $ iaseed, mem( ipa ), 1, 1, desca2d,
660 $ ibseed, anorm, sresid,
661 $ mem( ip_driver_w ), worksiz )
662*
663 IF( iam.EQ.0 ) THEN
664 IF( sresid.GT.thresh )
665 $ WRITE( nout, fmt = 9985 ) sresid
666 END IF
667*
668* The second test is a NaN trap
669*
670 IF( ( sresid.LE.thresh ).AND.
671 $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
672 kpass = kpass + 1
673 passed = 'PASSED'
674 ELSE
675 kfail = kfail + 1
676 passed = 'FAILED'
677 END IF
678*
679 END IF
680*
681 15 CONTINUE
682* Skipped tests jump to here to print out "SKIPPED"
683*
684* Gather maximum of all CPU and WALL clock timings
685*
686 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
687 $ wtime )
688 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
689 $ ctime )
690*
691* Print results
692*
693 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
694*
695 nops = 0
696 nops2 = 0
697*
698 n_first = nb
699 nprocs_real = ( n-1 )/nb + 1
700 n_last = mod( n-1, nb ) + 1
701*
702* 2 N bwl INT_ONE + N (bwl) flops
703* for LU factorization
704*
705 nops = 2*(dble(n)*dble(bwl)*
706 $ dble(int_one)) +
707 $ (dble(n)*dble(bwl))
708*
709* nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
710*
711 nops = nops +
712 $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
713 $ *dble(nrhs))
714*
715* Second calc to represent actual hardware speed
716*
717* 2*N_FIRST bwl*bwu Flops for LU
718* factorization in proc 1
719*
720 nops2 = 2*( (dble(n_first)*
721 $ dble(bwl)*dble(bwu)))
722*
723 IF ( nprocs_real .GT. 1) THEN
724* 8 N_LAST bwl*INT_ONE
725* flops for LU and spike
726* calc in last processor
727*
728 nops2 = nops2 +
729 $ 8*( (dble(n_last)*dble(bwl)
730 $ *dble(int_one)) )
731 ENDIF
732*
733 IF ( nprocs_real .GT. 2) THEN
734* 8 NB bwl*INT_ONE flops for LU and spike
735* calc in other processors
736*
737 nops2 = nops2 + (nprocs_real-2)*
738 $ 8*( (dble(nb)*dble(bwl)
739 $ *dble(int_one)) )
740 ENDIF
741*
742* Reduced system
743*
744 nops2 = nops2 +
745 $ 2*( nprocs_real-1 ) *
746 $ ( bwl*int_one*bwl/3 )
747 IF( nprocs_real .GT. 1 ) THEN
748 nops2 = nops2 +
749 $ 2*( nprocs_real-2 ) *
750 $ (2*bwl*int_one*bwl)
751 ENDIF
752*
753* Solve stage
754*
755* nrhs*2 n_first*
756* (bwl+INT_ONE)
757* flops for L,U solve in proc 1.
758*
759 nops2 = nops2 +
760 $ 2*
761 $ dble(n_first)*
762 $ dble(nrhs) *
763 $ ( dble(bwl)+dble(int_one))
764*
765 IF ( nprocs_real .GT. 1 ) THEN
766*
767* 2*nrhs*2 n_last
768* (bwl+INT_ONE)
769* flops for LU solve in other procs
770*
771 nops2 = nops2 +
772 $ 4*
773 $ (dble(n_last)*(dble(bwl)+
774 $ dble(int_one)))*dble(nrhs)
775 ENDIF
776*
777 IF ( nprocs_real .GT. 2 ) THEN
778*
779* 2*nrhs*2 NB
780* (bwl+INT_ONE)
781* flops for LU solve in other procs
782*
783 nops2 = nops2 +
784 $ ( nprocs_real-2)*2*
785 $ ( (dble(nb)*(dble(bwl)+
786 $ dble(int_one)))*dble(nrhs) )
787 ENDIF
788*
789* Reduced system
790*
791 nops2 = nops2 +
792 $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
793 IF( nprocs_real .GT. 1 ) THEN
794 nops2 = nops2 +
795 $ nrhs*( nprocs_real-2 ) *
796 $ ( 6 * bwl*int_one )
797 ENDIF
798*
799*
800* Calculate total megaflops - factorization and/or
801* solve -- for WALL and CPU time, and print output
802*
803* Print WALL time if machine supports it
804*
805 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
806 tmflops = nops /
807 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
808 ELSE
809 tmflops = 0.0d+0
810 END IF
811*
812 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
813 tmflops2 = nops2 /
814 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
815 ELSE
816 tmflops2 = 0.0d+0
817 END IF
818*
819 IF( wtime( 2 ).GE.0.0d+0 )
820 $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
821 $ n,
822 $ bwl, bwu,
823 $ nb, nrhs, nprow, npcol,
824 $ wtime( 1 ), wtime( 2 ), tmflops,
825 $ tmflops2, passed
826*
827* Print CPU time if machine supports it
828*
829 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
830 tmflops = nops /
831 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
832 ELSE
833 tmflops = 0.0d+0
834 END IF
835*
836 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
837 tmflops2 = nops2 /
838 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
839 ELSE
840 tmflops2 = 0.0d+0
841 END IF
842*
843 IF( ctime( 2 ).GE.0.0d+0 )
844 $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
845 $ n,
846 $ bwl, bwu,
847 $ nb, nrhs, nprow, npcol,
848 $ ctime( 1 ), ctime( 2 ), tmflops,
849 $ tmflops2, passed
850*
851 END IF
852 20 CONTINUE
853*
854*
855 30 CONTINUE
856* NNB loop
857*
858 45 CONTINUE
859* BW[] loop
860*
861 40 CONTINUE
862* NMAT loop
863*
864 CALL blacs_gridexit( ictxt )
865 CALL blacs_gridexit( ictxtb )
866*
867 50 CONTINUE
868* NGRIDS DROPOUT
869 60 CONTINUE
870* NGRIDS loop
871*
872* Print ending messages and close output file
873*
874 IF( iam.EQ.0 ) THEN
875 ktests = kpass + kfail + kskip
876 WRITE( nout, fmt = * )
877 WRITE( nout, fmt = 9992 ) ktests
878 IF( check ) THEN
879 WRITE( nout, fmt = 9991 ) kpass
880 WRITE( nout, fmt = 9989 ) kfail
881 ELSE
882 WRITE( nout, fmt = 9990 ) kpass
883 END IF
884 WRITE( nout, fmt = 9988 ) kskip
885 WRITE( nout, fmt = * )
886 WRITE( nout, fmt = * )
887 WRITE( nout, fmt = 9987 )
888 IF( nout.NE.6 .AND. nout.NE.0 )
889 $ CLOSE ( nout )
890 END IF
891*
892 CALL blacs_exit( 0 )
893*
894 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
895 $ '; It should be at least 1' )
896 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
897 $ i4 )
898 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
899 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
900 $ i11 )
901 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
902 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
903 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
904 $ '-------- -------- -------- ------' )
905 9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
906 $ 1x,i4,1x,i4,1x,f9.3,
907 $ f9.4, f9.2, f9.2, 1x, a6 )
908 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
909 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
910 9990 FORMAT( i5, ' tests completed without checking.' )
911 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
912 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
913 9987 FORMAT( 'END OF TESTS.' )
914 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
915 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
916*
917 stop
918*
919* End of PDDTTRS_DRIVER
920*
921 END
922*
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pdbmatgen.f:5
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
program pddtdriver
Definition pddtdriver.f:1
subroutine pddtinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pddtinfo.f:6
subroutine pddtlaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pddtlaschk.f:4
subroutine pddttrf(n, dl, d, du, ja, desca, af, laf, work, lwork, info)
Definition pddttrf.f:3
subroutine pddttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pddttrs.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724