SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcptdriver.f
Go to the documentation of this file.
1 PROGRAM pcptdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PCPTDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by CPT. This test driver performs an
15* A = L*L**H factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* CPLXSZ INTEGER, default = 8 bytes.
56* INTGSZ and CPLXSZ indicate the length in bytes on the
57* given platform for an integer and a single precision
58* complex.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 REAL zero
83 INTEGER cplxsz, memsiz, ntests
84 COMPLEX padval
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
88 $ zero = 0.0e+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER uplo
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
98 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99 $ imidpad, info, int_temp, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108* ..
109* .. Local Arrays ..
110 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
111 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
112 $ nbrval( ntests ), nbval( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
117* ..
118* .. External Subroutines ..
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121 $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
125* ..
126* .. External Functions ..
127 INTEGER numroc
128 LOGICAL lsame
129 REAL pclange
130 EXTERNAL lsame, numroc, pclange
131* ..
132* .. Intrinsic Functions ..
133 INTRINSIC dble, max, min, mod
134* ..
135* .. Data Statements ..
136 DATA kfail, kpass, kskip, ktests / 4*0 /
137* ..
138*
139*
140*
141* .. Executable Statements ..
142*
143* Get starting information
144*
145 CALL blacs_pinfo( iam, nprocs )
146 iaseed = 100
147 ibseed = 200
148*
149 CALL pcptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
150 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
151 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
152 $ qval, ntests, thresh, mem, iam, nprocs )
153*
154 check = ( thresh.GE.0.0e+0 )
155*
156* Print headings
157*
158 IF( iam.EQ.0 ) THEN
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
163 END IF
164*
165* Loop over different process grids
166*
167 DO 60 i = 1, ngrids
168*
169 nprow = pval( i )
170 npcol = qval( i )
171*
172* Make sure grid information is correct
173*
174 ierr( 1 ) = 0
175 IF( nprow.LT.1 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178 ierr( 1 ) = 1
179 ELSE IF( npcol.LT.1 ) THEN
180 IF( iam.EQ.0 )
181 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182 ierr( 1 ) = 1
183 ELSE IF( nprow*npcol.GT.nprocs ) THEN
184 IF( iam.EQ.0 )
185 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186 ierr( 1 ) = 1
187 END IF
188*
189 IF( ierr( 1 ).GT.0 ) THEN
190 IF( iam.EQ.0 )
191 $ WRITE( nout, fmt = 9997 ) 'grid'
192 kskip = kskip + 1
193 GO TO 50
194 END IF
195*
196* Define process grid
197*
198 CALL blacs_get( -1, 0, ictxt )
199 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200*
201*
202* Define transpose process grid
203*
204 CALL blacs_get( -1, 0, ictxtb )
205 CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206*
207* Go to bottom of process grid loop if this case doesn't use my
208* process
209*
210 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211*
212 IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213 GO TO 50
214 ENDIF
215*
216 DO 40 j = 1, nmat
217*
218 ierr( 1 ) = 0
219*
220 n = nval( j )
221*
222* Make sure matrix information is correct
223*
224 IF( n.LT.1 ) THEN
225 IF( iam.EQ.0 )
226 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227 ierr( 1 ) = 1
228 END IF
229*
230* Check all processes for an error
231*
232 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233 $ -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'size'
238 kskip = kskip + 1
239 GO TO 40
240 END IF
241*
242*
243 DO 45 bw_num = 1, nbw
244*
245 ierr( 1 ) = 0
246*
247 bw = 1
248 IF( bw.LT.0 ) THEN
249 IF( iam.EQ.0 )
250 $ WRITE( nout, fmt = 9999 ) 'Band', 'bw', bw
251 ierr( 1 ) = 1
252 END IF
253*
254 IF( bw.GT.n-1 ) THEN
255 ierr( 1 ) = 1
256 END IF
257*
258* Check all processes for an error
259*
260 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
261 $ -1, 0 )
262*
263 IF( ierr( 1 ).GT.0 ) THEN
264 kskip = kskip + 1
265 GO TO 45
266 END IF
267*
268 DO 30 k = 1, nnb
269*
270 ierr( 1 ) = 0
271*
272 nb = nbval( k )
273 IF( nb.LT.0 ) THEN
274 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
275 $ + int_one
276 nb = max( nb, 2*int_one )
277 nb = min( n, nb )
278 END IF
279*
280* Make sure NB is legal
281*
282 ierr( 1 ) = 0
283 IF( nb.LT.min( 2*int_one, n ) ) THEN
284 ierr( 1 ) = 1
285 ENDIF
286*
287* Check all processes for an error
288*
289 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
290 $ -1, 0 )
291*
292 IF( ierr( 1 ).GT.0 ) THEN
293 kskip = kskip + 1
294 GO TO 30
295 END IF
296*
297* Padding constants
298*
299 np = numroc( (2), (2),
300 $ myrow, 0, nprow )
301 nq = numroc( n, nb, mycol, 0, npcol )
302*
303 IF( check ) THEN
304 iprepad = ((2)+10)
305 imidpad = 10
306 ipostpad = ((2)+10)
307 ELSE
308 iprepad = 0
309 imidpad = 0
310 ipostpad = 0
311 END IF
312*
313* Initialize the array descriptor for the matrix A
314*
315 CALL descinit( desca2d, n, (2),
316 $ nb, 1, 0, 0,
317 $ ictxtb, nb+10, ierr( 1 ) )
318*
319* Convert this to 1D descriptor
320*
321 desca( 1 ) = 501
322 desca( 3 ) = n
323 desca( 4 ) = nb
324 desca( 5 ) = 0
325 desca( 2 ) = ictxt
326 desca( 6 ) = ((2)+10)
327 desca( 7 ) = 0
328*
329 ierr_temp = ierr( 1 )
330 ierr( 1 ) = 0
331 ierr( 1 ) = min( ierr( 1 ), ierr_temp )
332*
333* Check all processes for an error
334*
335 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
336*
337 IF( ierr( 1 ).LT.0 ) THEN
338 IF( iam.EQ.0 )
339 $ WRITE( nout, fmt = 9997 ) 'descriptor'
340 kskip = kskip + 1
341 GO TO 30
342 END IF
343*
344* Assign pointers into MEM for SCALAPACK arrays, A is
345* allocated starting at position MEM( IPREPAD+1 )
346*
347 free_ptr = 1
348 ipb = 0
349*
350* Save room for prepadding
351 free_ptr = free_ptr + iprepad
352*
353 ipa = free_ptr
354 free_ptr = free_ptr + (nb+10)*(2)
355 $ + ipostpad
356*
357* Add memory for fillin
358* Fillin space needs to store:
359* Fillin spike:
360* Contribution to previous proc's diagonal block of
361* reduced system:
362* Off-diagonal block of reduced system:
363* Diagonal block of reduced system:
364*
365 fillin_size =
366 $ (12*npcol + 3*nb)
367*
368* Claim memory for fillin
369*
370 free_ptr = free_ptr + iprepad
371 ip_fillin = free_ptr
372 free_ptr = free_ptr + fillin_size
373*
374* Workspace needed by computational routines:
375*
376 ipw_size = 0
377*
378* factorization:
379*
380 ipw_size = 8*npcol
381*
382* Claim memory for IPW
383*
384 ipw = free_ptr
385 free_ptr = free_ptr + ipw_size
386*
387* Check for adequate memory for problem size
388*
389 ierr( 1 ) = 0
390 IF( free_ptr.GT.memsiz ) THEN
391 IF( iam.EQ.0 )
392 $ WRITE( nout, fmt = 9996 )
393 $ 'divide and conquer factorization',
394 $ (free_ptr )*cplxsz
395 ierr( 1 ) = 1
396 END IF
397*
398* Check all processes for an error
399*
400 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
401 $ 1, -1, 0 )
402*
403 IF( ierr( 1 ).GT.0 ) THEN
404 IF( iam.EQ.0 )
405 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
406 kskip = kskip + 1
407 GO TO 30
408 END IF
409*
410* Worksize needed for LAPRNT
411 worksiz = max( ((2)+10), nb )
412*
413 IF( check ) THEN
414*
415* Calculate the amount of workspace required by
416* the checking routines.
417*
418* PCLANGE
419 worksiz = max( worksiz, desca2d( nb_ ) )
420*
421* PCPTLASCHK
422 worksiz = max( worksiz,
423 $ max(5,nb)+2*nb )
424 END IF
425*
426 free_ptr = free_ptr + iprepad
427 ip_driver_w = free_ptr
428 free_ptr = free_ptr + worksiz + ipostpad
429*
430*
431* Check for adequate memory for problem size
432*
433 ierr( 1 ) = 0
434 IF( free_ptr.GT.memsiz ) THEN
435 IF( iam.EQ.0 )
436 $ WRITE( nout, fmt = 9996 ) 'factorization',
437 $ ( free_ptr )*cplxsz
438 ierr( 1 ) = 1
439 END IF
440*
441* Check all processes for an error
442*
443 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
444 $ 1, -1, 0 )
445*
446 IF( ierr( 1 ).GT.0 ) THEN
447 IF( iam.EQ.0 )
448 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
449 kskip = kskip + 1
450 GO TO 30
451 END IF
452*
453 CALL pcbmatgen( ictxt, uplo, 'T', bw, bw, n, (2), nb,
454 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
455 $ mycol, nprow, npcol )
456 CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
457 $ nb+10, iprepad, ipostpad,
458 $ padval )
459*
460 CALL pcfillpad( ictxt, worksiz, 1,
461 $ mem( ip_driver_w-iprepad ), worksiz,
462 $ iprepad, ipostpad, padval )
463*
464* Calculate norm of A for residual error-checking
465*
466 IF( check ) THEN
467*
468 anorm = pclange( 'I', n,
469 $ (2), mem( ipa ), 1, 1,
470 $ desca2d, mem( ip_driver_w ) )
471 CALL pcchekpad( ictxt, 'PCLANGE', nq, np,
472 $ mem( ipa-iprepad ), nb+10,
473 $ iprepad, ipostpad, padval )
474 CALL pcchekpad( ictxt, 'PCLANGE',
475 $ worksiz, 1,
476 $ mem( ip_driver_w-iprepad ), worksiz,
477 $ iprepad, ipostpad, padval )
478 END IF
479*
480 IF( lsame( uplo, 'L' ) ) THEN
481 int_temp = 0
482 ELSE
483 int_temp = desca2d( lld_ )
484 ENDIF
485*
486* For SPD Tridiagonal complex matrices, diagonal is stored
487* as a real. Thus, compact D into half the space
488*
489 DO 10 h=1, numroc(n,nb,mycol,0,npcol)/2
490 mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
491 $ +mem( ipa+int_temp+2*h-1 )*( 0.0e+0, 1.0e+0 )
492 10 CONTINUE
493 IF( 2*(numroc(n,nb,mycol,0,npcol)/2).NE.
494 $ numroc(n,nb,mycol,0,npcol) ) THEN
495 h=numroc(n,nb,mycol,0,npcol)/2+1
496 mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
497 ENDIF
498*
499*
500 CALL slboot()
501 CALL blacs_barrier( ictxt, 'All' )
502*
503* Perform factorization
504*
505 CALL sltimer( 1 )
506*
507 CALL pcpttrf( n, mem( ipa+int_temp ),
508 $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
509 $ mem( ip_fillin ), fillin_size, mem( ipw ),
510 $ ipw_size, info )
511*
512 CALL sltimer( 1 )
513*
514 IF( info.NE.0 ) THEN
515 IF( iam.EQ.0 ) THEN
516 WRITE( nout, fmt = * ) 'PCPTTRF INFO=', info
517 ENDIF
518 kfail = kfail + 1
519 GO TO 30
520 END IF
521*
522 IF( check ) THEN
523*
524* Check for memory overwrite in factorization
525*
526 CALL pcchekpad( ictxt, 'PCPTTRF', nq,
527 $ np, mem( ipa-iprepad ), nb+10,
528 $ iprepad, ipostpad, padval )
529 END IF
530*
531*
532* Loop over the different values for NRHS
533*
534 DO 20 hh = 1, nnr
535*
536 ierr( 1 ) = 0
537*
538 nrhs = nrval( hh )
539*
540* Initialize Array Descriptor for rhs
541*
542 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
543 $ ictxtb, nb+10, ierr( 1 ) )
544*
545* Convert this to 1D descriptor
546*
547 descb( 1 ) = 502
548 descb( 3 ) = n
549 descb( 4 ) = nb
550 descb( 5 ) = 0
551 descb( 2 ) = ictxt
552 descb( 6 ) = descb2d( lld_ )
553 descb( 7 ) = 0
554*
555* reset free_ptr to reuse space for right hand sides
556*
557 IF( ipb .GT. 0 ) THEN
558 free_ptr = ipb
559 ENDIF
560*
561 free_ptr = free_ptr + iprepad
562 ipb = free_ptr
563 free_ptr = free_ptr + nrhs*descb2d( lld_ )
564 $ + ipostpad
565*
566* Allocate workspace for workspace in TRS routine:
567*
568 ipw_solve_size = (10+2*min(100,nrhs))*npcol+4*nrhs
569*
570 ipw_solve = free_ptr
571 free_ptr = free_ptr + ipw_solve_size
572*
573 ierr( 1 ) = 0
574 IF( free_ptr.GT.memsiz ) THEN
575 IF( iam.EQ.0 )
576 $ WRITE( nout, fmt = 9996 )'solve',
577 $ ( free_ptr )*cplxsz
578 ierr( 1 ) = 1
579 END IF
580*
581* Check all processes for an error
582*
583 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
584 $ ierr, 1, -1, 0 )
585*
586 IF( ierr( 1 ).GT.0 ) THEN
587 IF( iam.EQ.0 )
588 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
589 kskip = kskip + 1
590 GO TO 15
591 END IF
592*
593 myrhs_size = numroc( n, nb, mycol, 0, npcol )
594*
595* Generate RHS
596*
597 CALL pcmatgen(ictxtb, 'No', 'No',
598 $ descb2d( m_ ), descb2d( n_ ),
599 $ descb2d( mb_ ), descb2d( nb_ ),
600 $ mem( ipb ),
601 $ descb2d( lld_ ), descb2d( rsrc_ ),
602 $ descb2d( csrc_ ),
603 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
604 $ myrow, npcol, nprow )
605*
606 IF( check ) THEN
607 CALL pcfillpad( ictxtb, nb, nrhs,
608 $ mem( ipb-iprepad ),
609 $ descb2d( lld_ ),
610 $ iprepad, ipostpad,
611 $ padval )
612 CALL pcfillpad( ictxt, worksiz, 1,
613 $ mem( ip_driver_w-iprepad ),
614 $ worksiz, iprepad,
615 $ ipostpad, padval )
616 END IF
617*
618*
619 CALL blacs_barrier( ictxt, 'All')
620 CALL sltimer( 2 )
621*
622* Solve linear system via factorization
623*
624 CALL pcpttrs( uplo, n, nrhs, mem( ipa+int_temp ),
625 $ mem( ipa+1*( nb+10-int_temp ) ), 1,
626 $ desca, mem( ipb ), 1, descb,
627 $ mem( ip_fillin ), fillin_size,
628 $ mem( ipw_solve ), ipw_solve_size,
629 $ info )
630*
631 CALL sltimer( 2 )
632*
633 IF( info.NE.0 ) THEN
634 IF( iam.EQ.0 )
635 $ WRITE( nout, fmt = * ) 'PCPTTRS INFO=', info
636 kfail = kfail + 1
637 passed = 'FAILED'
638 GO TO 20
639 END IF
640*
641 IF( check ) THEN
642*
643* check for memory overwrite
644*
645 CALL pcchekpad( ictxt, 'PCPTTRS-work',
646 $ worksiz, 1,
647 $ mem( ip_driver_w-iprepad ),
648 $ worksiz, iprepad,
649 $ ipostpad, padval )
650*
651* check the solution to rhs
652*
653 sresid = zero
654*
655* Reset descriptor describing A to 1-by-P grid for
656* use in banded utility routines
657*
658 CALL descinit( desca2d, (2), n,
659 $ (2), nb, 0, 0,
660 $ ictxt, (2), ierr( 1 ) )
661 CALL pcptlaschk( 'H', uplo, n, bw, bw, nrhs,
662 $ mem( ipb ), 1, 1, descb2d,
663 $ iaseed, mem( ipa ), 1, 1, desca2d,
664 $ ibseed, anorm, sresid,
665 $ mem( ip_driver_w ), worksiz )
666*
667 IF( iam.EQ.0 ) THEN
668 IF( sresid.GT.thresh )
669 $ WRITE( nout, fmt = 9985 ) sresid
670 END IF
671*
672* The second test is a NaN trap
673*
674 IF( ( sresid.LE.thresh ).AND.
675 $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
676 kpass = kpass + 1
677 passed = 'PASSED'
678 ELSE
679 kfail = kfail + 1
680 passed = 'FAILED'
681 END IF
682*
683 END IF
684*
685 15 CONTINUE
686* Skipped tests jump to here to print out "SKIPPED"
687*
688* Gather maximum of all CPU and WALL clock timings
689*
690 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
691 $ wtime )
692 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
693 $ ctime )
694*
695* Print results
696*
697 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
698*
699 nops = 0
700 nops2 = 0
701*
702 n_first = nb
703 nprocs_real = ( n-1 )/nb + 1
704 n_last = mod( n-1, nb ) + 1
705*
706*
707 nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
708 $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
709 $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
710 $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
711 nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
712 $ *( -1.d0 /2.d0+dble(bw)
713 $ *( -1.d0 / 3.d0 ) ) ) +
714 $ dble(n)*( dble(bw) /
715 $ 2.d0*( 1.d0+dble(bw) ) )
716*
717 nops = nops +
718 $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
719 $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
720 $ ( dble(bw)*( 2*dble(n)-
721 $ ( dble(bw)+1.d0 ) ) )
722*
723*
724* Second calc to represent actual hardware speed
725*
726* NB bw^2 flops for LLt factorization in 1st proc
727*
728 nops2 = ( (dble(n_first))* dble(bw)**2 )
729*
730 IF ( nprocs_real .GT. 1) THEN
731* 4 NB bw^2 flops for LLt factorization and
732* spike calc in last processor
733*
734 nops2 = nops2 +
735 $ 4*( (dble(n_last)*dble(bw)**2) )
736 ENDIF
737*
738 IF ( nprocs_real .GT. 2) THEN
739* 4 NB bw^2 flops for LLt factorization and
740* spike calc in other processors
741*
742 nops2 = nops2 + (nprocs_real-2)*
743 $ 4*( (dble(nb)*dble(bw)**2) )
744 ENDIF
745*
746* Reduced system
747*
748 nops2 = nops2 +
749 $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
750 IF( nprocs_real .GT. 1 ) THEN
751 nops2 = nops2 +
752 $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
753 ENDIF
754*
755*
756* nrhs * 4 n_first*bw flops for LLt solve in proc 1.
757*
758 nops2 = nops2 +
759 $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
760*
761 IF ( nprocs_real .GT. 1 ) THEN
762*
763* 2*nrhs*4 n_last*bw flops for LLt solve in last.
764*
765 nops2 = nops2 +
766 $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
767 ENDIF
768*
769 IF ( nprocs_real .GT. 2 ) THEN
770*
771* 2 * nrhs * 4 NB*bw flops for LLt solve in others.
772*
773 nops2 = nops2 +
774 $ ( nprocs_real-2)*2*
775 $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
776 ENDIF
777*
778* Reduced system
779*
780 nops2 = nops2 +
781 $ nrhs*( nprocs_real-1 ) * ( bw*bw )
782 IF( nprocs_real .GT. 1 ) THEN
783 nops2 = nops2 +
784 $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
785 ENDIF
786*
787*
788* Multiply by 4 to get complex count
789*
790 nops2 = nops2 * dble(4)
791*
792* Calculate total megaflops - factorization and/or
793* solve -- for WALL and CPU time, and print output
794*
795* Print WALL time if machine supports it
796*
797 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
798 tmflops = nops /
799 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
800 ELSE
801 tmflops = 0.0d+0
802 END IF
803*
804 IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
805 tmflops2 = nops2 /
806 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
807 ELSE
808 tmflops2 = 0.0d+0
809 END IF
810*
811 IF( wtime( 2 ).GE.0.0d+0 )
812 $ WRITE( nout, fmt = 9993 ) 'WALL', uplo,
813 $ n,
814 $ bw,
815 $ nb, nrhs, nprow, npcol,
816 $ wtime( 1 ), wtime( 2 ), tmflops,
817 $ tmflops2, passed
818*
819* Print CPU time if machine supports it
820*
821 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
822 tmflops = nops /
823 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
824 ELSE
825 tmflops = 0.0d+0
826 END IF
827*
828 IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
829 tmflops2 = nops2 /
830 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
831 ELSE
832 tmflops2 = 0.0d+0
833 END IF
834*
835 IF( ctime( 2 ).GE.0.0d+0 )
836 $ WRITE( nout, fmt = 9993 ) 'CPU ', uplo,
837 $ n,
838 $ bw,
839 $ nb, nrhs, nprow, npcol,
840 $ ctime( 1 ), ctime( 2 ), tmflops,
841 $ tmflops2, passed
842*
843 END IF
844 20 CONTINUE
845*
846*
847 30 CONTINUE
848* NNB loop
849*
850 45 CONTINUE
851* BW[] loop
852*
853 40 CONTINUE
854* NMAT loop
855*
856 CALL blacs_gridexit( ictxt )
857 CALL blacs_gridexit( ictxtb )
858*
859 50 CONTINUE
860* NGRIDS DROPOUT
861 60 CONTINUE
862* NGRIDS loop
863*
864* Print ending messages and close output file
865*
866 IF( iam.EQ.0 ) THEN
867 ktests = kpass + kfail + kskip
868 WRITE( nout, fmt = * )
869 WRITE( nout, fmt = 9992 ) ktests
870 IF( check ) THEN
871 WRITE( nout, fmt = 9991 ) kpass
872 WRITE( nout, fmt = 9989 ) kfail
873 ELSE
874 WRITE( nout, fmt = 9990 ) kpass
875 END IF
876 WRITE( nout, fmt = 9988 ) kskip
877 WRITE( nout, fmt = * )
878 WRITE( nout, fmt = * )
879 WRITE( nout, fmt = 9987 )
880 IF( nout.NE.6 .AND. nout.NE.0 )
881 $ CLOSE ( nout )
882 END IF
883*
884 CALL blacs_exit( 0 )
885*
886 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
887 $ '; It should be at least 1' )
888 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
889 $ i4 )
890 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
891 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
892 $ i11 )
893 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ',
894 $ 'Slv Time MFLOPS MFLOP2 CHECK' )
895 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
896 $ '-------- ------ ------ ------' )
897 9993 FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
898 $ i5, 1x, i2, 1x,
899 $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
900 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
901 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
902 9990 FORMAT( i5, ' tests completed without checking.' )
903 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
904 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
905 9987 FORMAT( 'END OF TESTS.' )
906 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
907 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
908*
909 stop
910*
911* End of PCPTTRS_DRIVER
912*
913 END
914*
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
program pcptdriver
Definition pcptdriver.f:1
subroutine pcptinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcptinfo.f:6
subroutine pcptlaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcptlaschk.f:4
subroutine pcpttrf(n, d, e, ja, desca, af, laf, work, lwork, info)
Definition pcpttrf.f:3
subroutine pcpttrs(uplo, n, nrhs, d, e, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pcpttrs.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724