ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pddtdriver.f
Go to the documentation of this file.
1  PROGRAM pddtdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PDDTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by DDT. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * DBLESZ INTEGER, default = 8 bytes.
56 * INTGSZ and DBLESZ indicate the length in bytes on the
57 * given platform for an integer and a double precision real.
58 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
59 * All arrays used by ScaLAPACK routines are allocated from
60 * this array and referenced by pointers. The integer IPB,
61 * for example, is a pointer to the starting element of MEM for
62 * the solution vector(s) B.
63 *
64 * =====================================================================
65 *
66 * Code Developer: Andrew J. Cleary, University of Tennessee.
67 * Current address: Lawrence Livermore National Labs.
68 * This version released: August, 2001.
69 *
70 * =====================================================================
71 *
72 * .. Parameters ..
73  INTEGER totmem
74  parameter( totmem = 3000000 )
75  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76  $ lld_, mb_, m_, nb_, n_, rsrc_
77  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
80 *
81  DOUBLE PRECISION zero
82  INTEGER dblesz, memsiz, ntests
83  DOUBLE PRECISION padval
84  parameter( dblesz = 8,
85  $ memsiz = totmem / dblesz, ntests = 20,
86  $ padval = -9923.0d+0, zero = 0.0d+0 )
87  INTEGER int_one
88  parameter( int_one = 1 )
89 * ..
90 * .. Local Scalars ..
91  LOGICAL check
92  CHARACTER trans
93  CHARACTER*6 passed
94  CHARACTER*80 outfile
95  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98  $ iprepad, ipw, ipw_size, ipw_solve,
99  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102  $ nnr, nout, np, npcol, nprocs, nprocs_real,
103  $ nprow, nq, nrhs, n_first, n_last, worksiz
104  REAL thresh
105  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
106  $ tmflops2
107 * ..
108 * .. Local Arrays ..
109  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112  $ nrval( ntests ), nval( ntests ),
113  $ pval( ntests ), qval( ntests )
114  DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
115 * ..
116 * .. External Subroutines ..
117  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
119  $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
122  $ slcombine, sltimer
123 * ..
124 * .. External Functions ..
125  INTEGER numroc
126  LOGICAL lsame
127  DOUBLE PRECISION pdlange
128  EXTERNAL lsame, numroc, pdlange
129 * ..
130 * .. Intrinsic Functions ..
131  INTRINSIC dble, max, min, mod
132 * ..
133 * .. Data Statements ..
134  DATA kfail, kpass, kskip, ktests / 4*0 /
135 * ..
136 *
137 *
138 *
139 * .. Executable Statements ..
140 *
141 * Get starting information
142 *
143  CALL blacs_pinfo( iam, nprocs )
144  iaseed = 100
145  ibseed = 200
146 *
147  CALL pddtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
151 *
152  check = ( thresh.GE.0.0d+0 )
153 *
154 * Print headings
155 *
156  IF( iam.EQ.0 ) THEN
157  WRITE( nout, fmt = * )
158  WRITE( nout, fmt = 9995 )
159  WRITE( nout, fmt = 9994 )
160  WRITE( nout, fmt = * )
161  END IF
162 *
163 * Loop over different process grids
164 *
165  DO 60 i = 1, ngrids
166 *
167  nprow = pval( i )
168  npcol = qval( i )
169 *
170 * Make sure grid information is correct
171 *
172  ierr( 1 ) = 0
173  IF( nprow.LT.1 ) THEN
174  IF( iam.EQ.0 )
175  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
176  ierr( 1 ) = 1
177  ELSE IF( npcol.LT.1 ) THEN
178  IF( iam.EQ.0 )
179  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
180  ierr( 1 ) = 1
181  ELSE IF( nprow*npcol.GT.nprocs ) THEN
182  IF( iam.EQ.0 )
183  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
184  ierr( 1 ) = 1
185  END IF
186 *
187  IF( ierr( 1 ).GT.0 ) THEN
188  IF( iam.EQ.0 )
189  $ WRITE( nout, fmt = 9997 ) 'grid'
190  kskip = kskip + 1
191  GO TO 50
192  END IF
193 *
194 * Define process grid
195 *
196  CALL blacs_get( -1, 0, ictxt )
197  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
198 *
199 *
200 * Define transpose process grid
201 *
202  CALL blacs_get( -1, 0, ictxtb )
203  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
204 *
205 * Go to bottom of process grid loop if this case doesn't use my
206 * process
207 *
208  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209 *
210  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
211  GO TO 50
212  ENDIF
213 *
214  DO 40 j = 1, nmat
215 *
216  ierr( 1 ) = 0
217 *
218  n = nval( j )
219 *
220 * Make sure matrix information is correct
221 *
222  IF( n.LT.1 ) THEN
223  IF( iam.EQ.0 )
224  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
225  ierr( 1 ) = 1
226  END IF
227 *
228 * Check all processes for an error
229 *
230  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
231  $ -1, 0 )
232 *
233  IF( ierr( 1 ).GT.0 ) THEN
234  IF( iam.EQ.0 )
235  $ WRITE( nout, fmt = 9997 ) 'size'
236  kskip = kskip + 1
237  GO TO 40
238  END IF
239 *
240 *
241  DO 45 bw_num = 1, nbw
242 *
243  ierr( 1 ) = 0
244 *
245  bwl = 1
246  IF( bwl.LT.1 ) THEN
247  IF( iam.EQ.0 )
248  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
249  ierr( 1 ) = 1
250  END IF
251 *
252  bwu = 1
253  IF( bwu.LT.1 ) THEN
254  IF( iam.EQ.0 )
255  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
256  ierr( 1 ) = 1
257  END IF
258 *
259  IF( bwl.GT.n-1 ) THEN
260  IF( iam.EQ.0 ) THEN
261  ierr( 1 ) = 1
262  ENDIF
263  END IF
264 *
265  IF( bwu.GT.n-1 ) THEN
266  IF( iam.EQ.0 ) THEN
267  ierr( 1 ) = 1
268  ENDIF
269  END IF
270 *
271 * Check all processes for an error
272 *
273  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
274  $ -1, 0 )
275 *
276  IF( ierr( 1 ).GT.0 ) THEN
277  kskip = kskip + 1
278  GO TO 45
279  END IF
280 *
281  DO 30 k = 1, nnb
282 *
283  ierr( 1 ) = 0
284 *
285  nb = nbval( k )
286  IF( nb.LT.0 ) THEN
287  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
288  $ + int_one
289  nb = max( nb, 2*int_one )
290  nb = min( n, nb )
291  END IF
292 *
293 * Make sure NB is legal
294 *
295  ierr( 1 ) = 0
296  IF( nb.LT.min( 2*int_one, n ) ) THEN
297  ierr( 1 ) = 1
298  END IF
299 *
300 * Check all processes for an error
301 *
302  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
303  $ -1, 0 )
304 *
305  IF( ierr( 1 ).GT.0 ) THEN
306  kskip = kskip + 1
307  GO TO 30
308  END IF
309 *
310 * Padding constants
311 *
312  np = numroc( (3), (3),
313  $ myrow, 0, nprow )
314  nq = numroc( n, nb, mycol, 0, npcol )
315 *
316  IF( check ) THEN
317  iprepad = ((3)+10)
318  imidpad = 10
319  ipostpad = ((3)+10)
320  ELSE
321  iprepad = 0
322  imidpad = 0
323  ipostpad = 0
324  END IF
325 *
326 * Initialize the array descriptor for the matrix A
327 *
328  CALL descinit( desca2d, n, (3),
329  $ nb, 1, 0, 0,
330  $ ictxtb, nb+10, ierr( 1 ) )
331 *
332 * Convert this to 1D descriptor
333 *
334  desca( 1 ) = 501
335  desca( 3 ) = n
336  desca( 4 ) = nb
337  desca( 5 ) = 0
338  desca( 2 ) = ictxt
339  desca( 6 ) = ((3)+10)
340  desca( 7 ) = 0
341 *
342  ierr_temp = ierr( 1 )
343  ierr( 1 ) = 0
344  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
345 *
346 * Check all processes for an error
347 *
348  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
349 *
350  IF( ierr( 1 ).LT.0 ) THEN
351  IF( iam.EQ.0 )
352  $ WRITE( nout, fmt = 9997 ) 'descriptor'
353  kskip = kskip + 1
354  GO TO 30
355  END IF
356 *
357 * Assign pointers into MEM for SCALAPACK arrays, A is
358 * allocated starting at position MEM( IPREPAD+1 )
359 *
360  free_ptr = 1
361  ipb = 0
362 *
363 * Save room for prepadding
364  free_ptr = free_ptr + iprepad
365 *
366  ipa = free_ptr
367  free_ptr = free_ptr + (nb+10)*(3)
368  $ + ipostpad
369 *
370 * Add memory for fillin
371 * Fillin space needs to store:
372 * Fillin spike:
373 * Contribution to previous proc's diagonal block of
374 * reduced system:
375 * Off-diagonal block of reduced system:
376 * Diagonal block of reduced system:
377 *
378  fillin_size =
379  $ (12*npcol+3*nb)
380 *
381 * Claim memory for fillin
382 *
383  free_ptr = free_ptr + iprepad
384  ip_fillin = free_ptr
385  free_ptr = free_ptr + fillin_size
386 *
387 * Workspace needed by computational routines:
388 *
389  ipw_size = 0
390 *
391 * factorization:
392 *
393  ipw_size = 8*npcol
394 *
395 * Claim memory for IPW
396 *
397  ipw = free_ptr
398  free_ptr = free_ptr + ipw_size
399 *
400 * Check for adequate memory for problem size
401 *
402  ierr( 1 ) = 0
403  IF( free_ptr.GT.memsiz ) THEN
404  IF( iam.EQ.0 )
405  $ WRITE( nout, fmt = 9996 )
406  $ 'divide and conquer factorization',
407  $ (free_ptr )*dblesz
408  ierr( 1 ) = 1
409  END IF
410 *
411 * Check all processes for an error
412 *
413  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
414  $ 1, -1, 0 )
415 *
416  IF( ierr( 1 ).GT.0 ) THEN
417  IF( iam.EQ.0 )
418  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
419  kskip = kskip + 1
420  GO TO 30
421  END IF
422 *
423 * Worksize needed for LAPRNT
424  worksiz = max( ((3)+10), nb )
425 *
426  IF( check ) THEN
427 *
428 * Calculate the amount of workspace required by
429 * the checking routines.
430 *
431 * PDLANGE
432  worksiz = max( worksiz, desca2d( nb_ ) )
433 *
434 * PDDTLASCHK
435  worksiz = max( worksiz,
436  $ max(5,nb)+2*nb )
437  END IF
438 *
439  free_ptr = free_ptr + iprepad
440  ip_driver_w = free_ptr
441  free_ptr = free_ptr + worksiz + ipostpad
442 *
443 *
444 * Check for adequate memory for problem size
445 *
446  ierr( 1 ) = 0
447  IF( free_ptr.GT.memsiz ) THEN
448  IF( iam.EQ.0 )
449  $ WRITE( nout, fmt = 9996 ) 'factorization',
450  $ ( free_ptr )*dblesz
451  ierr( 1 ) = 1
452  END IF
453 *
454 * Check all processes for an error
455 *
456  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
457  $ 1, -1, 0 )
458 *
459  IF( ierr( 1 ).GT.0 ) THEN
460  IF( iam.EQ.0 )
461  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
462  kskip = kskip + 1
463  GO TO 30
464  END IF
465 *
466  CALL pdbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
467  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
468  $ mycol, nprow, npcol )
469  CALL pdfillpad( ictxt, nq, np, mem( ipa-iprepad ),
470  $ nb+10, iprepad, ipostpad,
471  $ padval )
472 *
473  CALL pdfillpad( ictxt, worksiz, 1,
474  $ mem( ip_driver_w-iprepad ), worksiz,
475  $ iprepad, ipostpad, padval )
476 *
477 * Calculate norm of A for residual error-checking
478 *
479  IF( check ) THEN
480 *
481  anorm = pdlange( 'I', n,
482  $ (3), mem( ipa ), 1, 1,
483  $ desca2d, mem( ip_driver_w ) )
484  CALL pdchekpad( ictxt, 'PDLANGE', nq, np,
485  $ mem( ipa-iprepad ), nb+10,
486  $ iprepad, ipostpad, padval )
487  CALL pdchekpad( ictxt, 'PDLANGE',
488  $ worksiz, 1,
489  $ mem( ip_driver_w-iprepad ), worksiz,
490  $ iprepad, ipostpad, padval )
491  END IF
492 *
493 *
494  CALL slboot()
495  CALL blacs_barrier( ictxt, 'All' )
496 *
497 * Perform factorization
498 *
499  CALL sltimer( 1 )
500 *
501  CALL pddttrf( n, mem( ipa+2*( nb+10 ) ),
502  $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
503  $ desca, mem( ip_fillin ), fillin_size,
504  $ mem( ipw ), ipw_size, info )
505 *
506  CALL sltimer( 1 )
507 *
508  IF( info.NE.0 ) THEN
509  IF( iam.EQ.0 ) THEN
510  WRITE( nout, fmt = * ) 'PDDTTRF INFO=', info
511  ENDIF
512  kfail = kfail + 1
513  GO TO 30
514  END IF
515 *
516  IF( check ) THEN
517 *
518 * Check for memory overwrite in factorization
519 *
520  CALL pdchekpad( ictxt, 'PDDTTRF', nq,
521  $ np, mem( ipa-iprepad ), nb+10,
522  $ iprepad, ipostpad, padval )
523  END IF
524 *
525 *
526 * Loop over the different values for NRHS
527 *
528  DO 20 hh = 1, nnr
529 *
530  ierr( 1 ) = 0
531 *
532  nrhs = nrval( hh )
533 *
534 * Initialize Array Descriptor for rhs
535 *
536  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
537  $ ictxtb, nb+10, ierr( 1 ) )
538 *
539 * Convert this to 1D descriptor
540 *
541  descb( 1 ) = 502
542  descb( 3 ) = n
543  descb( 4 ) = nb
544  descb( 5 ) = 0
545  descb( 2 ) = ictxt
546  descb( 6 ) = descb2d( lld_ )
547  descb( 7 ) = 0
548 *
549 * reset free_ptr to reuse space for right hand sides
550 *
551  IF( ipb .GT. 0 ) THEN
552  free_ptr = ipb
553  ENDIF
554 *
555  free_ptr = free_ptr + iprepad
556  ipb = free_ptr
557  free_ptr = free_ptr + nrhs*descb2d( lld_ )
558  $ + ipostpad
559 *
560 * Allocate workspace for workspace in TRS routine:
561 *
562  ipw_solve_size = 10*npcol+4*nrhs
563 *
564  ipw_solve = free_ptr
565  free_ptr = free_ptr + ipw_solve_size
566 *
567  ierr( 1 ) = 0
568  IF( free_ptr.GT.memsiz ) THEN
569  IF( iam.EQ.0 )
570  $ WRITE( nout, fmt = 9996 )'solve',
571  $ ( free_ptr )*dblesz
572  ierr( 1 ) = 1
573  END IF
574 *
575 * Check all processes for an error
576 *
577  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
578  $ ierr, 1, -1, 0 )
579 *
580  IF( ierr( 1 ).GT.0 ) THEN
581  IF( iam.EQ.0 )
582  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
583  kskip = kskip + 1
584  GO TO 15
585  END IF
586 *
587  myrhs_size = numroc( n, nb, mycol, 0, npcol )
588 *
589 * Generate RHS
590 *
591  CALL pdmatgen(ictxtb, 'No', 'No',
592  $ descb2d( m_ ), descb2d( n_ ),
593  $ descb2d( mb_ ), descb2d( nb_ ),
594  $ mem( ipb ),
595  $ descb2d( lld_ ), descb2d( rsrc_ ),
596  $ descb2d( csrc_ ),
597  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
598  $ myrow, npcol, nprow )
599 *
600  IF( check ) THEN
601  CALL pdfillpad( ictxtb, nb, nrhs,
602  $ mem( ipb-iprepad ),
603  $ descb2d( lld_ ),
604  $ iprepad, ipostpad,
605  $ padval )
606  CALL pdfillpad( ictxt, worksiz, 1,
607  $ mem( ip_driver_w-iprepad ),
608  $ worksiz, iprepad,
609  $ ipostpad, padval )
610  END IF
611 *
612 *
613  CALL blacs_barrier( ictxt, 'All')
614  CALL sltimer( 2 )
615 *
616 * Solve linear system via factorization
617 *
618  CALL pddttrs( trans, n, nrhs,
619  $ mem( ipa+2*( nb+10 ) ),
620  $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
621  $ 1, desca, mem( ipb ), 1, descb,
622  $ mem( ip_fillin ), fillin_size,
623  $ mem( ipw_solve ), ipw_solve_size,
624  $ info )
625 *
626  CALL sltimer( 2 )
627 *
628  IF( info.NE.0 ) THEN
629  IF( iam.EQ.0 )
630  $ WRITE( nout, fmt = * ) 'PDDTTRS INFO=', info
631  kfail = kfail + 1
632  passed = 'FAILED'
633  GO TO 20
634  END IF
635 *
636  IF( check ) THEN
637 *
638 * check for memory overwrite
639 *
640  CALL pdchekpad( ictxt, 'PDDTTRS-work',
641  $ worksiz, 1,
642  $ mem( ip_driver_w-iprepad ),
643  $ worksiz, iprepad,
644  $ ipostpad, padval )
645 *
646 * check the solution to rhs
647 *
648  sresid = zero
649 *
650 * Reset descriptor describing A to 1-by-P grid for
651 * use in banded utility routines
652 *
653  CALL descinit( desca2d, (3), n,
654  $ (3), nb, 0, 0,
655  $ ictxt, (3), ierr( 1 ) )
656  CALL pddtlaschk( 'N', 'D', trans,
657  $ n, bwl, bwu, nrhs,
658  $ mem( ipb ), 1, 1, descb2d,
659  $ iaseed, mem( ipa ), 1, 1, desca2d,
660  $ ibseed, anorm, sresid,
661  $ mem( ip_driver_w ), worksiz )
662 *
663  IF( iam.EQ.0 ) THEN
664  IF( sresid.GT.thresh )
665  $ WRITE( nout, fmt = 9985 ) sresid
666  END IF
667 *
668 * The second test is a NaN trap
669 *
670  IF( ( sresid.LE.thresh ).AND.
671  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
672  kpass = kpass + 1
673  passed = 'PASSED'
674  ELSE
675  kfail = kfail + 1
676  passed = 'FAILED'
677  END IF
678 *
679  END IF
680 *
681  15 CONTINUE
682 * Skipped tests jump to here to print out "SKIPPED"
683 *
684 * Gather maximum of all CPU and WALL clock timings
685 *
686  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
687  $ wtime )
688  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
689  $ ctime )
690 *
691 * Print results
692 *
693  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
694 *
695  nops = 0
696  nops2 = 0
697 *
698  n_first = nb
699  nprocs_real = ( n-1 )/nb + 1
700  n_last = mod( n-1, nb ) + 1
701 *
702 * 2 N bwl INT_ONE + N (bwl) flops
703 * for LU factorization
704 *
705  nops = 2*(dble(n)*dble(bwl)*
706  $ dble(int_one)) +
707  $ (dble(n)*dble(bwl))
708 *
709 * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
710 *
711  nops = nops +
712  $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
713  $ *dble(nrhs))
714 *
715 * Second calc to represent actual hardware speed
716 *
717 * 2*N_FIRST bwl*bwu Flops for LU
718 * factorization in proc 1
719 *
720  nops2 = 2*( (dble(n_first)*
721  $ dble(bwl)*dble(bwu)))
722 *
723  IF ( nprocs_real .GT. 1) THEN
724 * 8 N_LAST bwl*INT_ONE
725 * flops for LU and spike
726 * calc in last processor
727 *
728  nops2 = nops2 +
729  $ 8*( (dble(n_last)*dble(bwl)
730  $ *dble(int_one)) )
731  ENDIF
732 *
733  IF ( nprocs_real .GT. 2) THEN
734 * 8 NB bwl*INT_ONE flops for LU and spike
735 * calc in other processors
736 *
737  nops2 = nops2 + (nprocs_real-2)*
738  $ 8*( (dble(nb)*dble(bwl)
739  $ *dble(int_one)) )
740  ENDIF
741 *
742 * Reduced system
743 *
744  nops2 = nops2 +
745  $ 2*( nprocs_real-1 ) *
746  $ ( bwl*int_one*bwl/3 )
747  IF( nprocs_real .GT. 1 ) THEN
748  nops2 = nops2 +
749  $ 2*( nprocs_real-2 ) *
750  $ (2*bwl*int_one*bwl)
751  ENDIF
752 *
753 * Solve stage
754 *
755 * nrhs*2 n_first*
756 * (bwl+INT_ONE)
757 * flops for L,U solve in proc 1.
758 *
759  nops2 = nops2 +
760  $ 2*
761  $ dble(n_first)*
762  $ dble(nrhs) *
763  $ ( dble(bwl)+dble(int_one))
764 *
765  IF ( nprocs_real .GT. 1 ) THEN
766 *
767 * 2*nrhs*2 n_last
768 * (bwl+INT_ONE)
769 * flops for LU solve in other procs
770 *
771  nops2 = nops2 +
772  $ 4*
773  $ (dble(n_last)*(dble(bwl)+
774  $ dble(int_one)))*dble(nrhs)
775  ENDIF
776 *
777  IF ( nprocs_real .GT. 2 ) THEN
778 *
779 * 2*nrhs*2 NB
780 * (bwl+INT_ONE)
781 * flops for LU solve in other procs
782 *
783  nops2 = nops2 +
784  $ ( nprocs_real-2)*2*
785  $ ( (dble(nb)*(dble(bwl)+
786  $ dble(int_one)))*dble(nrhs) )
787  ENDIF
788 *
789 * Reduced system
790 *
791  nops2 = nops2 +
792  $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
793  IF( nprocs_real .GT. 1 ) THEN
794  nops2 = nops2 +
795  $ nrhs*( nprocs_real-2 ) *
796  $ ( 6 * bwl*int_one )
797  ENDIF
798 *
799 *
800 * Calculate total megaflops - factorization and/or
801 * solve -- for WALL and CPU time, and print output
802 *
803 * Print WALL time if machine supports it
804 *
805  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
806  tmflops = nops /
807  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
808  ELSE
809  tmflops = 0.0d+0
810  END IF
811 *
812  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
813  tmflops2 = nops2 /
814  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
815  ELSE
816  tmflops2 = 0.0d+0
817  END IF
818 *
819  IF( wtime( 2 ).GE.0.0d+0 )
820  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
821  $ n,
822  $ bwl, bwu,
823  $ nb, nrhs, nprow, npcol,
824  $ wtime( 1 ), wtime( 2 ), tmflops,
825  $ tmflops2, passed
826 *
827 * Print CPU time if machine supports it
828 *
829  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
830  tmflops = nops /
831  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
832  ELSE
833  tmflops = 0.0d+0
834  END IF
835 *
836  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
837  tmflops2 = nops2 /
838  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
839  ELSE
840  tmflops2 = 0.0d+0
841  END IF
842 *
843  IF( ctime( 2 ).GE.0.0d+0 )
844  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
845  $ n,
846  $ bwl, bwu,
847  $ nb, nrhs, nprow, npcol,
848  $ ctime( 1 ), ctime( 2 ), tmflops,
849  $ tmflops2, passed
850 *
851  END IF
852  20 CONTINUE
853 *
854 *
855  30 CONTINUE
856 * NNB loop
857 *
858  45 CONTINUE
859 * BW[] loop
860 *
861  40 CONTINUE
862 * NMAT loop
863 *
864  CALL blacs_gridexit( ictxt )
865  CALL blacs_gridexit( ictxtb )
866 *
867  50 CONTINUE
868 * NGRIDS DROPOUT
869  60 CONTINUE
870 * NGRIDS loop
871 *
872 * Print ending messages and close output file
873 *
874  IF( iam.EQ.0 ) THEN
875  ktests = kpass + kfail + kskip
876  WRITE( nout, fmt = * )
877  WRITE( nout, fmt = 9992 ) ktests
878  IF( check ) THEN
879  WRITE( nout, fmt = 9991 ) kpass
880  WRITE( nout, fmt = 9989 ) kfail
881  ELSE
882  WRITE( nout, fmt = 9990 ) kpass
883  END IF
884  WRITE( nout, fmt = 9988 ) kskip
885  WRITE( nout, fmt = * )
886  WRITE( nout, fmt = * )
887  WRITE( nout, fmt = 9987 )
888  IF( nout.NE.6 .AND. nout.NE.0 )
889  $ CLOSE ( nout )
890  END IF
891 *
892  CALL blacs_exit( 0 )
893 *
894  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
895  $ '; It should be at least 1' )
896  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
897  $ i4 )
898  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
899  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
900  $ i11 )
901  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
902  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
903  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
904  $ '-------- -------- -------- ------' )
905  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
906  $ 1x,i4,1x,i4,1x,f9.3,
907  $ f9.4, f9.2, f9.2, 1x, a6 )
908  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
909  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
910  9990 FORMAT( i5, ' tests completed without checking.' )
911  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
912  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
913  9987 FORMAT( 'END OF TESTS.' )
914  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
915  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
916 *
917  stop
918 *
919 * End of PDDTTRS_DRIVER
920 *
921  END
922 *
pddttrf
subroutine pddttrf(N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pddttrf.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pddttrs
subroutine pddttrs(TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pddttrs.f:3
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pddtdriver
program pddtdriver
Definition: pddtdriver.f:1
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pddtlaschk
subroutine pddtlaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pddtlaschk.f:4
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pdbmatgen
subroutine pdbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdbmatgen.f:5
pddtinfo
subroutine pddtinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pddtinfo.f:6
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181