ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcdtdriver.f
Go to the documentation of this file.
1  PROGRAM pcdtdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PCDTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by CDT. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * CPLXSZ INTEGER, default = 8 bytes.
56 * INTGSZ and CPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a single precision
58 * complex.
59 * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  REAL zero
83  INTEGER cplxsz, memsiz, ntests
84  COMPLEX padval
85  parameter( cplxsz = 8,
86  $ memsiz = totmem / cplxsz, ntests = 20,
87  $ padval = ( -9923.0e+0, -9923.0e+0 ),
88  $ zero = 0.0e+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER trans
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL anorm, sresid, thresh
107  DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108 * ..
109 * .. Local Arrays ..
110  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113  $ nrval( ntests ), nval( ntests ),
114  $ pval( ntests ), qval( ntests )
115  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116  COMPLEX mem( memsiz )
117 * ..
118 * .. External Subroutines ..
119  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121  $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
124  $ slcombine, sltimer
125 * ..
126 * .. External Functions ..
127  INTEGER numroc
128  LOGICAL lsame
129  REAL pclange
130  EXTERNAL lsame, numroc, pclange
131 * ..
132 * .. Intrinsic Functions ..
133  INTRINSIC dble, max, min, mod
134 * ..
135 * .. Data Statements ..
136  DATA kfail, kpass, kskip, ktests / 4*0 /
137 * ..
138 *
139 *
140 *
141 * .. Executable Statements ..
142 *
143 * Get starting information
144 *
145  CALL blacs_pinfo( iam, nprocs )
146  iaseed = 100
147  ibseed = 200
148 *
149  CALL pcdtinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
153 *
154  check = ( thresh.GE.0.0e+0 )
155 *
156 * Print headings
157 *
158  IF( iam.EQ.0 ) THEN
159  WRITE( nout, fmt = * )
160  WRITE( nout, fmt = 9995 )
161  WRITE( nout, fmt = 9994 )
162  WRITE( nout, fmt = * )
163  END IF
164 *
165 * Loop over different process grids
166 *
167  DO 60 i = 1, ngrids
168 *
169  nprow = pval( i )
170  npcol = qval( i )
171 *
172 * Make sure grid information is correct
173 *
174  ierr( 1 ) = 0
175  IF( nprow.LT.1 ) THEN
176  IF( iam.EQ.0 )
177  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178  ierr( 1 ) = 1
179  ELSE IF( npcol.LT.1 ) THEN
180  IF( iam.EQ.0 )
181  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182  ierr( 1 ) = 1
183  ELSE IF( nprow*npcol.GT.nprocs ) THEN
184  IF( iam.EQ.0 )
185  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186  ierr( 1 ) = 1
187  END IF
188 *
189  IF( ierr( 1 ).GT.0 ) THEN
190  IF( iam.EQ.0 )
191  $ WRITE( nout, fmt = 9997 ) 'grid'
192  kskip = kskip + 1
193  GO TO 50
194  END IF
195 *
196 * Define process grid
197 *
198  CALL blacs_get( -1, 0, ictxt )
199  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200 *
201 *
202 * Define transpose process grid
203 *
204  CALL blacs_get( -1, 0, ictxtb )
205  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206 *
207 * Go to bottom of process grid loop if this case doesn't use my
208 * process
209 *
210  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211 *
212  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213  GO TO 50
214  ENDIF
215 *
216  DO 40 j = 1, nmat
217 *
218  ierr( 1 ) = 0
219 *
220  n = nval( j )
221 *
222 * Make sure matrix information is correct
223 *
224  IF( n.LT.1 ) THEN
225  IF( iam.EQ.0 )
226  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227  ierr( 1 ) = 1
228  END IF
229 *
230 * Check all processes for an error
231 *
232  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233  $ -1, 0 )
234 *
235  IF( ierr( 1 ).GT.0 ) THEN
236  IF( iam.EQ.0 )
237  $ WRITE( nout, fmt = 9997 ) 'size'
238  kskip = kskip + 1
239  GO TO 40
240  END IF
241 *
242 *
243  DO 45 bw_num = 1, nbw
244 *
245  ierr( 1 ) = 0
246 *
247  bwl = 1
248  IF( bwl.LT.1 ) THEN
249  IF( iam.EQ.0 )
250  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
251  ierr( 1 ) = 1
252  END IF
253 *
254  bwu = 1
255  IF( bwu.LT.1 ) THEN
256  IF( iam.EQ.0 )
257  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
258  ierr( 1 ) = 1
259  END IF
260 *
261  IF( bwl.GT.n-1 ) THEN
262  IF( iam.EQ.0 ) THEN
263  ierr( 1 ) = 1
264  ENDIF
265  END IF
266 *
267  IF( bwu.GT.n-1 ) THEN
268  IF( iam.EQ.0 ) THEN
269  ierr( 1 ) = 1
270  ENDIF
271  END IF
272 *
273 * Check all processes for an error
274 *
275  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
276  $ -1, 0 )
277 *
278  IF( ierr( 1 ).GT.0 ) THEN
279  kskip = kskip + 1
280  GO TO 45
281  END IF
282 *
283  DO 30 k = 1, nnb
284 *
285  ierr( 1 ) = 0
286 *
287  nb = nbval( k )
288  IF( nb.LT.0 ) THEN
289  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
290  $ + int_one
291  nb = max( nb, 2*int_one )
292  nb = min( n, nb )
293  END IF
294 *
295 * Make sure NB is legal
296 *
297  ierr( 1 ) = 0
298  IF( nb.LT.min( 2*int_one, n ) ) THEN
299  ierr( 1 ) = 1
300  END IF
301 *
302 * Check all processes for an error
303 *
304  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
305  $ -1, 0 )
306 *
307  IF( ierr( 1 ).GT.0 ) THEN
308  kskip = kskip + 1
309  GO TO 30
310  END IF
311 *
312 * Padding constants
313 *
314  np = numroc( (3), (3),
315  $ myrow, 0, nprow )
316  nq = numroc( n, nb, mycol, 0, npcol )
317 *
318  IF( check ) THEN
319  iprepad = ((3)+10)
320  imidpad = 10
321  ipostpad = ((3)+10)
322  ELSE
323  iprepad = 0
324  imidpad = 0
325  ipostpad = 0
326  END IF
327 *
328 * Initialize the array descriptor for the matrix A
329 *
330  CALL descinit( desca2d, n, (3),
331  $ nb, 1, 0, 0,
332  $ ictxtb, nb+10, ierr( 1 ) )
333 *
334 * Convert this to 1D descriptor
335 *
336  desca( 1 ) = 501
337  desca( 3 ) = n
338  desca( 4 ) = nb
339  desca( 5 ) = 0
340  desca( 2 ) = ictxt
341  desca( 6 ) = ((3)+10)
342  desca( 7 ) = 0
343 *
344  ierr_temp = ierr( 1 )
345  ierr( 1 ) = 0
346  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
347 *
348 * Check all processes for an error
349 *
350  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
351 *
352  IF( ierr( 1 ).LT.0 ) THEN
353  IF( iam.EQ.0 )
354  $ WRITE( nout, fmt = 9997 ) 'descriptor'
355  kskip = kskip + 1
356  GO TO 30
357  END IF
358 *
359 * Assign pointers into MEM for SCALAPACK arrays, A is
360 * allocated starting at position MEM( IPREPAD+1 )
361 *
362  free_ptr = 1
363  ipb = 0
364 *
365 * Save room for prepadding
366  free_ptr = free_ptr + iprepad
367 *
368  ipa = free_ptr
369  free_ptr = free_ptr + (nb+10)*(3)
370  $ + ipostpad
371 *
372 * Add memory for fillin
373 * Fillin space needs to store:
374 * Fillin spike:
375 * Contribution to previous proc's diagonal block of
376 * reduced system:
377 * Off-diagonal block of reduced system:
378 * Diagonal block of reduced system:
379 *
380  fillin_size =
381  $ (12*npcol+3*nb)
382 *
383 * Claim memory for fillin
384 *
385  free_ptr = free_ptr + iprepad
386  ip_fillin = free_ptr
387  free_ptr = free_ptr + fillin_size
388 *
389 * Workspace needed by computational routines:
390 *
391  ipw_size = 0
392 *
393 * factorization:
394 *
395  ipw_size = 8*npcol
396 *
397 * Claim memory for IPW
398 *
399  ipw = free_ptr
400  free_ptr = free_ptr + ipw_size
401 *
402 * Check for adequate memory for problem size
403 *
404  ierr( 1 ) = 0
405  IF( free_ptr.GT.memsiz ) THEN
406  IF( iam.EQ.0 )
407  $ WRITE( nout, fmt = 9996 )
408  $ 'divide and conquer factorization',
409  $ (free_ptr )*cplxsz
410  ierr( 1 ) = 1
411  END IF
412 *
413 * Check all processes for an error
414 *
415  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
416  $ 1, -1, 0 )
417 *
418  IF( ierr( 1 ).GT.0 ) THEN
419  IF( iam.EQ.0 )
420  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
421  kskip = kskip + 1
422  GO TO 30
423  END IF
424 *
425 * Worksize needed for LAPRNT
426  worksiz = max( ((3)+10), nb )
427 *
428  IF( check ) THEN
429 *
430 * Calculate the amount of workspace required by
431 * the checking routines.
432 *
433 * PCLANGE
434  worksiz = max( worksiz, desca2d( nb_ ) )
435 *
436 * PCDTLASCHK
437  worksiz = max( worksiz,
438  $ max(5,nb)+2*nb )
439  END IF
440 *
441  free_ptr = free_ptr + iprepad
442  ip_driver_w = free_ptr
443  free_ptr = free_ptr + worksiz + ipostpad
444 *
445 *
446 * Check for adequate memory for problem size
447 *
448  ierr( 1 ) = 0
449  IF( free_ptr.GT.memsiz ) THEN
450  IF( iam.EQ.0 )
451  $ WRITE( nout, fmt = 9996 ) 'factorization',
452  $ ( free_ptr )*cplxsz
453  ierr( 1 ) = 1
454  END IF
455 *
456 * Check all processes for an error
457 *
458  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
459  $ 1, -1, 0 )
460 *
461  IF( ierr( 1 ).GT.0 ) THEN
462  IF( iam.EQ.0 )
463  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
464  kskip = kskip + 1
465  GO TO 30
466  END IF
467 *
468  CALL pcbmatgen( ictxt, 'T', 'D', bwl, bwu, n, (3), nb,
469  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
470  $ mycol, nprow, npcol )
471  CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
472  $ nb+10, iprepad, ipostpad,
473  $ padval )
474 *
475  CALL pcfillpad( ictxt, worksiz, 1,
476  $ mem( ip_driver_w-iprepad ), worksiz,
477  $ iprepad, ipostpad, padval )
478 *
479 * Calculate norm of A for residual error-checking
480 *
481  IF( check ) THEN
482 *
483  anorm = pclange( 'I', n,
484  $ (3), mem( ipa ), 1, 1,
485  $ desca2d, mem( ip_driver_w ) )
486  CALL pcchekpad( ictxt, 'PCLANGE', nq, np,
487  $ mem( ipa-iprepad ), nb+10,
488  $ iprepad, ipostpad, padval )
489  CALL pcchekpad( ictxt, 'PCLANGE',
490  $ worksiz, 1,
491  $ mem( ip_driver_w-iprepad ), worksiz,
492  $ iprepad, ipostpad, padval )
493  END IF
494 *
495 *
496  CALL slboot()
497  CALL blacs_barrier( ictxt, 'All' )
498 *
499 * Perform factorization
500 *
501  CALL sltimer( 1 )
502 *
503  CALL pcdttrf( n, mem( ipa+2*( nb+10 ) ),
504  $ mem( ipa+1*( nb+10 ) ), mem( ipa ), 1,
505  $ desca, mem( ip_fillin ), fillin_size,
506  $ mem( ipw ), ipw_size, info )
507 *
508  CALL sltimer( 1 )
509 *
510  IF( info.NE.0 ) THEN
511  IF( iam.EQ.0 ) THEN
512  WRITE( nout, fmt = * ) 'PCDTTRF INFO=', info
513  ENDIF
514  kfail = kfail + 1
515  GO TO 30
516  END IF
517 *
518  IF( check ) THEN
519 *
520 * Check for memory overwrite in factorization
521 *
522  CALL pcchekpad( ictxt, 'PCDTTRF', nq,
523  $ np, mem( ipa-iprepad ), nb+10,
524  $ iprepad, ipostpad, padval )
525  END IF
526 *
527 *
528 * Loop over the different values for NRHS
529 *
530  DO 20 hh = 1, nnr
531 *
532  ierr( 1 ) = 0
533 *
534  nrhs = nrval( hh )
535 *
536 * Initialize Array Descriptor for rhs
537 *
538  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
539  $ ictxtb, nb+10, ierr( 1 ) )
540 *
541 * Convert this to 1D descriptor
542 *
543  descb( 1 ) = 502
544  descb( 3 ) = n
545  descb( 4 ) = nb
546  descb( 5 ) = 0
547  descb( 2 ) = ictxt
548  descb( 6 ) = descb2d( lld_ )
549  descb( 7 ) = 0
550 *
551 * reset free_ptr to reuse space for right hand sides
552 *
553  IF( ipb .GT. 0 ) THEN
554  free_ptr = ipb
555  ENDIF
556 *
557  free_ptr = free_ptr + iprepad
558  ipb = free_ptr
559  free_ptr = free_ptr + nrhs*descb2d( lld_ )
560  $ + ipostpad
561 *
562 * Allocate workspace for workspace in TRS routine:
563 *
564  ipw_solve_size = 10*npcol+4*nrhs
565 *
566  ipw_solve = free_ptr
567  free_ptr = free_ptr + ipw_solve_size
568 *
569  ierr( 1 ) = 0
570  IF( free_ptr.GT.memsiz ) THEN
571  IF( iam.EQ.0 )
572  $ WRITE( nout, fmt = 9996 )'solve',
573  $ ( free_ptr )*cplxsz
574  ierr( 1 ) = 1
575  END IF
576 *
577 * Check all processes for an error
578 *
579  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
580  $ ierr, 1, -1, 0 )
581 *
582  IF( ierr( 1 ).GT.0 ) THEN
583  IF( iam.EQ.0 )
584  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
585  kskip = kskip + 1
586  GO TO 15
587  END IF
588 *
589  myrhs_size = numroc( n, nb, mycol, 0, npcol )
590 *
591 * Generate RHS
592 *
593  CALL pcmatgen(ictxtb, 'No', 'No',
594  $ descb2d( m_ ), descb2d( n_ ),
595  $ descb2d( mb_ ), descb2d( nb_ ),
596  $ mem( ipb ),
597  $ descb2d( lld_ ), descb2d( rsrc_ ),
598  $ descb2d( csrc_ ),
599  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
600  $ myrow, npcol, nprow )
601 *
602  IF( check ) THEN
603  CALL pcfillpad( ictxtb, nb, nrhs,
604  $ mem( ipb-iprepad ),
605  $ descb2d( lld_ ),
606  $ iprepad, ipostpad,
607  $ padval )
608  CALL pcfillpad( ictxt, worksiz, 1,
609  $ mem( ip_driver_w-iprepad ),
610  $ worksiz, iprepad,
611  $ ipostpad, padval )
612  END IF
613 *
614 *
615  CALL blacs_barrier( ictxt, 'All')
616  CALL sltimer( 2 )
617 *
618 * Solve linear system via factorization
619 *
620  CALL pcdttrs( trans, n, nrhs,
621  $ mem( ipa+2*( nb+10 ) ),
622  $ mem( ipa+1*( nb+10 ) ), mem( ipa ),
623  $ 1, desca, mem( ipb ), 1, descb,
624  $ mem( ip_fillin ), fillin_size,
625  $ mem( ipw_solve ), ipw_solve_size,
626  $ info )
627 *
628  CALL sltimer( 2 )
629 *
630  IF( info.NE.0 ) THEN
631  IF( iam.EQ.0 )
632  $ WRITE( nout, fmt = * ) 'PCDTTRS INFO=', info
633  kfail = kfail + 1
634  passed = 'FAILED'
635  GO TO 20
636  END IF
637 *
638  IF( check ) THEN
639 *
640 * check for memory overwrite
641 *
642  CALL pcchekpad( ictxt, 'PCDTTRS-work',
643  $ worksiz, 1,
644  $ mem( ip_driver_w-iprepad ),
645  $ worksiz, iprepad,
646  $ ipostpad, padval )
647 *
648 * check the solution to rhs
649 *
650  sresid = zero
651 *
652 * Reset descriptor describing A to 1-by-P grid for
653 * use in banded utility routines
654 *
655  CALL descinit( desca2d, (3), n,
656  $ (3), nb, 0, 0,
657  $ ictxt, (3), ierr( 1 ) )
658  CALL pcdtlaschk( 'N', 'D', trans,
659  $ n, bwl, bwu, nrhs,
660  $ mem( ipb ), 1, 1, descb2d,
661  $ iaseed, mem( ipa ), 1, 1, desca2d,
662  $ ibseed, anorm, sresid,
663  $ mem( ip_driver_w ), worksiz )
664 *
665  IF( iam.EQ.0 ) THEN
666  IF( sresid.GT.thresh )
667  $ WRITE( nout, fmt = 9985 ) sresid
668  END IF
669 *
670 * The second test is a NaN trap
671 *
672  IF( ( sresid.LE.thresh ).AND.
673  $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
674  kpass = kpass + 1
675  passed = 'PASSED'
676  ELSE
677  kfail = kfail + 1
678  passed = 'FAILED'
679  END IF
680 *
681  END IF
682 *
683  15 CONTINUE
684 * Skipped tests jump to here to print out "SKIPPED"
685 *
686 * Gather maximum of all CPU and WALL clock timings
687 *
688  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
689  $ wtime )
690  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
691  $ ctime )
692 *
693 * Print results
694 *
695  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
696 *
697  nops = 0
698  nops2 = 0
699 *
700  n_first = nb
701  nprocs_real = ( n-1 )/nb + 1
702  n_last = mod( n-1, nb ) + 1
703 *
704 * 2 N bwl INT_ONE + N (bwl) flops
705 * for LU factorization
706 *
707  nops = 2*(dble(n)*dble(bwl)*
708  $ dble(int_one)) +
709  $ (dble(n)*dble(bwl))
710 *
711 * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve.
712 *
713  nops = nops +
714  $ 2 * (dble(n)*(dble(bwl)+dble(int_one))
715  $ *dble(nrhs))
716 *
717 * Multiply by 4 to get complex count
718 *
719  nops = nops * dble(4)
720 *
721 * Second calc to represent actual hardware speed
722 *
723 * 2*N_FIRST bwl*bwu Flops for LU
724 * factorization in proc 1
725 *
726  nops2 = 2*( (dble(n_first)*
727  $ dble(bwl)*dble(bwu)))
728 *
729  IF ( nprocs_real .GT. 1) THEN
730 * 8 N_LAST bwl*INT_ONE
731 * flops for LU and spike
732 * calc in last processor
733 *
734  nops2 = nops2 +
735  $ 8*( (dble(n_last)*dble(bwl)
736  $ *dble(int_one)) )
737  ENDIF
738 *
739  IF ( nprocs_real .GT. 2) THEN
740 * 8 NB bwl*INT_ONE flops for LU and spike
741 * calc in other processors
742 *
743  nops2 = nops2 + (nprocs_real-2)*
744  $ 8*( (dble(nb)*dble(bwl)
745  $ *dble(int_one)) )
746  ENDIF
747 *
748 * Reduced system
749 *
750  nops2 = nops2 +
751  $ 2*( nprocs_real-1 ) *
752  $ ( bwl*int_one*bwl/3 )
753  IF( nprocs_real .GT. 1 ) THEN
754  nops2 = nops2 +
755  $ 2*( nprocs_real-2 ) *
756  $ (2*bwl*int_one*bwl)
757  ENDIF
758 *
759 * Solve stage
760 *
761 * nrhs*2 n_first*
762 * (bwl+INT_ONE)
763 * flops for L,U solve in proc 1.
764 *
765  nops2 = nops2 +
766  $ 2*
767  $ dble(n_first)*
768  $ dble(nrhs) *
769  $ ( dble(bwl)+dble(int_one))
770 *
771  IF ( nprocs_real .GT. 1 ) THEN
772 *
773 * 2*nrhs*2 n_last
774 * (bwl+INT_ONE)
775 * flops for LU solve in other procs
776 *
777  nops2 = nops2 +
778  $ 4*
779  $ (dble(n_last)*(dble(bwl)+
780  $ dble(int_one)))*dble(nrhs)
781  ENDIF
782 *
783  IF ( nprocs_real .GT. 2 ) THEN
784 *
785 * 2*nrhs*2 NB
786 * (bwl+INT_ONE)
787 * flops for LU solve in other procs
788 *
789  nops2 = nops2 +
790  $ ( nprocs_real-2)*2*
791  $ ( (dble(nb)*(dble(bwl)+
792  $ dble(int_one)))*dble(nrhs) )
793  ENDIF
794 *
795 * Reduced system
796 *
797  nops2 = nops2 +
798  $ nrhs*( nprocs_real-1)*2*(bwl*int_one )
799  IF( nprocs_real .GT. 1 ) THEN
800  nops2 = nops2 +
801  $ nrhs*( nprocs_real-2 ) *
802  $ ( 6 * bwl*int_one )
803  ENDIF
804 *
805 *
806 * Multiply by 4 to get complex count
807 *
808  nops2 = nops2 * dble(4)
809 *
810 * Calculate total megaflops - factorization and/or
811 * solve -- for WALL and CPU time, and print output
812 *
813 * Print WALL time if machine supports it
814 *
815  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
816  tmflops = nops /
817  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
818  ELSE
819  tmflops = 0.0d+0
820  END IF
821 *
822  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
823  tmflops2 = nops2 /
824  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
825  ELSE
826  tmflops2 = 0.0d+0
827  END IF
828 *
829  IF( wtime( 2 ).GE.0.0d+0 )
830  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
831  $ n,
832  $ bwl, bwu,
833  $ nb, nrhs, nprow, npcol,
834  $ wtime( 1 ), wtime( 2 ), tmflops,
835  $ tmflops2, passed
836 *
837 * Print CPU time if machine supports it
838 *
839  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
840  tmflops = nops /
841  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
842  ELSE
843  tmflops = 0.0d+0
844  END IF
845 *
846  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
847  tmflops2 = nops2 /
848  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
849  ELSE
850  tmflops2 = 0.0d+0
851  END IF
852 *
853  IF( ctime( 2 ).GE.0.0d+0 )
854  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
855  $ n,
856  $ bwl, bwu,
857  $ nb, nrhs, nprow, npcol,
858  $ ctime( 1 ), ctime( 2 ), tmflops,
859  $ tmflops2, passed
860 *
861  END IF
862  20 CONTINUE
863 *
864 *
865  30 CONTINUE
866 * NNB loop
867 *
868  45 CONTINUE
869 * BW[] loop
870 *
871  40 CONTINUE
872 * NMAT loop
873 *
874  CALL blacs_gridexit( ictxt )
875  CALL blacs_gridexit( ictxtb )
876 *
877  50 CONTINUE
878 * NGRIDS DROPOUT
879  60 CONTINUE
880 * NGRIDS loop
881 *
882 * Print ending messages and close output file
883 *
884  IF( iam.EQ.0 ) THEN
885  ktests = kpass + kfail + kskip
886  WRITE( nout, fmt = * )
887  WRITE( nout, fmt = 9992 ) ktests
888  IF( check ) THEN
889  WRITE( nout, fmt = 9991 ) kpass
890  WRITE( nout, fmt = 9989 ) kfail
891  ELSE
892  WRITE( nout, fmt = 9990 ) kpass
893  END IF
894  WRITE( nout, fmt = 9988 ) kskip
895  WRITE( nout, fmt = * )
896  WRITE( nout, fmt = * )
897  WRITE( nout, fmt = 9987 )
898  IF( nout.NE.6 .AND. nout.NE.0 )
899  $ CLOSE ( nout )
900  END IF
901 *
902  CALL blacs_exit( 0 )
903 *
904  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
905  $ '; It should be at least 1' )
906  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
907  $ i4 )
908  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
909  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
910  $ i11 )
911  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
913  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914  $ '-------- -------- -------- ------' )
915  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916  $ 1x,i4,1x,i4,1x,f9.3,
917  $ f9.4, f9.2, f9.2, 1x, a6 )
918  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
919  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
920  9990 FORMAT( i5, ' tests completed without checking.' )
921  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
922  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
923  9987 FORMAT( 'END OF TESTS.' )
924  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
925  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
926 *
927  stop
928 *
929 * End of PCDTTRS_DRIVER
930 *
931  END
932 *
max
#define max(A, B)
Definition: pcgemr.c:180
pcdttrf
subroutine pcdttrf(N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pcdttrf.f:3
pcdtinfo
subroutine pcdtinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcdtinfo.f:6
pcbmatgen
subroutine pcbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcbmatgen.f:5
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pcdtlaschk
subroutine pcdtlaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pcdtlaschk.f:4
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pcdttrs
subroutine pcdttrs(TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pcdttrs.f:3
pcdtdriver
program pcdtdriver
Definition: pcdtdriver.f:1
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181