ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcdbdriver.f
Go to the documentation of this file.
1  PROGRAM pcdbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PCDBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by CDB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * CPLXSZ INTEGER, default = 8 bytes.
56 * INTGSZ and CPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a single precision
58 * complex.
59 * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  REAL zero
83  INTEGER cplxsz, memsiz, ntests
84  COMPLEX padval
85  parameter( cplxsz = 8,
86  $ memsiz = totmem / cplxsz, ntests = 20,
87  $ padval = ( -9923.0e+0, -9923.0e+0 ),
88  $ zero = 0.0e+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER trans
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL anorm, sresid, thresh
107  DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108 * ..
109 * .. Local Arrays ..
110  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
111  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
112  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
113  $ nrval( ntests ), nval( ntests ),
114  $ pval( ntests ), qval( ntests )
115  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116  COMPLEX mem( memsiz )
117 * ..
118 * .. External Subroutines ..
119  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
120  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
121  $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
124  $ slcombine, sltimer
125 * ..
126 * .. External Functions ..
127  INTEGER numroc
128  LOGICAL lsame
129  REAL pclange
130  EXTERNAL lsame, numroc, pclange
131 * ..
132 * .. Intrinsic Functions ..
133  INTRINSIC dble, max, min, mod
134 * ..
135 * .. Data Statements ..
136  DATA kfail, kpass, kskip, ktests / 4*0 /
137 * ..
138 *
139 *
140 *
141 * .. Executable Statements ..
142 *
143 * Get starting information
144 *
145  CALL blacs_pinfo( iam, nprocs )
146  iaseed = 100
147  ibseed = 200
148 *
149  CALL pcdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
153 *
154  check = ( thresh.GE.0.0e+0 )
155 *
156 * Print headings
157 *
158  IF( iam.EQ.0 ) THEN
159  WRITE( nout, fmt = * )
160  WRITE( nout, fmt = 9995 )
161  WRITE( nout, fmt = 9994 )
162  WRITE( nout, fmt = * )
163  END IF
164 *
165 * Loop over different process grids
166 *
167  DO 60 i = 1, ngrids
168 *
169  nprow = pval( i )
170  npcol = qval( i )
171 *
172 * Make sure grid information is correct
173 *
174  ierr( 1 ) = 0
175  IF( nprow.LT.1 ) THEN
176  IF( iam.EQ.0 )
177  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178  ierr( 1 ) = 1
179  ELSE IF( npcol.LT.1 ) THEN
180  IF( iam.EQ.0 )
181  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
182  ierr( 1 ) = 1
183  ELSE IF( nprow*npcol.GT.nprocs ) THEN
184  IF( iam.EQ.0 )
185  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
186  ierr( 1 ) = 1
187  END IF
188 *
189  IF( ierr( 1 ).GT.0 ) THEN
190  IF( iam.EQ.0 )
191  $ WRITE( nout, fmt = 9997 ) 'grid'
192  kskip = kskip + 1
193  GO TO 50
194  END IF
195 *
196 * Define process grid
197 *
198  CALL blacs_get( -1, 0, ictxt )
199  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
200 *
201 *
202 * Define transpose process grid
203 *
204  CALL blacs_get( -1, 0, ictxtb )
205  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
206 *
207 * Go to bottom of process grid loop if this case doesn't use my
208 * process
209 *
210  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
211 *
212  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
213  GO TO 50
214  ENDIF
215 *
216  DO 40 j = 1, nmat
217 *
218  ierr( 1 ) = 0
219 *
220  n = nval( j )
221 *
222 * Make sure matrix information is correct
223 *
224  IF( n.LT.1 ) THEN
225  IF( iam.EQ.0 )
226  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
227  ierr( 1 ) = 1
228  END IF
229 *
230 * Check all processes for an error
231 *
232  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
233  $ -1, 0 )
234 *
235  IF( ierr( 1 ).GT.0 ) THEN
236  IF( iam.EQ.0 )
237  $ WRITE( nout, fmt = 9997 ) 'size'
238  kskip = kskip + 1
239  GO TO 40
240  END IF
241 *
242 *
243  DO 45 bw_num = 1, nbw
244 *
245  ierr( 1 ) = 0
246 *
247  bwl = bwlval( bw_num )
248  IF( bwl.LT.1 ) THEN
249  IF( iam.EQ.0 )
250  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
251  ierr( 1 ) = 1
252  END IF
253 *
254  bwu = bwuval( bw_num )
255  IF( bwu.LT.1 ) THEN
256  IF( iam.EQ.0 )
257  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
258  ierr( 1 ) = 1
259  END IF
260 *
261  IF( bwl.GT.n-1 ) THEN
262  IF( iam.EQ.0 ) THEN
263  ierr( 1 ) = 1
264  ENDIF
265  END IF
266 *
267  IF( bwu.GT.n-1 ) THEN
268  IF( iam.EQ.0 ) THEN
269  ierr( 1 ) = 1
270  ENDIF
271  END IF
272 *
273 * Check all processes for an error
274 *
275  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
276  $ -1, 0 )
277 *
278  IF( ierr( 1 ).GT.0 ) THEN
279  kskip = kskip + 1
280  GO TO 45
281  END IF
282 *
283  DO 30 k = 1, nnb
284 *
285  ierr( 1 ) = 0
286 *
287  nb = nbval( k )
288  IF( nb.LT.0 ) THEN
289  nb =( (n-(npcol-1)*max(bwl,bwu)-1)/npcol + 1 )
290  $ + max(bwl,bwu)
291  nb = max( nb, 2*max(bwl,bwu) )
292  nb = min( n, nb )
293  END IF
294 *
295 * Make sure NB is legal
296 *
297  ierr( 1 ) = 0
298  IF( nb.LT.min( 2*max(bwl,bwu), n ) ) THEN
299  ierr( 1 ) = 1
300  END IF
301 *
302 * Check all processes for an error
303 *
304  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
305  $ -1, 0 )
306 *
307  IF( ierr( 1 ).GT.0 ) THEN
308  kskip = kskip + 1
309  GO TO 30
310  END IF
311 *
312 * Padding constants
313 *
314  np = numroc( (bwl+bwu+1), (bwl+bwu+1),
315  $ myrow, 0, nprow )
316  nq = numroc( n, nb, mycol, 0, npcol )
317 *
318  IF( check ) THEN
319  iprepad = ((bwl+bwu+1)+10)
320  imidpad = 10
321  ipostpad = ((bwl+bwu+1)+10)
322  ELSE
323  iprepad = 0
324  imidpad = 0
325  ipostpad = 0
326  END IF
327 *
328 * Initialize the array descriptor for the matrix A
329 *
330  CALL descinit( desca2d, (bwl+bwu+1), n,
331  $ (bwl+bwu+1), nb, 0, 0,
332  $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
333 *
334 * Convert this to 1D descriptor
335 *
336  desca( 1 ) = 501
337  desca( 3 ) = n
338  desca( 4 ) = nb
339  desca( 5 ) = 0
340  desca( 2 ) = ictxt
341  desca( 6 ) = ((bwl+bwu+1)+10)
342  desca( 7 ) = 0
343 *
344  ierr_temp = ierr( 1 )
345  ierr( 1 ) = 0
346  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
347 *
348 * Check all processes for an error
349 *
350  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
351 *
352  IF( ierr( 1 ).LT.0 ) THEN
353  IF( iam.EQ.0 )
354  $ WRITE( nout, fmt = 9997 ) 'descriptor'
355  kskip = kskip + 1
356  GO TO 30
357  END IF
358 *
359 * Assign pointers into MEM for SCALAPACK arrays, A is
360 * allocated starting at position MEM( IPREPAD+1 )
361 *
362  free_ptr = 1
363  ipb = 0
364 *
365 * Save room for prepadding
366  free_ptr = free_ptr + iprepad
367 *
368  ipa = free_ptr
369  free_ptr = free_ptr + desca2d( lld_ )*
370  $ desca2d( nb_ )
371  $ + ipostpad
372 *
373 * Add memory for fillin
374 * Fillin space needs to store:
375 * Fillin spike:
376 * Contribution to previous proc's diagonal block of
377 * reduced system:
378 * Off-diagonal block of reduced system:
379 * Diagonal block of reduced system:
380 *
381  fillin_size =
382  $ nb*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
383 *
384 * Claim memory for fillin
385 *
386  free_ptr = free_ptr + iprepad
387  ip_fillin = free_ptr
388  free_ptr = free_ptr + fillin_size
389 *
390 * Workspace needed by computational routines:
391 *
392  ipw_size = 0
393 *
394 * factorization:
395 *
396  ipw_size = max(bwl,bwu)*max(bwl,bwu)
397 *
398 * Claim memory for IPW
399 *
400  ipw = free_ptr
401  free_ptr = free_ptr + ipw_size
402 *
403 * Check for adequate memory for problem size
404 *
405  ierr( 1 ) = 0
406  IF( free_ptr.GT.memsiz ) THEN
407  IF( iam.EQ.0 )
408  $ WRITE( nout, fmt = 9996 )
409  $ 'divide and conquer factorization',
410  $ (free_ptr )*cplxsz
411  ierr( 1 ) = 1
412  END IF
413 *
414 * Check all processes for an error
415 *
416  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417  $ 1, -1, 0 )
418 *
419  IF( ierr( 1 ).GT.0 ) THEN
420  IF( iam.EQ.0 )
421  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422  kskip = kskip + 1
423  GO TO 30
424  END IF
425 *
426 * Worksize needed for LAPRNT
427  worksiz = max( ((bwl+bwu+1)+10), nb )
428 *
429  IF( check ) THEN
430 *
431 * Calculate the amount of workspace required by
432 * the checking routines.
433 *
434 * PCLANGE
435  worksiz = max( worksiz, desca2d( nb_ ) )
436 *
437 * PCDBLASCHK
438  worksiz = max( worksiz,
439  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
440  END IF
441 *
442  free_ptr = free_ptr + iprepad
443  ip_driver_w = free_ptr
444  free_ptr = free_ptr + worksiz + ipostpad
445 *
446 *
447 * Check for adequate memory for problem size
448 *
449  ierr( 1 ) = 0
450  IF( free_ptr.GT.memsiz ) THEN
451  IF( iam.EQ.0 )
452  $ WRITE( nout, fmt = 9996 ) 'factorization',
453  $ ( free_ptr )*cplxsz
454  ierr( 1 ) = 1
455  END IF
456 *
457 * Check all processes for an error
458 *
459  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
460  $ 1, -1, 0 )
461 *
462  IF( ierr( 1 ).GT.0 ) THEN
463  IF( iam.EQ.0 )
464  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
465  kskip = kskip + 1
466  GO TO 30
467  END IF
468 *
469  CALL pcbmatgen( ictxt, 'G', 'D', bwl, bwu, n,
470  $ (bwl+bwu+1), nb, mem( ipa ),
471  $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
472  $ mycol, nprow, npcol )
473 *
474  CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
475  $ ((bwl+bwu+1)+10), iprepad, ipostpad,
476  $ padval )
477 *
478  CALL pcfillpad( ictxt, worksiz, 1,
479  $ mem( ip_driver_w-iprepad ), worksiz,
480  $ iprepad, ipostpad, padval )
481 *
482 * Calculate norm of A for residual error-checking
483 *
484  IF( check ) THEN
485 *
486  anorm = pclange( '1', (bwl+bwu+1),
487  $ n, mem( ipa ), 1, 1,
488  $ desca2d, mem( ip_driver_w ) )
489  CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
490  $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
491  $ iprepad, ipostpad, padval )
492  CALL pcchekpad( ictxt, 'PCLANGE',
493  $ worksiz, 1,
494  $ mem( ip_driver_w-iprepad ), worksiz,
495  $ iprepad, ipostpad, padval )
496  END IF
497 *
498 *
499  CALL slboot()
500  CALL blacs_barrier( ictxt, 'All' )
501 *
502 * Perform factorization
503 *
504  CALL sltimer( 1 )
505 *
506  CALL pcdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
507  $ mem( ip_fillin ), fillin_size, mem( ipw ),
508  $ ipw_size, info )
509 *
510  CALL sltimer( 1 )
511 *
512  IF( info.NE.0 ) THEN
513  IF( iam.EQ.0 ) THEN
514  WRITE( nout, fmt = * ) 'PCDBTRF INFO=', info
515  ENDIF
516  kfail = kfail + 1
517  GO TO 30
518  END IF
519 *
520  IF( check ) THEN
521 *
522 * Check for memory overwrite in factorization
523 *
524  CALL pcchekpad( ictxt, 'PCDBTRF', np,
525  $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
526  $ iprepad, ipostpad, padval )
527  END IF
528 *
529 *
530 * Loop over the different values for NRHS
531 *
532  DO 20 hh = 1, nnr
533 *
534  ierr( 1 ) = 0
535 *
536  nrhs = nrval( hh )
537 *
538 * Initialize Array Descriptor for rhs
539 *
540  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
541  $ ictxtb, nb+10, ierr( 1 ) )
542 *
543 * Convert this to 1D descriptor
544 *
545  descb( 1 ) = 502
546  descb( 3 ) = n
547  descb( 4 ) = nb
548  descb( 5 ) = 0
549  descb( 2 ) = ictxt
550  descb( 6 ) = descb2d( lld_ )
551  descb( 7 ) = 0
552 *
553 * reset free_ptr to reuse space for right hand sides
554 *
555  IF( ipb .GT. 0 ) THEN
556  free_ptr = ipb
557  ENDIF
558 *
559  free_ptr = free_ptr + iprepad
560  ipb = free_ptr
561  free_ptr = free_ptr + nrhs*descb2d( lld_ )
562  $ + ipostpad
563 *
564 * Allocate workspace for workspace in TRS routine:
565 *
566  ipw_solve_size = (max(bwl,bwu)*nrhs)
567 *
568  ipw_solve = free_ptr
569  free_ptr = free_ptr + ipw_solve_size
570 *
571  ierr( 1 ) = 0
572  IF( free_ptr.GT.memsiz ) THEN
573  IF( iam.EQ.0 )
574  $ WRITE( nout, fmt = 9996 )'solve',
575  $ ( free_ptr )*cplxsz
576  ierr( 1 ) = 1
577  END IF
578 *
579 * Check all processes for an error
580 *
581  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
582  $ ierr, 1, -1, 0 )
583 *
584  IF( ierr( 1 ).GT.0 ) THEN
585  IF( iam.EQ.0 )
586  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
587  kskip = kskip + 1
588  GO TO 15
589  END IF
590 *
591  myrhs_size = numroc( n, nb, mycol, 0, npcol )
592 *
593 * Generate RHS
594 *
595  CALL pcmatgen(ictxtb, 'No', 'No',
596  $ descb2d( m_ ), descb2d( n_ ),
597  $ descb2d( mb_ ), descb2d( nb_ ),
598  $ mem( ipb ),
599  $ descb2d( lld_ ), descb2d( rsrc_ ),
600  $ descb2d( csrc_ ),
601  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
602  $ myrow, npcol, nprow )
603 *
604  IF( check ) THEN
605  CALL pcfillpad( ictxtb, nb, nrhs,
606  $ mem( ipb-iprepad ),
607  $ descb2d( lld_ ),
608  $ iprepad, ipostpad,
609  $ padval )
610  CALL pcfillpad( ictxt, worksiz, 1,
611  $ mem( ip_driver_w-iprepad ),
612  $ worksiz, iprepad,
613  $ ipostpad, padval )
614  END IF
615 *
616 *
617  CALL blacs_barrier( ictxt, 'All')
618  CALL sltimer( 2 )
619 *
620 * Solve linear system via factorization
621 *
622  CALL pcdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
623  $ 1, desca, mem( ipb ), 1, descb,
624  $ mem( ip_fillin ), fillin_size,
625  $ mem( ipw_solve ), ipw_solve_size,
626  $ info )
627 *
628  CALL sltimer( 2 )
629 *
630  IF( info.NE.0 ) THEN
631  IF( iam.EQ.0 )
632  $ WRITE( nout, fmt = * ) 'PCDBTRS INFO=', info
633  kfail = kfail + 1
634  passed = 'FAILED'
635  GO TO 20
636  END IF
637 *
638  IF( check ) THEN
639 *
640 * check for memory overwrite
641 *
642  CALL pcchekpad( ictxt, 'PCDBTRS-work',
643  $ worksiz, 1,
644  $ mem( ip_driver_w-iprepad ),
645  $ worksiz, iprepad,
646  $ ipostpad, padval )
647 *
648 * check the solution to rhs
649 *
650  sresid = zero
651 *
652  CALL pcdblaschk( 'N', 'D', trans,
653  $ n, bwl, bwu, nrhs,
654  $ mem( ipb ), 1, 1, descb2d,
655  $ iaseed, mem( ipa ), 1, 1, desca2d,
656  $ ibseed, anorm, sresid,
657  $ mem( ip_driver_w ), worksiz )
658 *
659  IF( iam.EQ.0 ) THEN
660  IF( sresid.GT.thresh )
661  $ WRITE( nout, fmt = 9985 ) sresid
662  END IF
663 *
664 * The second test is a NaN trap
665 *
666  IF( ( sresid.LE.thresh ).AND.
667  $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
668  kpass = kpass + 1
669  passed = 'PASSED'
670  ELSE
671  kfail = kfail + 1
672  passed = 'FAILED'
673  END IF
674 *
675  END IF
676 *
677  15 CONTINUE
678 * Skipped tests jump to here to print out "SKIPPED"
679 *
680 * Gather maximum of all CPU and WALL clock timings
681 *
682  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
683  $ wtime )
684  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
685  $ ctime )
686 *
687 * Print results
688 *
689  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
690 *
691  nops = 0
692  nops2 = 0
693 *
694  n_first = nb
695  nprocs_real = ( n-1 )/nb + 1
696  n_last = mod( n-1, nb ) + 1
697 *
698 * 2 N bwl bwu + N (bwl) flops
699 * for LU factorization
700 *
701  nops = 2*(dble(n)*dble(bwl)*
702  $ dble(bwu)) +
703  $ (dble(n)*dble(bwl))
704 *
705 * nrhs * 2 N*(bwl+bwu) flops for LU solve.
706 *
707  nops = nops +
708  $ 2 * (dble(n)*(dble(bwl)+dble(bwu))
709  $ *dble(nrhs))
710 *
711 * Multiply by 4 to get complex count
712 *
713  nops = nops * dble(4)
714 *
715 * Second calc to represent actual hardware speed
716 *
717 * 2*N_FIRST bwl*bwu Flops for LU
718 * factorization in proc 1
719 *
720  nops2 = 2*( (dble(n_first)*
721  $ dble(bwl)*dble(bwu)))
722 *
723  IF ( nprocs_real .GT. 1) THEN
724 * 8 N_LAST bwl*bwu
725 * flops for LU and spike
726 * calc in last processor
727 *
728  nops2 = nops2 +
729  $ 8*( (dble(n_last)*dble(bwl)
730  $ *dble(bwu)) )
731  ENDIF
732 *
733  IF ( nprocs_real .GT. 2) THEN
734 * 8 NB bwl*bwu flops for LU and spike
735 * calc in other processors
736 *
737  nops2 = nops2 + (nprocs_real-2)*
738  $ 8*( (dble(nb)*dble(bwl)
739  $ *dble(bwu)) )
740  ENDIF
741 *
742 * Reduced system
743 *
744  nops2 = nops2 +
745  $ 2*( nprocs_real-1 ) *
746  $ ( bwl*bwu*bwl/3 )
747  IF( nprocs_real .GT. 1 ) THEN
748  nops2 = nops2 +
749  $ 2*( nprocs_real-2 ) *
750  $ (2*bwl*bwu*bwl)
751  ENDIF
752 *
753 * Solve stage
754 *
755 * nrhs*2 n_first*
756 * (bwl+bwu)
757 * flops for L,U solve in proc 1.
758 *
759  nops2 = nops2 +
760  $ 2*
761  $ dble(n_first)*
762  $ dble(nrhs) *
763  $ ( dble(bwl)+dble(bwu))
764 *
765  IF ( nprocs_real .GT. 1 ) THEN
766 *
767 * 2*nrhs*2 n_last
768 * (bwl+bwu)
769 * flops for LU solve in other procs
770 *
771  nops2 = nops2 +
772  $ 4*
773  $ (dble(n_last)*(dble(bwl)+
774  $ dble(bwu)))*dble(nrhs)
775  ENDIF
776 *
777  IF ( nprocs_real .GT. 2 ) THEN
778 *
779 * 2*nrhs*2 NB
780 * (bwl+bwu)
781 * flops for LU solve in other procs
782 *
783  nops2 = nops2 +
784  $ ( nprocs_real-2)*2*
785  $ ( (dble(nb)*(dble(bwl)+
786  $ dble(bwu)))*dble(nrhs) )
787  ENDIF
788 *
789 * Reduced system
790 *
791  nops2 = nops2 +
792  $ nrhs*( nprocs_real-1)*2*(bwl*bwu )
793  IF( nprocs_real .GT. 1 ) THEN
794  nops2 = nops2 +
795  $ nrhs*( nprocs_real-2 ) *
796  $ ( 6 * bwl*bwu )
797  ENDIF
798 *
799 *
800 * Multiply by 4 to get complex count
801 *
802  nops2 = nops2 * dble(4)
803 *
804 * Calculate total megaflops - factorization and/or
805 * solve -- for WALL and CPU time, and print output
806 *
807 * Print WALL time if machine supports it
808 *
809  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
810  tmflops = nops /
811  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
812  ELSE
813  tmflops = 0.0d+0
814  END IF
815 *
816  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
817  tmflops2 = nops2 /
818  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
819  ELSE
820  tmflops2 = 0.0d+0
821  END IF
822 *
823  IF( wtime( 2 ).GE.0.0d+0 )
824  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
825  $ n,
826  $ bwl, bwu,
827  $ nb, nrhs, nprow, npcol,
828  $ wtime( 1 ), wtime( 2 ), tmflops,
829  $ tmflops2, passed
830 *
831 * Print CPU time if machine supports it
832 *
833  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
834  tmflops = nops /
835  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
836  ELSE
837  tmflops = 0.0d+0
838  END IF
839 *
840  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
841  tmflops2 = nops2 /
842  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
843  ELSE
844  tmflops2 = 0.0d+0
845  END IF
846 *
847  IF( ctime( 2 ).GE.0.0d+0 )
848  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
849  $ n,
850  $ bwl, bwu,
851  $ nb, nrhs, nprow, npcol,
852  $ ctime( 1 ), ctime( 2 ), tmflops,
853  $ tmflops2, passed
854 *
855  END IF
856  20 CONTINUE
857 *
858 *
859  30 CONTINUE
860 * NNB loop
861 *
862  45 CONTINUE
863 * BW[] loop
864 *
865  40 CONTINUE
866 * NMAT loop
867 *
868  CALL blacs_gridexit( ictxt )
869  CALL blacs_gridexit( ictxtb )
870 *
871  50 CONTINUE
872 * NGRIDS DROPOUT
873  60 CONTINUE
874 * NGRIDS loop
875 *
876 * Print ending messages and close output file
877 *
878  IF( iam.EQ.0 ) THEN
879  ktests = kpass + kfail + kskip
880  WRITE( nout, fmt = * )
881  WRITE( nout, fmt = 9992 ) ktests
882  IF( check ) THEN
883  WRITE( nout, fmt = 9991 ) kpass
884  WRITE( nout, fmt = 9989 ) kfail
885  ELSE
886  WRITE( nout, fmt = 9990 ) kpass
887  END IF
888  WRITE( nout, fmt = 9988 ) kskip
889  WRITE( nout, fmt = * )
890  WRITE( nout, fmt = * )
891  WRITE( nout, fmt = 9987 )
892  IF( nout.NE.6 .AND. nout.NE.0 )
893  $ CLOSE ( nout )
894  END IF
895 *
896  CALL blacs_exit( 0 )
897 *
898  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
899  $ '; It should be at least 1' )
900  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
901  $ i4 )
902  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
903  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
904  $ i11 )
905  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
906  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
907  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
908  $ '-------- -------- -------- ------' )
909  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
910  $ 1x,i4,1x,i4,1x,f9.3,
911  $ f9.4, f9.2, f9.2, 1x, a6 )
912  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
913  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
914  9990 FORMAT( i5, ' tests completed without checking.' )
915  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
916  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
917  9987 FORMAT( 'END OF TESTS.' )
918  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
919  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
920 *
921  stop
922 *
923 * End of PCDBTRS_DRIVER
924 *
925  END
926 *
max
#define max(A, B)
Definition: pcgemr.c:180
pcdbtrf
subroutine pcdbtrf(N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pcdbtrf.f:3
pcdbtrs
subroutine pcdbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pcdbtrs.f:3
pcbmatgen
subroutine pcbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcbmatgen.f:5
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pcdbinfo
subroutine pcdbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcdbinfo.f:6
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pcdbdriver
program pcdbdriver
Definition: pcdbdriver.f:1
min
#define min(A, B)
Definition: pcgemr.c:181
pcdblaschk
subroutine pcdblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pcdblaschk.f:4