ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdgbdriver.f
Go to the documentation of this file.
1  PROGRAM pdgbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PDGBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by DGB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTMEM INTEGER, default = 2048.
55 * INTMEM is the size of the integer workspace used in this
56 * driver as input as the IPIV vector. It represents an
57 * upper bound on NB, the blocksize of the data
58 * distribution.
59 *
60 * INTGSZ INTEGER, default = 4 bytes.
61 * DBLESZ INTEGER, default = 8 bytes.
62 * INTGSZ and DBLESZ indicate the length in bytes on the
63 * given platform for an integer and a double precision real.
64 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
65 * All arrays used by ScaLAPACK routines are allocated from
66 * this array and referenced by pointers. The integer IPB,
67 * for example, is a pointer to the starting element of MEM for
68 * the solution vector(s) B.
69 *
70 * =====================================================================
71 *
72 * Code Developer: Andrew J. Cleary, University of Tennessee.
73 * Current address: Lawrence Livermore National Labs.
74 * This version released: August, 2001.
75 *
76 * =====================================================================
77 *
78 * .. Parameters ..
79  INTEGER totmem
80  parameter( totmem = 3000000 )
81  INTEGER intmem
82  parameter( intmem = 2048 )
83  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
84  $ lld_, mb_, m_, nb_, n_, rsrc_
85  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
86  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
87  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
88 *
89  DOUBLE PRECISION zero
90  INTEGER dblesz, memsiz, ntests
91  DOUBLE PRECISION padval
92  parameter( dblesz = 8,
93  $ memsiz = totmem / dblesz, ntests = 20,
94  $ padval = -9923.0d+0, zero = 0.0d+0 )
95  INTEGER int_one
96  parameter( int_one = 1 )
97 * ..
98 * .. Local Scalars ..
99  LOGICAL check
100  CHARACTER trans
101  CHARACTER*6 passed
102  CHARACTER*80 outfile
103  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
104  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
105  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
106  $ iprepad, ipw, ipw_size, ipw_solve,
107  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
108  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
109  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
110  $ nnr, nout, np, npcol, nprocs, nprocs_real,
111  $ nprow, nq, nrhs, n_first, n_last, worksiz
112  REAL thresh
113  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
114  $ tmflops2
115 * ..
116 * .. Local Arrays ..
117  INTEGER ipiv(intmem)
118  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
119  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
120  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
121  $ nrval( ntests ), nval( ntests ),
122  $ pval( ntests ), qval( ntests )
123  DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
124 * ..
125 * .. External Subroutines ..
126  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
127  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
128  $ blacs_pinfo, descinit, igsum2d, pdbmatgen,
131  $ sltimer
132 * ..
133 * .. External Functions ..
134  INTEGER numroc
135  LOGICAL lsame
136  DOUBLE PRECISION pdlange
137  EXTERNAL lsame, numroc, pdlange
138 * ..
139 * .. Intrinsic Functions ..
140  INTRINSIC dble, max, min, mod
141 * ..
142 * .. Data Statements ..
143  DATA kfail, kpass, kskip, ktests / 4*0 /
144 * ..
145 *
146 *
147 *
148 * .. Executable Statements ..
149 *
150 * Get starting information
151 *
152  CALL blacs_pinfo( iam, nprocs )
153  iaseed = 100
154  ibseed = 200
155 *
156  CALL pdgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
157  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
158  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
159  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
160 *
161  check = ( thresh.GE.0.0d+0 )
162 *
163 * Print headings
164 *
165  IF( iam.EQ.0 ) THEN
166  WRITE( nout, fmt = * )
167  WRITE( nout, fmt = 9995 )
168  WRITE( nout, fmt = 9994 )
169  WRITE( nout, fmt = * )
170  END IF
171 *
172 * Loop over different process grids
173 *
174  DO 60 i = 1, ngrids
175 *
176  nprow = pval( i )
177  npcol = qval( i )
178 *
179 * Make sure grid information is correct
180 *
181  ierr( 1 ) = 0
182  IF( nprow.LT.1 ) THEN
183  IF( iam.EQ.0 )
184  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
185  ierr( 1 ) = 1
186  ELSE IF( npcol.LT.1 ) THEN
187  IF( iam.EQ.0 )
188  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
189  ierr( 1 ) = 1
190  ELSE IF( nprow*npcol.GT.nprocs ) THEN
191  IF( iam.EQ.0 )
192  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
193  ierr( 1 ) = 1
194  END IF
195 *
196  IF( ierr( 1 ).GT.0 ) THEN
197  IF( iam.EQ.0 )
198  $ WRITE( nout, fmt = 9997 ) 'grid'
199  kskip = kskip + 1
200  GO TO 50
201  END IF
202 *
203 * Define process grid
204 *
205  CALL blacs_get( -1, 0, ictxt )
206  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
207 *
208 *
209 * Define transpose process grid
210 *
211  CALL blacs_get( -1, 0, ictxtb )
212  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
213 *
214 * Go to bottom of process grid loop if this case doesn't use my
215 * process
216 *
217  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
218 *
219  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
220  GO TO 50
221  ENDIF
222 *
223  DO 40 j = 1, nmat
224 *
225  ierr( 1 ) = 0
226 *
227  n = nval( j )
228 *
229 * Make sure matrix information is correct
230 *
231  IF( n.LT.1 ) THEN
232  IF( iam.EQ.0 )
233  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
234  ierr( 1 ) = 1
235  END IF
236 *
237 * Check all processes for an error
238 *
239  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
240  $ -1, 0 )
241 *
242  IF( ierr( 1 ).GT.0 ) THEN
243  IF( iam.EQ.0 )
244  $ WRITE( nout, fmt = 9997 ) 'size'
245  kskip = kskip + 1
246  GO TO 40
247  END IF
248 *
249 *
250  DO 45 bw_num = 1, nbw
251 *
252  ierr( 1 ) = 0
253 *
254  bwl = bwlval( bw_num )
255  IF( bwl.LT.1 ) THEN
256  IF( iam.EQ.0 )
257  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
258  ierr( 1 ) = 1
259  END IF
260 *
261  bwu = bwuval( bw_num )
262  IF( bwu.LT.1 ) THEN
263  IF( iam.EQ.0 )
264  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
265  ierr( 1 ) = 1
266  END IF
267 *
268  IF( bwl.GT.n-1 ) THEN
269  IF( iam.EQ.0 ) THEN
270  ierr( 1 ) = 1
271  ENDIF
272  END IF
273 *
274  IF( bwu.GT.n-1 ) THEN
275  IF( iam.EQ.0 ) THEN
276  ierr( 1 ) = 1
277  ENDIF
278  END IF
279 *
280 * Check all processes for an error
281 *
282  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
283  $ -1, 0 )
284 *
285  IF( ierr( 1 ).GT.0 ) THEN
286  kskip = kskip + 1
287  GO TO 45
288  END IF
289 *
290  DO 30 k = 1, nnb
291 *
292  ierr( 1 ) = 0
293 *
294  nb = nbval( k )
295  IF( nb.LT.0 ) THEN
296  nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
297  $ + (bwl+bwu)
298  nb = max( nb, 2*(bwl+bwu) )
299  nb = min( n, nb )
300  END IF
301 *
302 * Make sure NB is legal
303 *
304  ierr( 1 ) = 0
305 *
306  IF( nb.GT.intmem ) THEN
307  ierr( 1 ) = 1
308  IF( iam.EQ.0 ) THEN
309  WRITE( nout,* )'You have chosen an '
310  $ ,'NB > INTMEM in the driver.'
311  WRITE(nout, *)'Please edit the driver '
312  $ ,'and increase the value of INTMEM'
313  ENDIF
314  END IF
315 *
316 * Check all processes for an error
317 *
318  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
319  $ -1, 0 )
320 *
321  IF( ierr( 1 ).GT.0 ) THEN
322  kskip = kskip + 1
323  GO TO 30
324  END IF
325 *
326 * Padding constants
327 *
328  np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
329  $ myrow, 0, nprow )
330  nq = numroc( n, nb, mycol, 0, npcol )
331 *
332  IF( check ) THEN
333  iprepad = ((2*bwl+2*bwu+1)+10)
334  imidpad = 10
335  ipostpad = ((2*bwl+2*bwu+1)+10)
336  ELSE
337  iprepad = 0
338  imidpad = 0
339  ipostpad = 0
340  END IF
341 *
342 * Initialize the array descriptor for the matrix A
343 *
344  CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
345  $ (2*bwl+2*bwu+1), nb, 0, 0,
346  $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
347 *
348 * Convert this to 1D descriptor
349 *
350  desca( 1 ) = 501
351  desca( 3 ) = n
352  desca( 4 ) = nb
353  desca( 5 ) = 0
354  desca( 2 ) = ictxt
355  desca( 6 ) = ((2*bwl+2*bwu+1)+10)
356  desca( 7 ) = 0
357 *
358  ierr_temp = ierr( 1 )
359  ierr( 1 ) = 0
360  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
361 *
362 * Check all processes for an error
363 *
364  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
365 *
366  IF( ierr( 1 ).LT.0 ) THEN
367  IF( iam.EQ.0 )
368  $ WRITE( nout, fmt = 9997 ) 'descriptor'
369  kskip = kskip + 1
370  GO TO 30
371  END IF
372 *
373 * Assign pointers into MEM for SCALAPACK arrays, A is
374 * allocated starting at position MEM( IPREPAD+1 )
375 *
376  free_ptr = 1
377  ipb = 0
378 *
379 * Save room for prepadding
380  free_ptr = free_ptr + iprepad
381 *
382  ipa = free_ptr
383  free_ptr = free_ptr + desca2d( lld_ )*
384  $ desca2d( nb_ )
385  $ + ipostpad
386 *
387 * Add memory for fillin
388 * Fillin space needs to store:
389 * Fillin spike:
390 * Contribution to previous proc's diagonal block of
391 * reduced system:
392 * Off-diagonal block of reduced system:
393 * Diagonal block of reduced system:
394 *
395  fillin_size =
396  $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
397 *
398 * Claim memory for fillin
399 *
400  free_ptr = free_ptr + iprepad
401  ip_fillin = free_ptr
402  free_ptr = free_ptr + fillin_size
403 *
404 * Workspace needed by computational routines:
405 *
406  ipw_size = 0
407 *
408 * factorization:
409 *
410  ipw_size = 1
411 *
412 * Claim memory for IPW
413 *
414  ipw = free_ptr
415  free_ptr = free_ptr + ipw_size
416 *
417 * Check for adequate memory for problem size
418 *
419  ierr( 1 ) = 0
420  IF( free_ptr.GT.memsiz ) THEN
421  IF( iam.EQ.0 )
422  $ WRITE( nout, fmt = 9996 )
423  $ 'divide and conquer factorization',
424  $ (free_ptr )*dblesz
425  ierr( 1 ) = 1
426  END IF
427 *
428 * Check all processes for an error
429 *
430  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
431  $ 1, -1, 0 )
432 *
433  IF( ierr( 1 ).GT.0 ) THEN
434  IF( iam.EQ.0 )
435  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
436  kskip = kskip + 1
437  GO TO 30
438  END IF
439 *
440 * Worksize needed for LAPRNT
441  worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
442 *
443  IF( check ) THEN
444 *
445 * Calculate the amount of workspace required by
446 * the checking routines.
447 *
448 * PDLANGE
449  worksiz = max( worksiz, desca2d( nb_ ) )
450 *
451 * PDDBLASCHK
452  worksiz = max( worksiz,
453  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
454  END IF
455 *
456  free_ptr = free_ptr + iprepad
457  ip_driver_w = free_ptr
458  free_ptr = free_ptr + worksiz + ipostpad
459 *
460 *
461 * Check for adequate memory for problem size
462 *
463  ierr( 1 ) = 0
464  IF( free_ptr.GT.memsiz ) THEN
465  IF( iam.EQ.0 )
466  $ WRITE( nout, fmt = 9996 ) 'factorization',
467  $ ( free_ptr )*dblesz
468  ierr( 1 ) = 1
469  END IF
470 *
471 * Check all processes for an error
472 *
473  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
474  $ 1, -1, 0 )
475 *
476  IF( ierr( 1 ).GT.0 ) THEN
477  IF( iam.EQ.0 )
478  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
479  kskip = kskip + 1
480  GO TO 30
481  END IF
482 *
483  CALL pdbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
484  $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
485  $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
486  $ myrow, mycol, nprow, npcol )
487 *
488  CALL pdfillpad( ictxt, np, nq, mem( ipa-iprepad ),
489  $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
490  $ padval )
491 *
492  CALL pdfillpad( ictxt, worksiz, 1,
493  $ mem( ip_driver_w-iprepad ), worksiz,
494  $ iprepad, ipostpad, padval )
495 *
496 * Calculate norm of A for residual error-checking
497 *
498  IF( check ) THEN
499 *
500  anorm = pdlange( '1', (2*bwl+2*bwu+1),
501  $ n, mem( ipa ), 1, 1,
502  $ desca2d, mem( ip_driver_w ) )
503  CALL pdchekpad( ictxt, 'PDLANGE', np, nq,
504  $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
505  $ iprepad, ipostpad, padval )
506  CALL pdchekpad( ictxt, 'PDLANGE',
507  $ worksiz, 1,
508  $ mem( ip_driver_w-iprepad ), worksiz,
509  $ iprepad, ipostpad, padval )
510  END IF
511 *
512 *
513  CALL slboot()
514  CALL blacs_barrier( ictxt, 'All' )
515 *
516 * Perform factorization
517 *
518  CALL sltimer( 1 )
519 *
520  CALL pdgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
521  $ mem( ip_fillin ), fillin_size, mem( ipw ),
522  $ ipw_size, info )
523 *
524  CALL sltimer( 1 )
525 *
526  IF( info.NE.0 ) THEN
527  IF( iam.EQ.0 ) THEN
528  WRITE( nout, fmt = * ) 'PDGBTRF INFO=', info
529  ENDIF
530  kfail = kfail + 1
531  GO TO 30
532  END IF
533 *
534  IF( check ) THEN
535 *
536 * Check for memory overwrite in factorization
537 *
538  CALL pdchekpad( ictxt, 'PDGBTRF', np,
539  $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
540  $ iprepad, ipostpad, padval )
541  END IF
542 *
543 *
544 * Loop over the different values for NRHS
545 *
546  DO 20 hh = 1, nnr
547 *
548  ierr( 1 ) = 0
549 *
550  nrhs = nrval( hh )
551 *
552 * Initialize Array Descriptor for rhs
553 *
554  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
555  $ ictxtb, nb+10, ierr( 1 ) )
556 *
557 * Convert this to 1D descriptor
558 *
559  descb( 1 ) = 502
560  descb( 3 ) = n
561  descb( 4 ) = nb
562  descb( 5 ) = 0
563  descb( 2 ) = ictxt
564  descb( 6 ) = descb2d( lld_ )
565  descb( 7 ) = 0
566 *
567 * reset free_ptr to reuse space for right hand sides
568 *
569  IF( ipb .GT. 0 ) THEN
570  free_ptr = ipb
571  ENDIF
572 *
573  free_ptr = free_ptr + iprepad
574  ipb = free_ptr
575  free_ptr = free_ptr + nrhs*descb2d( lld_ )
576  $ + ipostpad
577 *
578 * Allocate workspace for workspace in TRS routine:
579 *
580  ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
581 *
582  ipw_solve = free_ptr
583  free_ptr = free_ptr + ipw_solve_size
584 *
585  ierr( 1 ) = 0
586  IF( free_ptr.GT.memsiz ) THEN
587  IF( iam.EQ.0 )
588  $ WRITE( nout, fmt = 9996 )'solve',
589  $ ( free_ptr )*dblesz
590  ierr( 1 ) = 1
591  END IF
592 *
593 * Check all processes for an error
594 *
595  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
596  $ ierr, 1, -1, 0 )
597 *
598  IF( ierr( 1 ).GT.0 ) THEN
599  IF( iam.EQ.0 )
600  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
601  kskip = kskip + 1
602  GO TO 15
603  END IF
604 *
605  myrhs_size = numroc( n, nb, mycol, 0, npcol )
606 *
607 * Generate RHS
608 *
609  CALL pdmatgen(ictxtb, 'No', 'No',
610  $ descb2d( m_ ), descb2d( n_ ),
611  $ descb2d( mb_ ), descb2d( nb_ ),
612  $ mem( ipb ),
613  $ descb2d( lld_ ), descb2d( rsrc_ ),
614  $ descb2d( csrc_ ),
615  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
616  $ myrow, npcol, nprow )
617 *
618  IF( check ) THEN
619  CALL pdfillpad( ictxtb, nb, nrhs,
620  $ mem( ipb-iprepad ),
621  $ descb2d( lld_ ),
622  $ iprepad, ipostpad,
623  $ padval )
624  CALL pdfillpad( ictxt, worksiz, 1,
625  $ mem( ip_driver_w-iprepad ),
626  $ worksiz, iprepad,
627  $ ipostpad, padval )
628  END IF
629 *
630 *
631  CALL blacs_barrier( ictxt, 'All')
632  CALL sltimer( 2 )
633 *
634 * Solve linear system via factorization
635 *
636  CALL pdgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
637  $ 1, desca, ipiv, mem( ipb ), 1, descb,
638  $ mem( ip_fillin ), fillin_size,
639  $ mem( ipw_solve ), ipw_solve_size,
640  $ info )
641 *
642  CALL sltimer( 2 )
643 *
644  IF( info.NE.0 ) THEN
645  IF( iam.EQ.0 )
646  $ WRITE( nout, fmt = * ) 'PDGBTRS INFO=', info
647  kfail = kfail + 1
648  passed = 'FAILED'
649  GO TO 20
650  END IF
651 *
652  IF( check ) THEN
653 *
654 * check for memory overwrite
655 *
656  CALL pdchekpad( ictxt, 'PDGBTRS-work',
657  $ worksiz, 1,
658  $ mem( ip_driver_w-iprepad ),
659  $ worksiz, iprepad,
660  $ ipostpad, padval )
661 *
662 * check the solution to rhs
663 *
664  sresid = zero
665 *
666  CALL pddblaschk( 'N', 'N', trans,
667  $ n, bwl, bwu, nrhs,
668  $ mem( ipb ), 1, 1, descb2d,
669  $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
670  $ ibseed, anorm, sresid,
671  $ mem( ip_driver_w ), worksiz )
672 *
673  IF( iam.EQ.0 ) THEN
674  IF( sresid.GT.thresh )
675  $ WRITE( nout, fmt = 9985 ) sresid
676  END IF
677 *
678 * The second test is a NaN trap
679 *
680  IF( ( sresid.LE.thresh ).AND.
681  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
682  kpass = kpass + 1
683  passed = 'PASSED'
684  ELSE
685  kfail = kfail + 1
686  passed = 'FAILED'
687  END IF
688 *
689  END IF
690 *
691  15 CONTINUE
692 * Skipped tests jump to here to print out "SKIPPED"
693 *
694 * Gather maximum of all CPU and WALL clock timings
695 *
696  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
697  $ wtime )
698  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
699  $ ctime )
700 *
701 * Print results
702 *
703  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
704 *
705  nops = 0
706  nops2 = 0
707 *
708  n_first = nb
709  nprocs_real = ( n-1 )/nb + 1
710  n_last = mod( n-1, nb ) + 1
711 *
712 * 2 N bwl (bwl+bwu) + N (bwl) flops
713 * for LU factorization
714 *
715  nops = 2*(dble(n)*dble(bwl)*
716  $ dble((bwl+bwu))) +
717  $ (dble(n)*dble(bwl))
718 *
719 * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
720 *
721  nops = nops +
722  $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
723  $ *dble(nrhs))
724 *
725 * Second calc to represent actual hardware speed
726 *
727 * 2*N_FIRST (bwl+bwu)*bwu Flops for LU
728 * factorization in proc 1
729 *
730  nops2 = 2*( (dble(n_first)*
731  $ dble((bwl+bwu))*dble(bwu)))
732 *
733  IF ( nprocs_real .GT. 1) THEN
734 * 8 N_LAST (bwl+bwu)*(bwl+bwu)
735 * flops for LU and spike
736 * calc in last processor
737 *
738  nops2 = nops2 +
739  $ 8*( (dble(n_last)*dble((bwl+bwu))
740  $ *dble((bwl+bwu))) )
741  ENDIF
742 *
743  IF ( nprocs_real .GT. 2) THEN
744 * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
745 * calc in other processors
746 *
747  nops2 = nops2 + (nprocs_real-2)*
748  $ 8*( (dble(nb)*dble((bwl+bwu))
749  $ *dble((bwl+bwu))) )
750  ENDIF
751 *
752 * Reduced system
753 *
754  nops2 = nops2 +
755  $ 2*( nprocs_real-1 ) *
756  $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
757  IF( nprocs_real .GT. 1 ) THEN
758  nops2 = nops2 +
759  $ 2*( nprocs_real-2 ) *
760  $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
761  ENDIF
762 *
763 * Solve stage
764 *
765 * nrhs*2 n_first*
766 * (bwl+(bwl+bwu))
767 * flops for L,U solve in proc 1.
768 *
769  nops2 = nops2 +
770  $ 2*
771  $ dble(n_first)*
772  $ dble(nrhs) *
773  $ ( dble(bwl)+dble((bwl+bwu)))
774 *
775  IF ( nprocs_real .GT. 1 ) THEN
776 *
777 * 2*nrhs*2 n_last
778 * ((bwl+bwu)+(bwl+bwu))
779 * flops for LU solve in other procs
780 *
781  nops2 = nops2 +
782  $ 4*
783  $ (dble(n_last)*(dble((bwl+bwu))+
784  $ dble((bwl+bwu))))*dble(nrhs)
785  ENDIF
786 *
787  IF ( nprocs_real .GT. 2 ) THEN
788 *
789 * 2*nrhs*2 NB
790 * ((bwl+bwu)+(bwl+bwu))
791 * flops for LU solve in other procs
792 *
793  nops2 = nops2 +
794  $ ( nprocs_real-2)*2*
795  $ ( (dble(nb)*(dble((bwl+bwu))+
796  $ dble((bwl+bwu))))*dble(nrhs) )
797  ENDIF
798 *
799 * Reduced system
800 *
801  nops2 = nops2 +
802  $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
803  IF( nprocs_real .GT. 1 ) THEN
804  nops2 = nops2 +
805  $ nrhs*( nprocs_real-2 ) *
806  $ ( 6 * (bwl+bwu)*(bwl+bwu) )
807  ENDIF
808 *
809 *
810 * Calculate total megaflops - factorization and/or
811 * solve -- for WALL and CPU time, and print output
812 *
813 * Print WALL time if machine supports it
814 *
815  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
816  tmflops = nops /
817  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
818  ELSE
819  tmflops = 0.0d+0
820  END IF
821 *
822  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
823  tmflops2 = nops2 /
824  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
825  ELSE
826  tmflops2 = 0.0d+0
827  END IF
828 *
829  IF( wtime( 2 ).GE.0.0d+0 )
830  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
831  $ n,
832  $ bwl, bwu,
833  $ nb, nrhs, nprow, npcol,
834  $ wtime( 1 ), wtime( 2 ), tmflops,
835  $ tmflops2, passed
836 *
837 * Print CPU time if machine supports it
838 *
839  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
840  tmflops = nops /
841  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
842  ELSE
843  tmflops = 0.0d+0
844  END IF
845 *
846  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
847  tmflops2 = nops2 /
848  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
849  ELSE
850  tmflops2 = 0.0d+0
851  END IF
852 *
853  IF( ctime( 2 ).GE.0.0d+0 )
854  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
855  $ n,
856  $ bwl, bwu,
857  $ nb, nrhs, nprow, npcol,
858  $ ctime( 1 ), ctime( 2 ), tmflops,
859  $ tmflops2, passed
860 *
861  END IF
862  20 CONTINUE
863 *
864 *
865  30 CONTINUE
866 * NNB loop
867 *
868  45 CONTINUE
869 * BW[] loop
870 *
871  40 CONTINUE
872 * NMAT loop
873 *
874  CALL blacs_gridexit( ictxt )
875  CALL blacs_gridexit( ictxtb )
876 *
877  50 CONTINUE
878 * NGRIDS DROPOUT
879  60 CONTINUE
880 * NGRIDS loop
881 *
882 * Print ending messages and close output file
883 *
884  IF( iam.EQ.0 ) THEN
885  ktests = kpass + kfail + kskip
886  WRITE( nout, fmt = * )
887  WRITE( nout, fmt = 9992 ) ktests
888  IF( check ) THEN
889  WRITE( nout, fmt = 9991 ) kpass
890  WRITE( nout, fmt = 9989 ) kfail
891  ELSE
892  WRITE( nout, fmt = 9990 ) kpass
893  END IF
894  WRITE( nout, fmt = 9988 ) kskip
895  WRITE( nout, fmt = * )
896  WRITE( nout, fmt = * )
897  WRITE( nout, fmt = 9987 )
898  IF( nout.NE.6 .AND. nout.NE.0 )
899  $ CLOSE ( nout )
900  END IF
901 *
902  CALL blacs_exit( 0 )
903 *
904  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
905  $ '; It should be at least 1' )
906  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
907  $ i4 )
908  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
909  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
910  $ i11 )
911  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
912  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
913  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
914  $ '-------- -------- -------- ------' )
915  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
916  $ 1x,i4,1x,i4,1x,f9.3,
917  $ f9.4, f9.2, f9.2, 1x, a6 )
918  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
919  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
920  9990 FORMAT( i5, ' tests completed without checking.' )
921  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
922  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
923  9987 FORMAT( 'END OF TESTS.' )
924  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
925  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
926 *
927  stop
928 *
929 * End of PDGBTRS_DRIVER
930 *
931  END
932 *
max
#define max(A, B)
Definition: pcgemr.c:180
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pddblaschk
subroutine pddblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pddblaschk.f:4
pdgbinfo
subroutine pdgbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pdgbinfo.f:6
pdgbdriver
program pdgbdriver
Definition: pdgbdriver.f:1
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdgbtrf
subroutine pdgbtrf(N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, WORK, LWORK, INFO)
Definition: pdgbtrf.f:3
pdgbtrs
subroutine pdgbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pdgbtrs.f:3
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pdbmatgen
subroutine pdbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdbmatgen.f:5
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181