ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzgbdriver.f
Go to the documentation of this file.
1  PROGRAM pzgbdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PZGBDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by ZGB. This test driver performs an
15 * A = L*U factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTMEM INTEGER, default = 2048.
55 * INTMEM is the size of the integer workspace used in this
56 * driver as input as the IPIV vector. It represents an
57 * upper bound on NB, the blocksize of the data
58 * distribution.
59 *
60 * INTGSZ INTEGER, default = 4 bytes.
61 * ZPLXSZ INTEGER, default = 16 bytes.
62 * INTGSZ and ZPLXSZ indicate the length in bytes on the
63 * given platform for an integer and a double precision
64 * complex.
65 * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
66 * All arrays used by ScaLAPACK routines are allocated from
67 * this array and referenced by pointers. The integer IPB,
68 * for example, is a pointer to the starting element of MEM for
69 * the solution vector(s) B.
70 *
71 * =====================================================================
72 *
73 * Code Developer: Andrew J. Cleary, University of Tennessee.
74 * Current address: Lawrence Livermore National Labs.
75 * This version released: August, 2001.
76 *
77 * =====================================================================
78 *
79 * .. Parameters ..
80  INTEGER totmem
81  parameter( totmem = 3000000 )
82  INTEGER intmem
83  parameter( intmem = 2048 )
84  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
85  $ lld_, mb_, m_, nb_, n_, rsrc_
86  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
87  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
88  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
89 *
90  DOUBLE PRECISION zero
91  INTEGER memsiz, ntests, zplxsz
92  COMPLEX*16 padval
93  parameter( zplxsz = 16,
94  $ memsiz = totmem / zplxsz, ntests = 20,
95  $ padval = ( -9923.0d+0, -9923.0d+0 ),
96  $ zero = 0.0d+0 )
97  INTEGER int_one
98  parameter( int_one = 1 )
99 * ..
100 * .. Local Scalars ..
101  LOGICAL check
102  CHARACTER trans
103  CHARACTER*6 passed
104  CHARACTER*80 outfile
105  INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
106  $ i, iam, iaseed, ibseed, ictxt, ictxtb,
107  $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
108  $ iprepad, ipw, ipw_size, ipw_solve,
109  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
110  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
111  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
112  $ nnr, nout, np, npcol, nprocs, nprocs_real,
113  $ nprow, nq, nrhs, n_first, n_last, worksiz
114  REAL thresh
115  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
116  $ tmflops2
117 * ..
118 * .. Local Arrays ..
119  INTEGER ipiv(intmem)
120  INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
121  $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
122  $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
123  $ nrval( ntests ), nval( ntests ),
124  $ pval( ntests ), qval( ntests )
125  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
126  COMPLEX*16 mem( memsiz )
127 * ..
128 * .. External Subroutines ..
129  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
130  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
131  $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
134  $ sltimer
135 * ..
136 * .. External Functions ..
137  INTEGER numroc
138  LOGICAL lsame
139  DOUBLE PRECISION pzlange
140  EXTERNAL lsame, numroc, pzlange
141 * ..
142 * .. Intrinsic Functions ..
143  INTRINSIC dble, max, min, mod
144 * ..
145 * .. Data Statements ..
146  DATA kfail, kpass, kskip, ktests / 4*0 /
147 * ..
148 *
149 *
150 *
151 * .. Executable Statements ..
152 *
153 * Get starting information
154 *
155  CALL blacs_pinfo( iam, nprocs )
156  iaseed = 100
157  ibseed = 200
158 *
159  CALL pzgbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
160  $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
161  $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
162  $ ntests, qval, ntests, thresh, mem, iam, nprocs )
163 *
164  check = ( thresh.GE.0.0d+0 )
165 *
166 * Print headings
167 *
168  IF( iam.EQ.0 ) THEN
169  WRITE( nout, fmt = * )
170  WRITE( nout, fmt = 9995 )
171  WRITE( nout, fmt = 9994 )
172  WRITE( nout, fmt = * )
173  END IF
174 *
175 * Loop over different process grids
176 *
177  DO 60 i = 1, ngrids
178 *
179  nprow = pval( i )
180  npcol = qval( i )
181 *
182 * Make sure grid information is correct
183 *
184  ierr( 1 ) = 0
185  IF( nprow.LT.1 ) THEN
186  IF( iam.EQ.0 )
187  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
188  ierr( 1 ) = 1
189  ELSE IF( npcol.LT.1 ) THEN
190  IF( iam.EQ.0 )
191  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
192  ierr( 1 ) = 1
193  ELSE IF( nprow*npcol.GT.nprocs ) THEN
194  IF( iam.EQ.0 )
195  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
196  ierr( 1 ) = 1
197  END IF
198 *
199  IF( ierr( 1 ).GT.0 ) THEN
200  IF( iam.EQ.0 )
201  $ WRITE( nout, fmt = 9997 ) 'grid'
202  kskip = kskip + 1
203  GO TO 50
204  END IF
205 *
206 * Define process grid
207 *
208  CALL blacs_get( -1, 0, ictxt )
209  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
210 *
211 *
212 * Define transpose process grid
213 *
214  CALL blacs_get( -1, 0, ictxtb )
215  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
216 *
217 * Go to bottom of process grid loop if this case doesn't use my
218 * process
219 *
220  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
221 *
222  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
223  GO TO 50
224  ENDIF
225 *
226  DO 40 j = 1, nmat
227 *
228  ierr( 1 ) = 0
229 *
230  n = nval( j )
231 *
232 * Make sure matrix information is correct
233 *
234  IF( n.LT.1 ) THEN
235  IF( iam.EQ.0 )
236  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
237  ierr( 1 ) = 1
238  END IF
239 *
240 * Check all processes for an error
241 *
242  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
243  $ -1, 0 )
244 *
245  IF( ierr( 1 ).GT.0 ) THEN
246  IF( iam.EQ.0 )
247  $ WRITE( nout, fmt = 9997 ) 'size'
248  kskip = kskip + 1
249  GO TO 40
250  END IF
251 *
252 *
253  DO 45 bw_num = 1, nbw
254 *
255  ierr( 1 ) = 0
256 *
257  bwl = bwlval( bw_num )
258  IF( bwl.LT.1 ) THEN
259  IF( iam.EQ.0 )
260  $ WRITE( nout, fmt = 9999 ) 'Lower Band', 'bwl', bwl
261  ierr( 1 ) = 1
262  END IF
263 *
264  bwu = bwuval( bw_num )
265  IF( bwu.LT.1 ) THEN
266  IF( iam.EQ.0 )
267  $ WRITE( nout, fmt = 9999 ) 'Upper Band', 'bwu', bwu
268  ierr( 1 ) = 1
269  END IF
270 *
271  IF( bwl.GT.n-1 ) THEN
272  IF( iam.EQ.0 ) THEN
273  ierr( 1 ) = 1
274  ENDIF
275  END IF
276 *
277  IF( bwu.GT.n-1 ) THEN
278  IF( iam.EQ.0 ) THEN
279  ierr( 1 ) = 1
280  ENDIF
281  END IF
282 *
283 * Check all processes for an error
284 *
285  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
286  $ -1, 0 )
287 *
288  IF( ierr( 1 ).GT.0 ) THEN
289  kskip = kskip + 1
290  GO TO 45
291  END IF
292 *
293  DO 30 k = 1, nnb
294 *
295  ierr( 1 ) = 0
296 *
297  nb = nbval( k )
298  IF( nb.LT.0 ) THEN
299  nb =( (n-(npcol-1)*(bwl+bwu)-1)/npcol + 1 )
300  $ + (bwl+bwu)
301  nb = max( nb, 2*(bwl+bwu) )
302  nb = min( n, nb )
303  END IF
304 *
305 * Make sure NB is legal
306 *
307  ierr( 1 ) = 0
308 *
309  IF( nb.GT.intmem ) THEN
310  ierr( 1 ) = 1
311  IF( iam.EQ.0 ) THEN
312  WRITE( nout,* )'You have chosen an '
313  $ ,'NB > INTMEM in the driver.'
314  WRITE(nout, *)'Please edit the driver '
315  $ ,'and increase the value of INTMEM'
316  ENDIF
317  END IF
318 *
319 * Check all processes for an error
320 *
321  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
322  $ -1, 0 )
323 *
324  IF( ierr( 1 ).GT.0 ) THEN
325  kskip = kskip + 1
326  GO TO 30
327  END IF
328 *
329 * Padding constants
330 *
331  np = numroc( (2*bwl+2*bwu+1), (2*bwl+2*bwu+1),
332  $ myrow, 0, nprow )
333  nq = numroc( n, nb, mycol, 0, npcol )
334 *
335  IF( check ) THEN
336  iprepad = ((2*bwl+2*bwu+1)+10)
337  imidpad = 10
338  ipostpad = ((2*bwl+2*bwu+1)+10)
339  ELSE
340  iprepad = 0
341  imidpad = 0
342  ipostpad = 0
343  END IF
344 *
345 * Initialize the array descriptor for the matrix A
346 *
347  CALL descinit( desca2d, (2*bwl+2*bwu+1), n,
348  $ (2*bwl+2*bwu+1), nb, 0, 0,
349  $ ictxt,((2*bwl+2*bwu+1)+10), ierr( 1 ) )
350 *
351 * Convert this to 1D descriptor
352 *
353  desca( 1 ) = 501
354  desca( 3 ) = n
355  desca( 4 ) = nb
356  desca( 5 ) = 0
357  desca( 2 ) = ictxt
358  desca( 6 ) = ((2*bwl+2*bwu+1)+10)
359  desca( 7 ) = 0
360 *
361  ierr_temp = ierr( 1 )
362  ierr( 1 ) = 0
363  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
364 *
365 * Check all processes for an error
366 *
367  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
368 *
369  IF( ierr( 1 ).LT.0 ) THEN
370  IF( iam.EQ.0 )
371  $ WRITE( nout, fmt = 9997 ) 'descriptor'
372  kskip = kskip + 1
373  GO TO 30
374  END IF
375 *
376 * Assign pointers into MEM for SCALAPACK arrays, A is
377 * allocated starting at position MEM( IPREPAD+1 )
378 *
379  free_ptr = 1
380  ipb = 0
381 *
382 * Save room for prepadding
383  free_ptr = free_ptr + iprepad
384 *
385  ipa = free_ptr
386  free_ptr = free_ptr + desca2d( lld_ )*
387  $ desca2d( nb_ )
388  $ + ipostpad
389 *
390 * Add memory for fillin
391 * Fillin space needs to store:
392 * Fillin spike:
393 * Contribution to previous proc's diagonal block of
394 * reduced system:
395 * Off-diagonal block of reduced system:
396 * Diagonal block of reduced system:
397 *
398  fillin_size =
399  $ (nb+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu)
400 *
401 * Claim memory for fillin
402 *
403  free_ptr = free_ptr + iprepad
404  ip_fillin = free_ptr
405  free_ptr = free_ptr + fillin_size
406 *
407 * Workspace needed by computational routines:
408 *
409  ipw_size = 0
410 *
411 * factorization:
412 *
413  ipw_size = 1
414 *
415 * Claim memory for IPW
416 *
417  ipw = free_ptr
418  free_ptr = free_ptr + ipw_size
419 *
420 * Check for adequate memory for problem size
421 *
422  ierr( 1 ) = 0
423  IF( free_ptr.GT.memsiz ) THEN
424  IF( iam.EQ.0 )
425  $ WRITE( nout, fmt = 9996 )
426  $ 'divide and conquer factorization',
427  $ (free_ptr )*zplxsz
428  ierr( 1 ) = 1
429  END IF
430 *
431 * Check all processes for an error
432 *
433  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
434  $ 1, -1, 0 )
435 *
436  IF( ierr( 1 ).GT.0 ) THEN
437  IF( iam.EQ.0 )
438  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
439  kskip = kskip + 1
440  GO TO 30
441  END IF
442 *
443 * Worksize needed for LAPRNT
444  worksiz = max( ((2*bwl+2*bwu+1)+10), nb )
445 *
446  IF( check ) THEN
447 *
448 * Calculate the amount of workspace required by
449 * the checking routines.
450 *
451 * PZLANGE
452  worksiz = max( worksiz, desca2d( nb_ ) )
453 *
454 * PZDBLASCHK
455  worksiz = max( worksiz,
456  $ max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),nb))+2*nb )
457  END IF
458 *
459  free_ptr = free_ptr + iprepad
460  ip_driver_w = free_ptr
461  free_ptr = free_ptr + worksiz + ipostpad
462 *
463 *
464 * Check for adequate memory for problem size
465 *
466  ierr( 1 ) = 0
467  IF( free_ptr.GT.memsiz ) THEN
468  IF( iam.EQ.0 )
469  $ WRITE( nout, fmt = 9996 ) 'factorization',
470  $ ( free_ptr )*zplxsz
471  ierr( 1 ) = 1
472  END IF
473 *
474 * Check all processes for an error
475 *
476  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
477  $ 1, -1, 0 )
478 *
479  IF( ierr( 1 ).GT.0 ) THEN
480  IF( iam.EQ.0 )
481  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
482  kskip = kskip + 1
483  GO TO 30
484  END IF
485 *
486  CALL pzbmatgen( ictxt, 'G', 'N', bwl, bwu, n,
487  $ (2*bwl+2*bwu+1), nb, mem( ipa+bwl+bwu ),
488  $ ((2*bwl+2*bwu+1)+10), 0, 0, iaseed,
489  $ myrow, mycol, nprow, npcol )
490 *
491  CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
492  $ ((2*bwl+2*bwu+1)+10), iprepad, ipostpad,
493  $ padval )
494 *
495  CALL pzfillpad( ictxt, worksiz, 1,
496  $ mem( ip_driver_w-iprepad ), worksiz,
497  $ iprepad, ipostpad, padval )
498 *
499 * Calculate norm of A for residual error-checking
500 *
501  IF( check ) THEN
502 *
503  anorm = pzlange( '1', (2*bwl+2*bwu+1),
504  $ n, mem( ipa ), 1, 1,
505  $ desca2d, mem( ip_driver_w ) )
506  CALL pzchekpad( ictxt, 'PZLANGE', np, nq,
507  $ mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
508  $ iprepad, ipostpad, padval )
509  CALL pzchekpad( ictxt, 'PZLANGE',
510  $ worksiz, 1,
511  $ mem( ip_driver_w-iprepad ), worksiz,
512  $ iprepad, ipostpad, padval )
513  END IF
514 *
515 *
516  CALL slboot()
517  CALL blacs_barrier( ictxt, 'All' )
518 *
519 * Perform factorization
520 *
521  CALL sltimer( 1 )
522 *
523  CALL pzgbtrf( n, bwl, bwu, mem( ipa ), 1, desca, ipiv,
524  $ mem( ip_fillin ), fillin_size, mem( ipw ),
525  $ ipw_size, info )
526 *
527  CALL sltimer( 1 )
528 *
529  IF( info.NE.0 ) THEN
530  IF( iam.EQ.0 ) THEN
531  WRITE( nout, fmt = * ) 'PZGBTRF INFO=', info
532  ENDIF
533  kfail = kfail + 1
534  GO TO 30
535  END IF
536 *
537  IF( check ) THEN
538 *
539 * Check for memory overwrite in factorization
540 *
541  CALL pzchekpad( ictxt, 'PZGBTRF', np,
542  $ nq, mem( ipa-iprepad ), ((2*bwl+2*bwu+1)+10),
543  $ iprepad, ipostpad, padval )
544  END IF
545 *
546 *
547 * Loop over the different values for NRHS
548 *
549  DO 20 hh = 1, nnr
550 *
551  ierr( 1 ) = 0
552 *
553  nrhs = nrval( hh )
554 *
555 * Initialize Array Descriptor for rhs
556 *
557  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
558  $ ictxtb, nb+10, ierr( 1 ) )
559 *
560 * Convert this to 1D descriptor
561 *
562  descb( 1 ) = 502
563  descb( 3 ) = n
564  descb( 4 ) = nb
565  descb( 5 ) = 0
566  descb( 2 ) = ictxt
567  descb( 6 ) = descb2d( lld_ )
568  descb( 7 ) = 0
569 *
570 * reset free_ptr to reuse space for right hand sides
571 *
572  IF( ipb .GT. 0 ) THEN
573  free_ptr = ipb
574  ENDIF
575 *
576  free_ptr = free_ptr + iprepad
577  ipb = free_ptr
578  free_ptr = free_ptr + nrhs*descb2d( lld_ )
579  $ + ipostpad
580 *
581 * Allocate workspace for workspace in TRS routine:
582 *
583  ipw_solve_size = nrhs*(nb+2*bwl+4*bwu)
584 *
585  ipw_solve = free_ptr
586  free_ptr = free_ptr + ipw_solve_size
587 *
588  ierr( 1 ) = 0
589  IF( free_ptr.GT.memsiz ) THEN
590  IF( iam.EQ.0 )
591  $ WRITE( nout, fmt = 9996 )'solve',
592  $ ( free_ptr )*zplxsz
593  ierr( 1 ) = 1
594  END IF
595 *
596 * Check all processes for an error
597 *
598  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
599  $ ierr, 1, -1, 0 )
600 *
601  IF( ierr( 1 ).GT.0 ) THEN
602  IF( iam.EQ.0 )
603  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
604  kskip = kskip + 1
605  GO TO 15
606  END IF
607 *
608  myrhs_size = numroc( n, nb, mycol, 0, npcol )
609 *
610 * Generate RHS
611 *
612  CALL pzmatgen(ictxtb, 'No', 'No',
613  $ descb2d( m_ ), descb2d( n_ ),
614  $ descb2d( mb_ ), descb2d( nb_ ),
615  $ mem( ipb ),
616  $ descb2d( lld_ ), descb2d( rsrc_ ),
617  $ descb2d( csrc_ ),
618  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
619  $ myrow, npcol, nprow )
620 *
621  IF( check ) THEN
622  CALL pzfillpad( ictxtb, nb, nrhs,
623  $ mem( ipb-iprepad ),
624  $ descb2d( lld_ ),
625  $ iprepad, ipostpad,
626  $ padval )
627  CALL pzfillpad( ictxt, worksiz, 1,
628  $ mem( ip_driver_w-iprepad ),
629  $ worksiz, iprepad,
630  $ ipostpad, padval )
631  END IF
632 *
633 *
634  CALL blacs_barrier( ictxt, 'All')
635  CALL sltimer( 2 )
636 *
637 * Solve linear system via factorization
638 *
639  CALL pzgbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
640  $ 1, desca, ipiv, mem( ipb ), 1, descb,
641  $ mem( ip_fillin ), fillin_size,
642  $ mem( ipw_solve ), ipw_solve_size,
643  $ info )
644 *
645  CALL sltimer( 2 )
646 *
647  IF( info.NE.0 ) THEN
648  IF( iam.EQ.0 )
649  $ WRITE( nout, fmt = * ) 'PZGBTRS INFO=', info
650  kfail = kfail + 1
651  passed = 'FAILED'
652  GO TO 20
653  END IF
654 *
655  IF( check ) THEN
656 *
657 * check for memory overwrite
658 *
659  CALL pzchekpad( ictxt, 'PZGBTRS-work',
660  $ worksiz, 1,
661  $ mem( ip_driver_w-iprepad ),
662  $ worksiz, iprepad,
663  $ ipostpad, padval )
664 *
665 * check the solution to rhs
666 *
667  sresid = zero
668 *
669  CALL pzdblaschk( 'N', 'N', trans,
670  $ n, bwl, bwu, nrhs,
671  $ mem( ipb ), 1, 1, descb2d,
672  $ iaseed, mem( ipa+bwl+bwu ), 1, 1, desca2d,
673  $ ibseed, anorm, sresid,
674  $ mem( ip_driver_w ), worksiz )
675 *
676  IF( iam.EQ.0 ) THEN
677  IF( sresid.GT.thresh )
678  $ WRITE( nout, fmt = 9985 ) sresid
679  END IF
680 *
681 * The second test is a NaN trap
682 *
683  IF( ( sresid.LE.thresh ).AND.
684  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
685  kpass = kpass + 1
686  passed = 'PASSED'
687  ELSE
688  kfail = kfail + 1
689  passed = 'FAILED'
690  END IF
691 *
692  END IF
693 *
694  15 CONTINUE
695 * Skipped tests jump to here to print out "SKIPPED"
696 *
697 * Gather maximum of all CPU and WALL clock timings
698 *
699  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
700  $ wtime )
701  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
702  $ ctime )
703 *
704 * Print results
705 *
706  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
707 *
708  nops = 0
709  nops2 = 0
710 *
711  n_first = nb
712  nprocs_real = ( n-1 )/nb + 1
713  n_last = mod( n-1, nb ) + 1
714 *
715 * 2 N bwl (bwl+bwu) + N (bwl) flops
716 * for LU factorization
717 *
718  nops = 2*(dble(n)*dble(bwl)*
719  $ dble((bwl+bwu))) +
720  $ (dble(n)*dble(bwl))
721 *
722 * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
723 *
724  nops = nops +
725  $ 2 * (dble(n)*(dble(bwl)+dble((bwl+bwu)))
726  $ *dble(nrhs))
727 *
728 * Multiply by 4 to get complex count
729 *
730  nops = nops * dble(4)
731 *
732 * Second calc to represent actual hardware speed
733 *
734 * 2*N_FIRST (bwl+bwu)*bwu Flops for LU
735 * factorization in proc 1
736 *
737  nops2 = 2*( (dble(n_first)*
738  $ dble((bwl+bwu))*dble(bwu)))
739 *
740  IF ( nprocs_real .GT. 1) THEN
741 * 8 N_LAST (bwl+bwu)*(bwl+bwu)
742 * flops for LU and spike
743 * calc in last processor
744 *
745  nops2 = nops2 +
746  $ 8*( (dble(n_last)*dble((bwl+bwu))
747  $ *dble((bwl+bwu))) )
748  ENDIF
749 *
750  IF ( nprocs_real .GT. 2) THEN
751 * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike
752 * calc in other processors
753 *
754  nops2 = nops2 + (nprocs_real-2)*
755  $ 8*( (dble(nb)*dble((bwl+bwu))
756  $ *dble((bwl+bwu))) )
757  ENDIF
758 *
759 * Reduced system
760 *
761  nops2 = nops2 +
762  $ 2*( nprocs_real-1 ) *
763  $ ( (bwl+bwu)*(bwl+bwu)*(bwl+bwu)/3 )
764  IF( nprocs_real .GT. 1 ) THEN
765  nops2 = nops2 +
766  $ 2*( nprocs_real-2 ) *
767  $ (2*(bwl+bwu)*(bwl+bwu)*(bwl+bwu))
768  ENDIF
769 *
770 * Solve stage
771 *
772 * nrhs*2 n_first*
773 * (bwl+(bwl+bwu))
774 * flops for L,U solve in proc 1.
775 *
776  nops2 = nops2 +
777  $ 2*
778  $ dble(n_first)*
779  $ dble(nrhs) *
780  $ ( dble(bwl)+dble((bwl+bwu)))
781 *
782  IF ( nprocs_real .GT. 1 ) THEN
783 *
784 * 2*nrhs*2 n_last
785 * ((bwl+bwu)+(bwl+bwu))
786 * flops for LU solve in other procs
787 *
788  nops2 = nops2 +
789  $ 4*
790  $ (dble(n_last)*(dble((bwl+bwu))+
791  $ dble((bwl+bwu))))*dble(nrhs)
792  ENDIF
793 *
794  IF ( nprocs_real .GT. 2 ) THEN
795 *
796 * 2*nrhs*2 NB
797 * ((bwl+bwu)+(bwl+bwu))
798 * flops for LU solve in other procs
799 *
800  nops2 = nops2 +
801  $ ( nprocs_real-2)*2*
802  $ ( (dble(nb)*(dble((bwl+bwu))+
803  $ dble((bwl+bwu))))*dble(nrhs) )
804  ENDIF
805 *
806 * Reduced system
807 *
808  nops2 = nops2 +
809  $ nrhs*( nprocs_real-1)*2*((bwl+bwu)*(bwl+bwu) )
810  IF( nprocs_real .GT. 1 ) THEN
811  nops2 = nops2 +
812  $ nrhs*( nprocs_real-2 ) *
813  $ ( 6 * (bwl+bwu)*(bwl+bwu) )
814  ENDIF
815 *
816 *
817 * Multiply by 4 to get complex count
818 *
819  nops2 = nops2 * dble(4)
820 *
821 * Calculate total megaflops - factorization and/or
822 * solve -- for WALL and CPU time, and print output
823 *
824 * Print WALL time if machine supports it
825 *
826  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
827  tmflops = nops /
828  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
829  ELSE
830  tmflops = 0.0d+0
831  END IF
832 *
833  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
834  tmflops2 = nops2 /
835  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
836  ELSE
837  tmflops2 = 0.0d+0
838  END IF
839 *
840  IF( wtime( 2 ).GE.0.0d+0 )
841  $ WRITE( nout, fmt = 9993 ) 'WALL', trans,
842  $ n,
843  $ bwl, bwu,
844  $ nb, nrhs, nprow, npcol,
845  $ wtime( 1 ), wtime( 2 ), tmflops,
846  $ tmflops2, passed
847 *
848 * Print CPU time if machine supports it
849 *
850  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
851  tmflops = nops /
852  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
853  ELSE
854  tmflops = 0.0d+0
855  END IF
856 *
857  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
858  tmflops2 = nops2 /
859  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
860  ELSE
861  tmflops2 = 0.0d+0
862  END IF
863 *
864  IF( ctime( 2 ).GE.0.0d+0 )
865  $ WRITE( nout, fmt = 9993 ) 'CPU ', trans,
866  $ n,
867  $ bwl, bwu,
868  $ nb, nrhs, nprow, npcol,
869  $ ctime( 1 ), ctime( 2 ), tmflops,
870  $ tmflops2, passed
871 *
872  END IF
873  20 CONTINUE
874 *
875 *
876  30 CONTINUE
877 * NNB loop
878 *
879  45 CONTINUE
880 * BW[] loop
881 *
882  40 CONTINUE
883 * NMAT loop
884 *
885  CALL blacs_gridexit( ictxt )
886  CALL blacs_gridexit( ictxtb )
887 *
888  50 CONTINUE
889 * NGRIDS DROPOUT
890  60 CONTINUE
891 * NGRIDS loop
892 *
893 * Print ending messages and close output file
894 *
895  IF( iam.EQ.0 ) THEN
896  ktests = kpass + kfail + kskip
897  WRITE( nout, fmt = * )
898  WRITE( nout, fmt = 9992 ) ktests
899  IF( check ) THEN
900  WRITE( nout, fmt = 9991 ) kpass
901  WRITE( nout, fmt = 9989 ) kfail
902  ELSE
903  WRITE( nout, fmt = 9990 ) kpass
904  END IF
905  WRITE( nout, fmt = 9988 ) kskip
906  WRITE( nout, fmt = * )
907  WRITE( nout, fmt = * )
908  WRITE( nout, fmt = 9987 )
909  IF( nout.NE.6 .AND. nout.NE.0 )
910  $ CLOSE ( nout )
911  END IF
912 *
913  CALL blacs_exit( 0 )
914 *
915  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
916  $ '; It should be at least 1' )
917  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
918  $ i4 )
919  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
920  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
921  $ i11 )
922  9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ',
923  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
924  9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ',
925  $ '-------- -------- -------- ------' )
926  9993 FORMAT( a4,1x,a1,2x,i6,1x,i3,1x,i3,1x,i4,1x,i5,
927  $ 1x,i4,1x,i4,1x,f9.3,
928  $ f9.4, f9.2, f9.2, 1x, a6 )
929  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
930  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
931  9990 FORMAT( i5, ' tests completed without checking.' )
932  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
933  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
934  9987 FORMAT( 'END OF TESTS.' )
935  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
936  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
937 *
938  stop
939 *
940 * End of PZGBTRS_DRIVER
941 *
942  END
943 *
pzgbtrs
subroutine pzgbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pzgbtrs.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pzdblaschk
subroutine pzdblaschk(SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pzdblaschk.f:4
pzgbinfo
subroutine pzgbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pzgbinfo.f:6
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pzgbdriver
program pzgbdriver
Definition: pzgbdriver.f:1
pzbmatgen
subroutine pzbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzbmatgen.f:5
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
pzgbtrf
subroutine pzgbtrf(N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, WORK, LWORK, INFO)
Definition: pzgbtrf.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzchekpad
subroutine pzchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzchekpad.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pzfillpad
subroutine pzfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzfillpad.f:2