ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzptdriver.f
Go to the documentation of this file.
1  PROGRAM pzptdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PZPTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by ZPT. This test driver performs an
15 * A = L*L**H factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * ZPLXSZ INTEGER, default = 16 bytes.
56 * INTGSZ and ZPLXSZ indicate the length in bytes on the
57 * given platform for an integer and a double precision
58 * complex.
59 * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60 * All arrays used by ScaLAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPB,
62 * for example, is a pointer to the starting element of MEM for
63 * the solution vector(s) B.
64 *
65 * =====================================================================
66 *
67 * Code Developer: Andrew J. Cleary, University of Tennessee.
68 * Current address: Lawrence Livermore National Labs.
69 * This version released: August, 2001.
70 *
71 * =====================================================================
72 *
73 * .. Parameters ..
74  INTEGER totmem
75  parameter( totmem = 3000000 )
76  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77  $ lld_, mb_, m_, nb_, n_, rsrc_
78  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81 *
82  DOUBLE PRECISION zero
83  INTEGER memsiz, ntests, zplxsz
84  COMPLEX*16 padval
85  parameter( zplxsz = 16,
86  $ memsiz = totmem / zplxsz, ntests = 20,
87  $ padval = ( -9923.0d+0, -9923.0d+0 ),
88  $ zero = 0.0d+0 )
89  INTEGER int_one
90  parameter( int_one = 1 )
91 * ..
92 * .. Local Scalars ..
93  LOGICAL check
94  CHARACTER uplo
95  CHARACTER*6 passed
96  CHARACTER*80 outfile
97  INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
98  $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99  $ imidpad, info, int_temp, ipa, ipb, ipostpad,
100  $ iprepad, ipw, ipw_size, ipw_solve,
101  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104  $ nnr, nout, np, npcol, nprocs, nprocs_real,
105  $ nprow, nq, nrhs, n_first, n_last, worksiz
106  REAL thresh
107  DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
108  $ tmflops2
109 * ..
110 * .. Local Arrays ..
111  INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
112  $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
113  $ nbrval( ntests ), nbval( ntests ),
114  $ nrval( ntests ), nval( ntests ),
115  $ pval( ntests ), qval( ntests )
116  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117  COMPLEX*16 mem( memsiz )
118 * ..
119 * .. External Subroutines ..
120  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
122  $ blacs_pinfo, descinit, igsum2d, pzbmatgen,
125  $ slcombine, sltimer
126 * ..
127 * .. External Functions ..
128  INTEGER numroc
129  LOGICAL lsame
130  DOUBLE PRECISION pzlange
131  EXTERNAL lsame, numroc, pzlange
132 * ..
133 * .. Intrinsic Functions ..
134  INTRINSIC dble, max, min, mod
135 * ..
136 * .. Data Statements ..
137  DATA kfail, kpass, kskip, ktests / 4*0 /
138 * ..
139 *
140 *
141 *
142 * .. Executable Statements ..
143 *
144 * Get starting information
145 *
146  CALL blacs_pinfo( iam, nprocs )
147  iaseed = 100
148  ibseed = 200
149 *
150  CALL pzptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
151  $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
152  $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
153  $ qval, ntests, thresh, mem, iam, nprocs )
154 *
155  check = ( thresh.GE.0.0d+0 )
156 *
157 * Print headings
158 *
159  IF( iam.EQ.0 ) THEN
160  WRITE( nout, fmt = * )
161  WRITE( nout, fmt = 9995 )
162  WRITE( nout, fmt = 9994 )
163  WRITE( nout, fmt = * )
164  END IF
165 *
166 * Loop over different process grids
167 *
168  DO 60 i = 1, ngrids
169 *
170  nprow = pval( i )
171  npcol = qval( i )
172 *
173 * Make sure grid information is correct
174 *
175  ierr( 1 ) = 0
176  IF( nprow.LT.1 ) THEN
177  IF( iam.EQ.0 )
178  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179  ierr( 1 ) = 1
180  ELSE IF( npcol.LT.1 ) THEN
181  IF( iam.EQ.0 )
182  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183  ierr( 1 ) = 1
184  ELSE IF( nprow*npcol.GT.nprocs ) THEN
185  IF( iam.EQ.0 )
186  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187  ierr( 1 ) = 1
188  END IF
189 *
190  IF( ierr( 1 ).GT.0 ) THEN
191  IF( iam.EQ.0 )
192  $ WRITE( nout, fmt = 9997 ) 'grid'
193  kskip = kskip + 1
194  GO TO 50
195  END IF
196 *
197 * Define process grid
198 *
199  CALL blacs_get( -1, 0, ictxt )
200  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201 *
202 *
203 * Define transpose process grid
204 *
205  CALL blacs_get( -1, 0, ictxtb )
206  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
207 *
208 * Go to bottom of process grid loop if this case doesn't use my
209 * process
210 *
211  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
212 *
213  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
214  GO TO 50
215  ENDIF
216 *
217  DO 40 j = 1, nmat
218 *
219  ierr( 1 ) = 0
220 *
221  n = nval( j )
222 *
223 * Make sure matrix information is correct
224 *
225  IF( n.LT.1 ) THEN
226  IF( iam.EQ.0 )
227  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
228  ierr( 1 ) = 1
229  END IF
230 *
231 * Check all processes for an error
232 *
233  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
234  $ -1, 0 )
235 *
236  IF( ierr( 1 ).GT.0 ) THEN
237  IF( iam.EQ.0 )
238  $ WRITE( nout, fmt = 9997 ) 'size'
239  kskip = kskip + 1
240  GO TO 40
241  END IF
242 *
243 *
244  DO 45 bw_num = 1, nbw
245 *
246  ierr( 1 ) = 0
247 *
248  bw = 1
249  IF( bw.LT.0 ) THEN
250  IF( iam.EQ.0 )
251  $ WRITE( nout, fmt = 9999 ) 'Band', 'bw', bw
252  ierr( 1 ) = 1
253  END IF
254 *
255  IF( bw.GT.n-1 ) THEN
256  ierr( 1 ) = 1
257  END IF
258 *
259 * Check all processes for an error
260 *
261  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
262  $ -1, 0 )
263 *
264  IF( ierr( 1 ).GT.0 ) THEN
265  kskip = kskip + 1
266  GO TO 45
267  END IF
268 *
269  DO 30 k = 1, nnb
270 *
271  ierr( 1 ) = 0
272 *
273  nb = nbval( k )
274  IF( nb.LT.0 ) THEN
275  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
276  $ + int_one
277  nb = max( nb, 2*int_one )
278  nb = min( n, nb )
279  END IF
280 *
281 * Make sure NB is legal
282 *
283  ierr( 1 ) = 0
284  IF( nb.LT.min( 2*int_one, n ) ) THEN
285  ierr( 1 ) = 1
286  ENDIF
287 *
288 * Check all processes for an error
289 *
290  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
291  $ -1, 0 )
292 *
293  IF( ierr( 1 ).GT.0 ) THEN
294  kskip = kskip + 1
295  GO TO 30
296  END IF
297 *
298 * Padding constants
299 *
300  np = numroc( (2), (2),
301  $ myrow, 0, nprow )
302  nq = numroc( n, nb, mycol, 0, npcol )
303 *
304  IF( check ) THEN
305  iprepad = ((2)+10)
306  imidpad = 10
307  ipostpad = ((2)+10)
308  ELSE
309  iprepad = 0
310  imidpad = 0
311  ipostpad = 0
312  END IF
313 *
314 * Initialize the array descriptor for the matrix A
315 *
316  CALL descinit( desca2d, n, (2),
317  $ nb, 1, 0, 0,
318  $ ictxtb, nb+10, ierr( 1 ) )
319 *
320 * Convert this to 1D descriptor
321 *
322  desca( 1 ) = 501
323  desca( 3 ) = n
324  desca( 4 ) = nb
325  desca( 5 ) = 0
326  desca( 2 ) = ictxt
327  desca( 6 ) = ((2)+10)
328  desca( 7 ) = 0
329 *
330  ierr_temp = ierr( 1 )
331  ierr( 1 ) = 0
332  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
333 *
334 * Check all processes for an error
335 *
336  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
337 *
338  IF( ierr( 1 ).LT.0 ) THEN
339  IF( iam.EQ.0 )
340  $ WRITE( nout, fmt = 9997 ) 'descriptor'
341  kskip = kskip + 1
342  GO TO 30
343  END IF
344 *
345 * Assign pointers into MEM for SCALAPACK arrays, A is
346 * allocated starting at position MEM( IPREPAD+1 )
347 *
348  free_ptr = 1
349  ipb = 0
350 *
351 * Save room for prepadding
352  free_ptr = free_ptr + iprepad
353 *
354  ipa = free_ptr
355  free_ptr = free_ptr + (nb+10)*(2)
356  $ + ipostpad
357 *
358 * Add memory for fillin
359 * Fillin space needs to store:
360 * Fillin spike:
361 * Contribution to previous proc's diagonal block of
362 * reduced system:
363 * Off-diagonal block of reduced system:
364 * Diagonal block of reduced system:
365 *
366  fillin_size =
367  $ (12*npcol + 3*nb)
368 *
369 * Claim memory for fillin
370 *
371  free_ptr = free_ptr + iprepad
372  ip_fillin = free_ptr
373  free_ptr = free_ptr + fillin_size
374 *
375 * Workspace needed by computational routines:
376 *
377  ipw_size = 0
378 *
379 * factorization:
380 *
381  ipw_size = 8*npcol
382 *
383 * Claim memory for IPW
384 *
385  ipw = free_ptr
386  free_ptr = free_ptr + ipw_size
387 *
388 * Check for adequate memory for problem size
389 *
390  ierr( 1 ) = 0
391  IF( free_ptr.GT.memsiz ) THEN
392  IF( iam.EQ.0 )
393  $ WRITE( nout, fmt = 9996 )
394  $ 'divide and conquer factorization',
395  $ (free_ptr )*zplxsz
396  ierr( 1 ) = 1
397  END IF
398 *
399 * Check all processes for an error
400 *
401  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
402  $ 1, -1, 0 )
403 *
404  IF( ierr( 1 ).GT.0 ) THEN
405  IF( iam.EQ.0 )
406  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
407  kskip = kskip + 1
408  GO TO 30
409  END IF
410 *
411 * Worksize needed for LAPRNT
412  worksiz = max( ((2)+10), nb )
413 *
414  IF( check ) THEN
415 *
416 * Calculate the amount of workspace required by
417 * the checking routines.
418 *
419 * PZLANGE
420  worksiz = max( worksiz, desca2d( nb_ ) )
421 *
422 * PZPTLASCHK
423  worksiz = max( worksiz,
424  $ max(5,nb)+2*nb )
425  END IF
426 *
427  free_ptr = free_ptr + iprepad
428  ip_driver_w = free_ptr
429  free_ptr = free_ptr + worksiz + ipostpad
430 *
431 *
432 * Check for adequate memory for problem size
433 *
434  ierr( 1 ) = 0
435  IF( free_ptr.GT.memsiz ) THEN
436  IF( iam.EQ.0 )
437  $ WRITE( nout, fmt = 9996 ) 'factorization',
438  $ ( free_ptr )*zplxsz
439  ierr( 1 ) = 1
440  END IF
441 *
442 * Check all processes for an error
443 *
444  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
445  $ 1, -1, 0 )
446 *
447  IF( ierr( 1 ).GT.0 ) THEN
448  IF( iam.EQ.0 )
449  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
450  kskip = kskip + 1
451  GO TO 30
452  END IF
453 *
454  CALL pzbmatgen( ictxt, uplo, 'T', bw, bw, n, (2), nb,
455  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
456  $ mycol, nprow, npcol )
457  CALL pzfillpad( ictxt, nq, np, mem( ipa-iprepad ),
458  $ nb+10, iprepad, ipostpad,
459  $ padval )
460 *
461  CALL pzfillpad( ictxt, worksiz, 1,
462  $ mem( ip_driver_w-iprepad ), worksiz,
463  $ iprepad, ipostpad, padval )
464 *
465 * Calculate norm of A for residual error-checking
466 *
467  IF( check ) THEN
468 *
469  anorm = pzlange( 'I', n,
470  $ (2), mem( ipa ), 1, 1,
471  $ desca2d, mem( ip_driver_w ) )
472  CALL pzchekpad( ictxt, 'PZLANGE', nq, np,
473  $ mem( ipa-iprepad ), nb+10,
474  $ iprepad, ipostpad, padval )
475  CALL pzchekpad( ictxt, 'PZLANGE',
476  $ worksiz, 1,
477  $ mem( ip_driver_w-iprepad ), worksiz,
478  $ iprepad, ipostpad, padval )
479  END IF
480 *
481  IF( lsame( uplo, 'L' ) ) THEN
482  int_temp = 0
483  ELSE
484  int_temp = desca2d( lld_ )
485  ENDIF
486 *
487 * For SPD Tridiagonal complex matrices, diagonal is stored
488 * as a real. Thus, compact D into half the space
489 *
490  DO 10 h=1, numroc(n,nb,mycol,0,npcol)/2
491  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
492  $ +mem( ipa+int_temp+2*h-1 )*( 0.0d+0, 1.0d+0 )
493  10 CONTINUE
494  IF( 2*(numroc(n,nb,mycol,0,npcol)/2).NE.
495  $ numroc(n,nb,mycol,0,npcol) ) THEN
496  h=numroc(n,nb,mycol,0,npcol)/2+1
497  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
498  ENDIF
499 *
500 *
501  CALL slboot()
502  CALL blacs_barrier( ictxt, 'All' )
503 *
504 * Perform factorization
505 *
506  CALL sltimer( 1 )
507 *
508  CALL pzpttrf( n, mem( ipa+int_temp ),
509  $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
510  $ mem( ip_fillin ), fillin_size, mem( ipw ),
511  $ ipw_size, info )
512 *
513  CALL sltimer( 1 )
514 *
515  IF( info.NE.0 ) THEN
516  IF( iam.EQ.0 ) THEN
517  WRITE( nout, fmt = * ) 'PZPTTRF INFO=', info
518  ENDIF
519  kfail = kfail + 1
520  GO TO 30
521  END IF
522 *
523  IF( check ) THEN
524 *
525 * Check for memory overwrite in factorization
526 *
527  CALL pzchekpad( ictxt, 'PZPTTRF', nq,
528  $ np, mem( ipa-iprepad ), nb+10,
529  $ iprepad, ipostpad, padval )
530  END IF
531 *
532 *
533 * Loop over the different values for NRHS
534 *
535  DO 20 hh = 1, nnr
536 *
537  ierr( 1 ) = 0
538 *
539  nrhs = nrval( hh )
540 *
541 * Initialize Array Descriptor for rhs
542 *
543  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
544  $ ictxtb, nb+10, ierr( 1 ) )
545 *
546 * Convert this to 1D descriptor
547 *
548  descb( 1 ) = 502
549  descb( 3 ) = n
550  descb( 4 ) = nb
551  descb( 5 ) = 0
552  descb( 2 ) = ictxt
553  descb( 6 ) = descb2d( lld_ )
554  descb( 7 ) = 0
555 *
556 * reset free_ptr to reuse space for right hand sides
557 *
558  IF( ipb .GT. 0 ) THEN
559  free_ptr = ipb
560  ENDIF
561 *
562  free_ptr = free_ptr + iprepad
563  ipb = free_ptr
564  free_ptr = free_ptr + nrhs*descb2d( lld_ )
565  $ + ipostpad
566 *
567 * Allocate workspace for workspace in TRS routine:
568 *
569  ipw_solve_size = (10+2*min(100,nrhs))*npcol+4*nrhs
570 *
571  ipw_solve = free_ptr
572  free_ptr = free_ptr + ipw_solve_size
573 *
574  ierr( 1 ) = 0
575  IF( free_ptr.GT.memsiz ) THEN
576  IF( iam.EQ.0 )
577  $ WRITE( nout, fmt = 9996 )'solve',
578  $ ( free_ptr )*zplxsz
579  ierr( 1 ) = 1
580  END IF
581 *
582 * Check all processes for an error
583 *
584  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
585  $ ierr, 1, -1, 0 )
586 *
587  IF( ierr( 1 ).GT.0 ) THEN
588  IF( iam.EQ.0 )
589  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
590  kskip = kskip + 1
591  GO TO 15
592  END IF
593 *
594  myrhs_size = numroc( n, nb, mycol, 0, npcol )
595 *
596 * Generate RHS
597 *
598  CALL pzmatgen(ictxtb, 'No', 'No',
599  $ descb2d( m_ ), descb2d( n_ ),
600  $ descb2d( mb_ ), descb2d( nb_ ),
601  $ mem( ipb ),
602  $ descb2d( lld_ ), descb2d( rsrc_ ),
603  $ descb2d( csrc_ ),
604  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
605  $ myrow, npcol, nprow )
606 *
607  IF( check ) THEN
608  CALL pzfillpad( ictxtb, nb, nrhs,
609  $ mem( ipb-iprepad ),
610  $ descb2d( lld_ ),
611  $ iprepad, ipostpad,
612  $ padval )
613  CALL pzfillpad( ictxt, worksiz, 1,
614  $ mem( ip_driver_w-iprepad ),
615  $ worksiz, iprepad,
616  $ ipostpad, padval )
617  END IF
618 *
619 *
620  CALL blacs_barrier( ictxt, 'All')
621  CALL sltimer( 2 )
622 *
623 * Solve linear system via factorization
624 *
625  CALL pzpttrs( uplo, n, nrhs, mem( ipa+int_temp ),
626  $ mem( ipa+1*( nb+10-int_temp ) ), 1,
627  $ desca, mem( ipb ), 1, descb,
628  $ mem( ip_fillin ), fillin_size,
629  $ mem( ipw_solve ), ipw_solve_size,
630  $ info )
631 *
632  CALL sltimer( 2 )
633 *
634  IF( info.NE.0 ) THEN
635  IF( iam.EQ.0 )
636  $ WRITE( nout, fmt = * ) 'PZPTTRS INFO=', info
637  kfail = kfail + 1
638  passed = 'FAILED'
639  GO TO 20
640  END IF
641 *
642  IF( check ) THEN
643 *
644 * check for memory overwrite
645 *
646  CALL pzchekpad( ictxt, 'PZPTTRS-work',
647  $ worksiz, 1,
648  $ mem( ip_driver_w-iprepad ),
649  $ worksiz, iprepad,
650  $ ipostpad, padval )
651 *
652 * check the solution to rhs
653 *
654  sresid = zero
655 *
656 * Reset descriptor describing A to 1-by-P grid for
657 * use in banded utility routines
658 *
659  CALL descinit( desca2d, (2), n,
660  $ (2), nb, 0, 0,
661  $ ictxt, (2), ierr( 1 ) )
662  CALL pzptlaschk( 'H', uplo, n, bw, bw, nrhs,
663  $ mem( ipb ), 1, 1, descb2d,
664  $ iaseed, mem( ipa ), 1, 1, desca2d,
665  $ ibseed, anorm, sresid,
666  $ mem( ip_driver_w ), worksiz )
667 *
668  IF( iam.EQ.0 ) THEN
669  IF( sresid.GT.thresh )
670  $ WRITE( nout, fmt = 9985 ) sresid
671  END IF
672 *
673 * The second test is a NaN trap
674 *
675  IF( ( sresid.LE.thresh ).AND.
676  $ ( (sresid-sresid).EQ.0.0d+0 ) ) THEN
677  kpass = kpass + 1
678  passed = 'PASSED'
679  ELSE
680  kfail = kfail + 1
681  passed = 'FAILED'
682  END IF
683 *
684  END IF
685 *
686  15 CONTINUE
687 * Skipped tests jump to here to print out "SKIPPED"
688 *
689 * Gather maximum of all CPU and WALL clock timings
690 *
691  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
692  $ wtime )
693  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
694  $ ctime )
695 *
696 * Print results
697 *
698  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
699 *
700  nops = 0
701  nops2 = 0
702 *
703  n_first = nb
704  nprocs_real = ( n-1 )/nb + 1
705  n_last = mod( n-1, nb ) + 1
706 *
707 *
708  nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
709  $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
710  $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
711  $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
712  nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
713  $ *( -1.d0 /2.d0+dble(bw)
714  $ *( -1.d0 / 3.d0 ) ) ) +
715  $ dble(n)*( dble(bw) /
716  $ 2.d0*( 1.d0+dble(bw) ) )
717 *
718  nops = nops +
719  $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
720  $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
721  $ ( dble(bw)*( 2*dble(n)-
722  $ ( dble(bw)+1.d0 ) ) )
723 *
724 *
725 * Second calc to represent actual hardware speed
726 *
727 * NB bw^2 flops for LLt factorization in 1st proc
728 *
729  nops2 = ( (dble(n_first))* dble(bw)**2 )
730 *
731  IF ( nprocs_real .GT. 1) THEN
732 * 4 NB bw^2 flops for LLt factorization and
733 * spike calc in last processor
734 *
735  nops2 = nops2 +
736  $ 4*( (dble(n_last)*dble(bw)**2) )
737  ENDIF
738 *
739  IF ( nprocs_real .GT. 2) THEN
740 * 4 NB bw^2 flops for LLt factorization and
741 * spike calc in other processors
742 *
743  nops2 = nops2 + (nprocs_real-2)*
744  $ 4*( (dble(nb)*dble(bw)**2) )
745  ENDIF
746 *
747 * Reduced system
748 *
749  nops2 = nops2 +
750  $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
751  IF( nprocs_real .GT. 1 ) THEN
752  nops2 = nops2 +
753  $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
754  ENDIF
755 *
756 *
757 * nrhs * 4 n_first*bw flops for LLt solve in proc 1.
758 *
759  nops2 = nops2 +
760  $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
761 *
762  IF ( nprocs_real .GT. 1 ) THEN
763 *
764 * 2*nrhs*4 n_last*bw flops for LLt solve in last.
765 *
766  nops2 = nops2 +
767  $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
768  ENDIF
769 *
770  IF ( nprocs_real .GT. 2 ) THEN
771 *
772 * 2 * nrhs * 4 NB*bw flops for LLt solve in others.
773 *
774  nops2 = nops2 +
775  $ ( nprocs_real-2)*2*
776  $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
777  ENDIF
778 *
779 * Reduced system
780 *
781  nops2 = nops2 +
782  $ nrhs*( nprocs_real-1 ) * ( bw*bw )
783  IF( nprocs_real .GT. 1 ) THEN
784  nops2 = nops2 +
785  $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
786  ENDIF
787 *
788 *
789 * Multiply by 4 to get complex count
790 *
791  nops2 = nops2 * dble(4)
792 *
793 * Calculate total megaflops - factorization and/or
794 * solve -- for WALL and CPU time, and print output
795 *
796 * Print WALL time if machine supports it
797 *
798  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
799  tmflops = nops /
800  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
801  ELSE
802  tmflops = 0.0d+0
803  END IF
804 *
805  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
806  tmflops2 = nops2 /
807  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
808  ELSE
809  tmflops2 = 0.0d+0
810  END IF
811 *
812  IF( wtime( 2 ).GE.0.0d+0 )
813  $ WRITE( nout, fmt = 9993 ) 'WALL', uplo,
814  $ n,
815  $ bw,
816  $ nb, nrhs, nprow, npcol,
817  $ wtime( 1 ), wtime( 2 ), tmflops,
818  $ tmflops2, passed
819 *
820 * Print CPU time if machine supports it
821 *
822  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
823  tmflops = nops /
824  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
825  ELSE
826  tmflops = 0.0d+0
827  END IF
828 *
829  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
830  tmflops2 = nops2 /
831  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
832  ELSE
833  tmflops2 = 0.0d+0
834  END IF
835 *
836  IF( ctime( 2 ).GE.0.0d+0 )
837  $ WRITE( nout, fmt = 9993 ) 'CPU ', uplo,
838  $ n,
839  $ bw,
840  $ nb, nrhs, nprow, npcol,
841  $ ctime( 1 ), ctime( 2 ), tmflops,
842  $ tmflops2, passed
843 *
844  END IF
845  20 CONTINUE
846 *
847 *
848  30 CONTINUE
849 * NNB loop
850 *
851  45 CONTINUE
852 * BW[] loop
853 *
854  40 CONTINUE
855 * NMAT loop
856 *
857  CALL blacs_gridexit( ictxt )
858  CALL blacs_gridexit( ictxtb )
859 *
860  50 CONTINUE
861 * NGRIDS DROPOUT
862  60 CONTINUE
863 * NGRIDS loop
864 *
865 * Print ending messages and close output file
866 *
867  IF( iam.EQ.0 ) THEN
868  ktests = kpass + kfail + kskip
869  WRITE( nout, fmt = * )
870  WRITE( nout, fmt = 9992 ) ktests
871  IF( check ) THEN
872  WRITE( nout, fmt = 9991 ) kpass
873  WRITE( nout, fmt = 9989 ) kfail
874  ELSE
875  WRITE( nout, fmt = 9990 ) kpass
876  END IF
877  WRITE( nout, fmt = 9988 ) kskip
878  WRITE( nout, fmt = * )
879  WRITE( nout, fmt = * )
880  WRITE( nout, fmt = 9987 )
881  IF( nout.NE.6 .AND. nout.NE.0 )
882  $ CLOSE ( nout )
883  END IF
884 *
885  CALL blacs_exit( 0 )
886 *
887  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
888  $ '; It should be at least 1' )
889  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
890  $ i4 )
891  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
892  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
893  $ i11 )
894  9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ',
895  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
896  9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
897  $ '-------- ------ ------ ------' )
898  9993 FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
899  $ i5, 1x, i2, 1x,
900  $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
901  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
902  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
903  9990 FORMAT( i5, ' tests completed without checking.' )
904  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
905  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
906  9987 FORMAT( 'END OF TESTS.' )
907  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
908  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
909 *
910  stop
911 *
912 * End of PZPTTRS_DRIVER
913 *
914  END
915 *
max
#define max(A, B)
Definition: pcgemr.c:180
pzptdriver
program pzptdriver
Definition: pzptdriver.f:1
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pzbmatgen
subroutine pzbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzbmatgen.f:5
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
pzpttrs
subroutine pzpttrs(UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pzpttrs.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pzpttrf
subroutine pzpttrf(N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pzpttrf.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pzptinfo
subroutine pzptinfo(SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pzptinfo.f:6
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzptlaschk
subroutine pzptlaschk(SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: pzptlaschk.f:4
pzchekpad
subroutine pzchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzchekpad.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
pzfillpad
subroutine pzfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pzfillpad.f:2