ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psptdriver.f
Go to the documentation of this file.
1  PROGRAM psptdriver
2 *
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * Purpose
10 * =======
11 *
12 * PSPTDRIVER is a test program for the
13 * ScaLAPACK Band Cholesky routines corresponding to the options
14 * indicated by SPT. This test driver performs an
15 * A = L*L**T factorization
16 * and solves a linear system with the factors for 1 or more RHS.
17 *
18 * The program must be driven by a short data file.
19 * Here's an example file:
20 *'ScaLAPACK, Version 1.2, banded linear systems input file'
21 *'PVM.'
22 *'' output file name (if any)
23 *6 device out
24 *'L' define Lower or Upper
25 *9 number of problem sizes
26 *1 5 17 28 37 121 200 1023 2048 3073 values of N
27 *6 number of bandwidths
28 *1 2 4 10 31 64 values of BW
29 *1 number of NB's
30 *-1 3 4 5 values of NB (-1 for automatic choice)
31 *1 number of NRHS's (must be 1)
32 *8 values of NRHS
33 *1 number of NBRHS's (ignored)
34 *1 values of NBRHS (ignored)
35 *6 number of process grids
36 *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37 *3.0 threshold
38 *
39 * Internal Parameters
40 * ===================
41 *
42 * TOTMEM INTEGER, default = 6200000.
43 * TOTMEM is a machine-specific parameter indicating the
44 * maximum amount of available memory in bytes.
45 * The user should customize TOTMEM to his platform. Remember
46 * to leave room in memory for the operating system, the BLACS
47 * buffer, etc. For example, on a system with 8 MB of memory
48 * per process (e.g., one processor on an Intel iPSC/860), the
49 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50 * code, BLACS buffer, etc). However, for PVM, we usually set
51 * TOTMEM = 2000000. Some experimenting with the maximum value
52 * of TOTMEM may be required.
53 *
54 * INTGSZ INTEGER, default = 4 bytes.
55 * REALSZ INTEGER, default = 4 bytes.
56 * INTGSZ and REALSZ indicate the length in bytes on the
57 * given platform for an integer and a single precision real.
58 * MEM REAL array, dimension ( TOTMEM / REALSZ )
59 * All arrays used by ScaLAPACK routines are allocated from
60 * this array and referenced by pointers. The integer IPB,
61 * for example, is a pointer to the starting element of MEM for
62 * the solution vector(s) B.
63 *
64 * =====================================================================
65 *
66 * Code Developer: Andrew J. Cleary, University of Tennessee.
67 * Current address: Lawrence Livermore National Labs.
68 * This version released: August, 2001.
69 *
70 * =====================================================================
71 *
72 * .. Parameters ..
73  INTEGER totmem
74  parameter( totmem = 3000000 )
75  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76  $ lld_, mb_, m_, nb_, n_, rsrc_
77  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
80 *
81  REAL zero
82  INTEGER memsiz, ntests, realsz
83  REAL padval
84  parameter( realsz = 4,
85  $ memsiz = totmem / realsz, ntests = 20,
86  $ padval = -9923.0e+0, zero = 0.0e+0 )
87  INTEGER int_one
88  parameter( int_one = 1 )
89 * ..
90 * .. Local Scalars ..
91  LOGICAL check
92  CHARACTER uplo
93  CHARACTER*6 passed
94  CHARACTER*80 outfile
95  INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
96  $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
97  $ imidpad, info, int_temp, ipa, ipb, ipostpad,
98  $ iprepad, ipw, ipw_size, ipw_solve,
99  $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100  $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101  $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102  $ nnr, nout, np, npcol, nprocs, nprocs_real,
103  $ nprow, nq, nrhs, n_first, n_last, worksiz
104  REAL anorm, sresid, thresh
105  DOUBLE PRECISION nops, nops2, tmflops, tmflops2
106 * ..
107 * .. Local Arrays ..
108  INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
109  $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
110  $ nbrval( ntests ), nbval( ntests ),
111  $ nrval( ntests ), nval( ntests ),
112  $ pval( ntests ), qval( ntests )
113  REAL mem( memsiz )
114  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
115 * ..
116 * .. External Subroutines ..
117  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
118  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
119  $ blacs_pinfo, descinit, igsum2d, psbmatgen,
122  $ slcombine, sltimer
123 * ..
124 * .. External Functions ..
125  INTEGER numroc
126  LOGICAL lsame
127  REAL pslange
128  EXTERNAL lsame, numroc, pslange
129 * ..
130 * .. Intrinsic Functions ..
131  INTRINSIC dble, max, min, mod
132 * ..
133 * .. Data Statements ..
134  DATA kfail, kpass, kskip, ktests / 4*0 /
135 * ..
136 *
137 *
138 *
139 * .. Executable Statements ..
140 *
141 * Get starting information
142 *
143  CALL blacs_pinfo( iam, nprocs )
144  iaseed = 100
145  ibseed = 200
146 *
147  CALL psptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
148  $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
149  $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
150  $ qval, ntests, thresh, mem, iam, nprocs )
151 *
152  check = ( thresh.GE.0.0e+0 )
153 *
154 * Print headings
155 *
156  IF( iam.EQ.0 ) THEN
157  WRITE( nout, fmt = * )
158  WRITE( nout, fmt = 9995 )
159  WRITE( nout, fmt = 9994 )
160  WRITE( nout, fmt = * )
161  END IF
162 *
163 * Loop over different process grids
164 *
165  DO 60 i = 1, ngrids
166 *
167  nprow = pval( i )
168  npcol = qval( i )
169 *
170 * Make sure grid information is correct
171 *
172  ierr( 1 ) = 0
173  IF( nprow.LT.1 ) THEN
174  IF( iam.EQ.0 )
175  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
176  ierr( 1 ) = 1
177  ELSE IF( npcol.LT.1 ) THEN
178  IF( iam.EQ.0 )
179  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
180  ierr( 1 ) = 1
181  ELSE IF( nprow*npcol.GT.nprocs ) THEN
182  IF( iam.EQ.0 )
183  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
184  ierr( 1 ) = 1
185  END IF
186 *
187  IF( ierr( 1 ).GT.0 ) THEN
188  IF( iam.EQ.0 )
189  $ WRITE( nout, fmt = 9997 ) 'grid'
190  kskip = kskip + 1
191  GO TO 50
192  END IF
193 *
194 * Define process grid
195 *
196  CALL blacs_get( -1, 0, ictxt )
197  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
198 *
199 *
200 * Define transpose process grid
201 *
202  CALL blacs_get( -1, 0, ictxtb )
203  CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
204 *
205 * Go to bottom of process grid loop if this case doesn't use my
206 * process
207 *
208  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209 *
210  IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
211  GO TO 50
212  ENDIF
213 *
214  DO 40 j = 1, nmat
215 *
216  ierr( 1 ) = 0
217 *
218  n = nval( j )
219 *
220 * Make sure matrix information is correct
221 *
222  IF( n.LT.1 ) THEN
223  IF( iam.EQ.0 )
224  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
225  ierr( 1 ) = 1
226  END IF
227 *
228 * Check all processes for an error
229 *
230  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
231  $ -1, 0 )
232 *
233  IF( ierr( 1 ).GT.0 ) THEN
234  IF( iam.EQ.0 )
235  $ WRITE( nout, fmt = 9997 ) 'size'
236  kskip = kskip + 1
237  GO TO 40
238  END IF
239 *
240 *
241  DO 45 bw_num = 1, nbw
242 *
243  ierr( 1 ) = 0
244 *
245  bw = 1
246  IF( bw.LT.0 ) THEN
247  IF( iam.EQ.0 )
248  $ WRITE( nout, fmt = 9999 ) 'Band', 'bw', bw
249  ierr( 1 ) = 1
250  END IF
251 *
252  IF( bw.GT.n-1 ) THEN
253  ierr( 1 ) = 1
254  END IF
255 *
256 * Check all processes for an error
257 *
258  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
259  $ -1, 0 )
260 *
261  IF( ierr( 1 ).GT.0 ) THEN
262  kskip = kskip + 1
263  GO TO 45
264  END IF
265 *
266  DO 30 k = 1, nnb
267 *
268  ierr( 1 ) = 0
269 *
270  nb = nbval( k )
271  IF( nb.LT.0 ) THEN
272  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
273  $ + int_one
274  nb = max( nb, 2*int_one )
275  nb = min( n, nb )
276  END IF
277 *
278 * Make sure NB is legal
279 *
280  ierr( 1 ) = 0
281  IF( nb.LT.min( 2*int_one, n ) ) THEN
282  ierr( 1 ) = 1
283  ENDIF
284 *
285 * Check all processes for an error
286 *
287  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
288  $ -1, 0 )
289 *
290  IF( ierr( 1 ).GT.0 ) THEN
291  kskip = kskip + 1
292  GO TO 30
293  END IF
294 *
295 * Padding constants
296 *
297  np = numroc( (2), (2),
298  $ myrow, 0, nprow )
299  nq = numroc( n, nb, mycol, 0, npcol )
300 *
301  IF( check ) THEN
302  iprepad = ((2)+10)
303  imidpad = 10
304  ipostpad = ((2)+10)
305  ELSE
306  iprepad = 0
307  imidpad = 0
308  ipostpad = 0
309  END IF
310 *
311 * Initialize the array descriptor for the matrix A
312 *
313  CALL descinit( desca2d, n, (2),
314  $ nb, 1, 0, 0,
315  $ ictxtb, nb+10, ierr( 1 ) )
316 *
317 * Convert this to 1D descriptor
318 *
319  desca( 1 ) = 501
320  desca( 3 ) = n
321  desca( 4 ) = nb
322  desca( 5 ) = 0
323  desca( 2 ) = ictxt
324  desca( 6 ) = ((2)+10)
325  desca( 7 ) = 0
326 *
327  ierr_temp = ierr( 1 )
328  ierr( 1 ) = 0
329  ierr( 1 ) = min( ierr( 1 ), ierr_temp )
330 *
331 * Check all processes for an error
332 *
333  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
334 *
335  IF( ierr( 1 ).LT.0 ) THEN
336  IF( iam.EQ.0 )
337  $ WRITE( nout, fmt = 9997 ) 'descriptor'
338  kskip = kskip + 1
339  GO TO 30
340  END IF
341 *
342 * Assign pointers into MEM for SCALAPACK arrays, A is
343 * allocated starting at position MEM( IPREPAD+1 )
344 *
345  free_ptr = 1
346  ipb = 0
347 *
348 * Save room for prepadding
349  free_ptr = free_ptr + iprepad
350 *
351  ipa = free_ptr
352  free_ptr = free_ptr + (nb+10)*(2)
353  $ + ipostpad
354 *
355 * Add memory for fillin
356 * Fillin space needs to store:
357 * Fillin spike:
358 * Contribution to previous proc's diagonal block of
359 * reduced system:
360 * Off-diagonal block of reduced system:
361 * Diagonal block of reduced system:
362 *
363  fillin_size =
364  $ (12*npcol + 3*nb)
365 *
366 * Claim memory for fillin
367 *
368  free_ptr = free_ptr + iprepad
369  ip_fillin = free_ptr
370  free_ptr = free_ptr + fillin_size
371 *
372 * Workspace needed by computational routines:
373 *
374  ipw_size = 0
375 *
376 * factorization:
377 *
378  ipw_size = 8*npcol
379 *
380 * Claim memory for IPW
381 *
382  ipw = free_ptr
383  free_ptr = free_ptr + ipw_size
384 *
385 * Check for adequate memory for problem size
386 *
387  ierr( 1 ) = 0
388  IF( free_ptr.GT.memsiz ) THEN
389  IF( iam.EQ.0 )
390  $ WRITE( nout, fmt = 9996 )
391  $ 'divide and conquer factorization',
392  $ (free_ptr )*realsz
393  ierr( 1 ) = 1
394  END IF
395 *
396 * Check all processes for an error
397 *
398  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
399  $ 1, -1, 0 )
400 *
401  IF( ierr( 1 ).GT.0 ) THEN
402  IF( iam.EQ.0 )
403  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
404  kskip = kskip + 1
405  GO TO 30
406  END IF
407 *
408 * Worksize needed for LAPRNT
409  worksiz = max( ((2)+10), nb )
410 *
411  IF( check ) THEN
412 *
413 * Calculate the amount of workspace required by
414 * the checking routines.
415 *
416 * PSLANGE
417  worksiz = max( worksiz, desca2d( nb_ ) )
418 *
419 * PSPTLASCHK
420  worksiz = max( worksiz,
421  $ max(5,nb)+2*nb )
422  END IF
423 *
424  free_ptr = free_ptr + iprepad
425  ip_driver_w = free_ptr
426  free_ptr = free_ptr + worksiz + ipostpad
427 *
428 *
429 * Check for adequate memory for problem size
430 *
431  ierr( 1 ) = 0
432  IF( free_ptr.GT.memsiz ) THEN
433  IF( iam.EQ.0 )
434  $ WRITE( nout, fmt = 9996 ) 'factorization',
435  $ ( free_ptr )*realsz
436  ierr( 1 ) = 1
437  END IF
438 *
439 * Check all processes for an error
440 *
441  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
442  $ 1, -1, 0 )
443 *
444  IF( ierr( 1 ).GT.0 ) THEN
445  IF( iam.EQ.0 )
446  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
447  kskip = kskip + 1
448  GO TO 30
449  END IF
450 *
451  CALL psbmatgen( ictxt, uplo, 'T', bw, bw, n, (2), nb,
452  $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
453  $ mycol, nprow, npcol )
454  CALL psfillpad( ictxt, nq, np, mem( ipa-iprepad ),
455  $ nb+10, iprepad, ipostpad,
456  $ padval )
457 *
458  CALL psfillpad( ictxt, worksiz, 1,
459  $ mem( ip_driver_w-iprepad ), worksiz,
460  $ iprepad, ipostpad, padval )
461 *
462 * Calculate norm of A for residual error-checking
463 *
464  IF( check ) THEN
465 *
466  anorm = pslange( 'I', n,
467  $ (2), mem( ipa ), 1, 1,
468  $ desca2d, mem( ip_driver_w ) )
469  CALL pschekpad( ictxt, 'PSLANGE', nq, np,
470  $ mem( ipa-iprepad ), nb+10,
471  $ iprepad, ipostpad, padval )
472  CALL pschekpad( ictxt, 'PSLANGE',
473  $ worksiz, 1,
474  $ mem( ip_driver_w-iprepad ), worksiz,
475  $ iprepad, ipostpad, padval )
476  END IF
477 *
478  IF( lsame( uplo, 'L' ) ) THEN
479  int_temp = 0
480  ELSE
481  int_temp = desca2d( lld_ )
482  ENDIF
483 *
484 *
485  CALL slboot()
486  CALL blacs_barrier( ictxt, 'All' )
487 *
488 * Perform factorization
489 *
490  CALL sltimer( 1 )
491 *
492  CALL pspttrf( n, mem( ipa+int_temp ),
493  $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
494  $ mem( ip_fillin ), fillin_size, mem( ipw ),
495  $ ipw_size, info )
496 *
497  CALL sltimer( 1 )
498 *
499  IF( info.NE.0 ) THEN
500  IF( iam.EQ.0 ) THEN
501  WRITE( nout, fmt = * ) 'PSPTTRF INFO=', info
502  ENDIF
503  kfail = kfail + 1
504  GO TO 30
505  END IF
506 *
507  IF( check ) THEN
508 *
509 * Check for memory overwrite in factorization
510 *
511  CALL pschekpad( ictxt, 'PSPTTRF', nq,
512  $ np, mem( ipa-iprepad ), nb+10,
513  $ iprepad, ipostpad, padval )
514  END IF
515 *
516 *
517 * Loop over the different values for NRHS
518 *
519  DO 20 hh = 1, nnr
520 *
521  ierr( 1 ) = 0
522 *
523  nrhs = nrval( hh )
524 *
525 * Initialize Array Descriptor for rhs
526 *
527  CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
528  $ ictxtb, nb+10, ierr( 1 ) )
529 *
530 * Convert this to 1D descriptor
531 *
532  descb( 1 ) = 502
533  descb( 3 ) = n
534  descb( 4 ) = nb
535  descb( 5 ) = 0
536  descb( 2 ) = ictxt
537  descb( 6 ) = descb2d( lld_ )
538  descb( 7 ) = 0
539 *
540 * reset free_ptr to reuse space for right hand sides
541 *
542  IF( ipb .GT. 0 ) THEN
543  free_ptr = ipb
544  ENDIF
545 *
546  free_ptr = free_ptr + iprepad
547  ipb = free_ptr
548  free_ptr = free_ptr + nrhs*descb2d( lld_ )
549  $ + ipostpad
550 *
551 * Allocate workspace for workspace in TRS routine:
552 *
553  ipw_solve_size = (10+2*min(100,nrhs))*npcol+4*nrhs
554 *
555  ipw_solve = free_ptr
556  free_ptr = free_ptr + ipw_solve_size
557 *
558  ierr( 1 ) = 0
559  IF( free_ptr.GT.memsiz ) THEN
560  IF( iam.EQ.0 )
561  $ WRITE( nout, fmt = 9996 )'solve',
562  $ ( free_ptr )*realsz
563  ierr( 1 ) = 1
564  END IF
565 *
566 * Check all processes for an error
567 *
568  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
569  $ ierr, 1, -1, 0 )
570 *
571  IF( ierr( 1 ).GT.0 ) THEN
572  IF( iam.EQ.0 )
573  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
574  kskip = kskip + 1
575  GO TO 15
576  END IF
577 *
578  myrhs_size = numroc( n, nb, mycol, 0, npcol )
579 *
580 * Generate RHS
581 *
582  CALL psmatgen(ictxtb, 'No', 'No',
583  $ descb2d( m_ ), descb2d( n_ ),
584  $ descb2d( mb_ ), descb2d( nb_ ),
585  $ mem( ipb ),
586  $ descb2d( lld_ ), descb2d( rsrc_ ),
587  $ descb2d( csrc_ ),
588  $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
589  $ myrow, npcol, nprow )
590 *
591  IF( check ) THEN
592  CALL psfillpad( ictxtb, nb, nrhs,
593  $ mem( ipb-iprepad ),
594  $ descb2d( lld_ ),
595  $ iprepad, ipostpad,
596  $ padval )
597  CALL psfillpad( ictxt, worksiz, 1,
598  $ mem( ip_driver_w-iprepad ),
599  $ worksiz, iprepad,
600  $ ipostpad, padval )
601  END IF
602 *
603 *
604  CALL blacs_barrier( ictxt, 'All')
605  CALL sltimer( 2 )
606 *
607 * Solve linear system via factorization
608 *
609  CALL pspttrs( n, nrhs, mem( ipa+int_temp ),
610  $ mem( ipa+1*( nb+10-int_temp ) ), 1,
611  $ desca, mem( ipb ), 1, descb,
612  $ mem( ip_fillin ), fillin_size,
613  $ mem( ipw_solve ), ipw_solve_size,
614  $ info )
615 *
616  CALL sltimer( 2 )
617 *
618  IF( info.NE.0 ) THEN
619  IF( iam.EQ.0 )
620  $ WRITE( nout, fmt = * ) 'PSPTTRS INFO=', info
621  kfail = kfail + 1
622  passed = 'FAILED'
623  GO TO 20
624  END IF
625 *
626  IF( check ) THEN
627 *
628 * check for memory overwrite
629 *
630  CALL pschekpad( ictxt, 'PSPTTRS-work',
631  $ worksiz, 1,
632  $ mem( ip_driver_w-iprepad ),
633  $ worksiz, iprepad,
634  $ ipostpad, padval )
635 *
636 * check the solution to rhs
637 *
638  sresid = zero
639 *
640 * Reset descriptor describing A to 1-by-P grid for
641 * use in banded utility routines
642 *
643  CALL descinit( desca2d, (2), n,
644  $ (2), nb, 0, 0,
645  $ ictxt, (2), ierr( 1 ) )
646  CALL psptlaschk( 'S', uplo, n, bw, bw, nrhs,
647  $ mem( ipb ), 1, 1, descb2d,
648  $ iaseed, mem( ipa ), 1, 1, desca2d,
649  $ ibseed, anorm, sresid,
650  $ mem( ip_driver_w ), worksiz )
651 *
652  IF( iam.EQ.0 ) THEN
653  IF( sresid.GT.thresh )
654  $ WRITE( nout, fmt = 9985 ) sresid
655  END IF
656 *
657 * The second test is a NaN trap
658 *
659  IF( ( sresid.LE.thresh ).AND.
660  $ ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
661  kpass = kpass + 1
662  passed = 'PASSED'
663  ELSE
664  kfail = kfail + 1
665  passed = 'FAILED'
666  END IF
667 *
668  END IF
669 *
670  15 CONTINUE
671 * Skipped tests jump to here to print out "SKIPPED"
672 *
673 * Gather maximum of all CPU and WALL clock timings
674 *
675  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
676  $ wtime )
677  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
678  $ ctime )
679 *
680 * Print results
681 *
682  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
683 *
684  nops = 0
685  nops2 = 0
686 *
687  n_first = nb
688  nprocs_real = ( n-1 )/nb + 1
689  n_last = mod( n-1, nb ) + 1
690 *
691 *
692  nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
693  $ ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
694  $ dble(n)*( 1.d0+dble(bw)*( 3.d0 /
695  $ 2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
696  nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
697  $ *( -1.d0 /2.d0+dble(bw)
698  $ *( -1.d0 / 3.d0 ) ) ) +
699  $ dble(n)*( dble(bw) /
700  $ 2.d0*( 1.d0+dble(bw) ) )
701 *
702  nops = nops +
703  $ dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
704  $ ( dble(bw)+1.d0 ) )+ dble(nrhs)*
705  $ ( dble(bw)*( 2*dble(n)-
706  $ ( dble(bw)+1.d0 ) ) )
707 *
708 *
709 * Second calc to represent actual hardware speed
710 *
711 * NB bw^2 flops for LLt factorization in 1st proc
712 *
713  nops2 = ( (dble(n_first))* dble(bw)**2 )
714 *
715  IF ( nprocs_real .GT. 1) THEN
716 * 4 NB bw^2 flops for LLt factorization and
717 * spike calc in last processor
718 *
719  nops2 = nops2 +
720  $ 4*( (dble(n_last)*dble(bw)**2) )
721  ENDIF
722 *
723  IF ( nprocs_real .GT. 2) THEN
724 * 4 NB bw^2 flops for LLt factorization and
725 * spike calc in other processors
726 *
727  nops2 = nops2 + (nprocs_real-2)*
728  $ 4*( (dble(nb)*dble(bw)**2) )
729  ENDIF
730 *
731 * Reduced system
732 *
733  nops2 = nops2 +
734  $ ( nprocs_real-1 ) * ( bw*bw*bw/3 )
735  IF( nprocs_real .GT. 1 ) THEN
736  nops2 = nops2 +
737  $ ( nprocs_real-2 ) * ( 2 * bw*bw*bw )
738  ENDIF
739 *
740 *
741 * nrhs * 4 n_first*bw flops for LLt solve in proc 1.
742 *
743  nops2 = nops2 +
744  $ ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
745 *
746  IF ( nprocs_real .GT. 1 ) THEN
747 *
748 * 2*nrhs*4 n_last*bw flops for LLt solve in last.
749 *
750  nops2 = nops2 +
751  $ 2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
752  ENDIF
753 *
754  IF ( nprocs_real .GT. 2 ) THEN
755 *
756 * 2 * nrhs * 4 NB*bw flops for LLt solve in others.
757 *
758  nops2 = nops2 +
759  $ ( nprocs_real-2)*2*
760  $ ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
761  ENDIF
762 *
763 * Reduced system
764 *
765  nops2 = nops2 +
766  $ nrhs*( nprocs_real-1 ) * ( bw*bw )
767  IF( nprocs_real .GT. 1 ) THEN
768  nops2 = nops2 +
769  $ nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
770  ENDIF
771 *
772 *
773 * Calculate total megaflops - factorization and/or
774 * solve -- for WALL and CPU time, and print output
775 *
776 * Print WALL time if machine supports it
777 *
778  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
779  tmflops = nops /
780  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
781  ELSE
782  tmflops = 0.0d+0
783  END IF
784 *
785  IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
786  tmflops2 = nops2 /
787  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
788  ELSE
789  tmflops2 = 0.0d+0
790  END IF
791 *
792  IF( wtime( 2 ).GE.0.0d+0 )
793  $ WRITE( nout, fmt = 9993 ) 'WALL', uplo,
794  $ n,
795  $ bw,
796  $ nb, nrhs, nprow, npcol,
797  $ wtime( 1 ), wtime( 2 ), tmflops,
798  $ tmflops2, passed
799 *
800 * Print CPU time if machine supports it
801 *
802  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
803  tmflops = nops /
804  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
805  ELSE
806  tmflops = 0.0d+0
807  END IF
808 *
809  IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
810  tmflops2 = nops2 /
811  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
812  ELSE
813  tmflops2 = 0.0d+0
814  END IF
815 *
816  IF( ctime( 2 ).GE.0.0d+0 )
817  $ WRITE( nout, fmt = 9993 ) 'CPU ', uplo,
818  $ n,
819  $ bw,
820  $ nb, nrhs, nprow, npcol,
821  $ ctime( 1 ), ctime( 2 ), tmflops,
822  $ tmflops2, passed
823 *
824  END IF
825  20 CONTINUE
826 *
827 *
828  30 CONTINUE
829 * NNB loop
830 *
831  45 CONTINUE
832 * BW[] loop
833 *
834  40 CONTINUE
835 * NMAT loop
836 *
837  CALL blacs_gridexit( ictxt )
838  CALL blacs_gridexit( ictxtb )
839 *
840  50 CONTINUE
841 * NGRIDS DROPOUT
842  60 CONTINUE
843 * NGRIDS loop
844 *
845 * Print ending messages and close output file
846 *
847  IF( iam.EQ.0 ) THEN
848  ktests = kpass + kfail + kskip
849  WRITE( nout, fmt = * )
850  WRITE( nout, fmt = 9992 ) ktests
851  IF( check ) THEN
852  WRITE( nout, fmt = 9991 ) kpass
853  WRITE( nout, fmt = 9989 ) kfail
854  ELSE
855  WRITE( nout, fmt = 9990 ) kpass
856  END IF
857  WRITE( nout, fmt = 9988 ) kskip
858  WRITE( nout, fmt = * )
859  WRITE( nout, fmt = * )
860  WRITE( nout, fmt = 9987 )
861  IF( nout.NE.6 .AND. nout.NE.0 )
862  $ CLOSE ( nout )
863  END IF
864 *
865  CALL blacs_exit( 0 )
866 *
867  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
868  $ '; It should be at least 1' )
869  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
870  $ i4 )
871  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
872  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
873  $ i11 )
874  9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ',
875  $ 'Slv Time MFLOPS MFLOP2 CHECK' )
876  9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
877  $ '-------- ------ ------ ------' )
878  9993 FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
879  $ i5, 1x, i2, 1x,
880  $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
881  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
882  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
883  9990 FORMAT( i5, ' tests completed without checking.' )
884  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
885  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
886  9987 FORMAT( 'END OF TESTS.' )
887  9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
888  9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
889 *
890  stop
891 *
892 * End of PSPTTRS_DRIVER
893 *
894  END
895 *
max
#define max(A, B)
Definition: pcgemr.c:180
pspttrs
subroutine pspttrs(N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pspttrs.f:3
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
psbmatgen
subroutine psbmatgen(ICTXT, AFORM, AFORM2, BWL, BWU, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, MYROW, MYCOL, NPROW, NPCOL)
Definition: psbmatgen.f:5
psptdriver
program psptdriver
Definition: psptdriver.f:1
psptlaschk
subroutine psptlaschk(SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, DESCX, IASEED, A, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK, WORKSIZ)
Definition: psptlaschk.f:4
pspttrf
subroutine pspttrf(N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO)
Definition: pspttrf.f:3
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
psptinfo
subroutine psptinfo(SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: psptinfo.f:6
min
#define min(A, B)
Definition: pcgemr.c:181