ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcinvdriver.f
Go to the documentation of this file.
1  PROGRAM pcinvdriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * Purpose
9 * =======
10 *
11 * PCINVDRIVER is the main test program for the COMPLEX
12 * SCALAPACK matrix inversion routines. This test driver computes the
13 * inverse of different kind of matrix and tests the results.
14 *
15 * The program must be driven by a short data file. An annotated example
16 * of a data file can be obtained by deleting the first 3 characters
17 * from the following 14 lines:
18 * 'ScaLAPACK Matrix Inversion Testing input file'
19 * 'PVM machine.'
20 * 'INV.out' output file name (if any)
21 * 6 device out
22 * 5 number of matrix types (next line)
23 * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24 * 4 number of problems sizes
25 * 1000 2000 3000 4000 values of N
26 * 3 number of NB's
27 * 4 30 35 values of NB
28 * 2 number of process grids (ordered P & Q)
29 * 4 2 values of P
30 * 4 4 values of Q
31 * 1.0 threshold
32 *
33 * Internal Parameters
34 * ===================
35 *
36 * TOTMEM INTEGER, default = 2000000
37 * TOTMEM is a machine-specific parameter indicating the
38 * maximum amount of available memory in bytes.
39 * The user should customize TOTMEM to his platform. Remember
40 * to leave room in memory for the operating system, the BLACS
41 * buffer, etc. For example, on a system with 8 MB of memory
42 * per process (e.g., one processor on an Intel iPSC/860), the
43 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44 * code, BLACS buffer, etc). However, for PVM, we usually set
45 * TOTMEM = 2000000. Some experimenting with the maximum value
46 * of TOTMEM may be required.
47 *
48 * INTGSZ INTEGER, default = 4 bytes.
49 * REALSZ INTEGER, default = 4 bytes.
50 * CPLXSZ INTEGER, default = 8 bytes.
51 * INTGSZ, REALSZ and CPLXSZ indicate the length in bytes on
52 * the given platform for an integer, a single precision real
53 * and a single precision complex.
54 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
55 *
56 * All arrays used by SCALAPACK routines are allocated from
57 * this array and referenced by pointers. The integer IPA,
58 * for example, is a pointer to the starting element of MEM for
59 * the matrix A.
60 *
61 * =====================================================================
62 *
63 * .. Parameters ..
64  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65  $ lld_, mb_, m_, nb_, n_, rsrc_
66  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69  INTEGER cplxsz, intgsz, memsiz, ntests, realsz, totmem
70  COMPLEX padval, zero
71  parameter( cplxsz = 8, intgsz = 4, realsz = 4,
72  $ totmem = 2000000, memsiz = totmem / cplxsz,
73  $ ntests = 20,
74  $ padval = ( -9923.0e+0, -9923.0e+0 ),
75  $ zero = ( 0.0e+0, 0.0e+0 ) )
76 * ..
77 * .. Local Scalars ..
78  CHARACTER uplo
79  CHARACTER*3 mtyp
80  CHARACTER*6 passed
81  CHARACTER*80 outfile
82  LOGICAL check
83  INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
84  $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
85  $ k, ktests, kpass, kfail, kskip, l, lcm, lipiv,
86  $ liwork, lwork, mycol, myrow, n, nb, ngrids,
87  $ nmat, nmtyp, nnb, nout, np, npcol, nprocs,
88  $ nprow, nq, workiinv, workinv, worksiz
89  REAL anorm, fresid, rcond, thresh
90  DOUBLE PRECISION nops, tmflops
91 * ..
92 * .. Local Arrays ..
93  CHARACTER*3 mattyp( ntests )
94  INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
95  $ nval( ntests ), pval( ntests ),
96  $ qval( ntests )
97  DOUBLE PRECISION ctime( 2 ), wtime( 2 )
98  COMPLEX mem( memsiz )
99 * ..
100 * .. External Subroutines ..
101  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
102  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
103  $ blacs_pinfo, descinit, igsum2d, pcchekpad,
108 * ..
109 * .. External Functions ..
110  LOGICAL lsamen
111  INTEGER iceil, ilcm, numroc
113  EXTERNAL iceil, ilcm, lsamen, numroc, pclange,
115 * ..
116 * .. Intrinsic Functions ..
117  INTRINSIC dble, max
118 * ..
119 * .. Data Statements ..
120  DATA ktests, kpass, kfail, kskip /4*0/
121 * ..
122 * .. Executable Statements ..
123 *
124 * Get starting information
125 *
126  CALL blacs_pinfo( iam, nprocs )
127  iaseed = 100
128  CALL pcinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
129  $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
130  $ qval, ntests, thresh, mem, iam, nprocs )
131  check = ( thresh.GE.0.0e+0 )
132 *
133 * Loop over the different matrix types
134 *
135  DO 40 i = 1, nmtyp
136 *
137  mtyp = mattyp( i )
138 *
139 * Print headings
140 *
141  IF( iam.EQ.0 ) THEN
142  WRITE( nout, fmt = * )
143  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
144  WRITE( nout, fmt = 9986 )
145  $ 'A is a general matrix.'
146  ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
147  WRITE( nout, fmt = 9986 )
148  $ 'A is an upper triangular matrix.'
149  ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
150  WRITE( nout, fmt = 9986 )
151  $ 'A is a lower triangular matrix.'
152  ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
153  WRITE( nout, fmt = 9986 )
154  $ 'A is a Hermitian positive definite matrix.'
155  WRITE( nout, fmt = 9986 )
156  $ 'Only the upper triangular part will be '//
157  $ 'referenced.'
158  ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
159  WRITE( nout, fmt = 9986 )
160  $ 'A is a Hermitian positive definite matrix.'
161  WRITE( nout, fmt = 9986 )
162  $ 'Only the lower triangular part will be '//
163  $ 'referenced.'
164  END IF
165  WRITE( nout, fmt = * )
166  WRITE( nout, fmt = 9995 )
167  WRITE( nout, fmt = 9994 )
168  WRITE( nout, fmt = * )
169  END IF
170 *
171 * Loop over different process grids
172 *
173  DO 30 j = 1, ngrids
174 *
175  nprow = pval( j )
176  npcol = qval( j )
177 *
178 * Make sure grid information is correct
179 *
180  ierr( 1 ) = 0
181  IF( nprow.LT.1 ) THEN
182  IF( iam.EQ.0 )
183  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
184  ierr( 1 ) = 1
185  ELSE IF( npcol.LT.1 ) THEN
186  IF( iam.EQ.0 )
187  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
188  ierr( 1 ) = 1
189  ELSE IF( nprow*npcol.GT.nprocs ) THEN
190  IF( iam.EQ.0 )
191  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
192  ierr( 1 ) = 1
193  END IF
194 *
195  IF( ierr( 1 ).GT.0 ) THEN
196  IF( iam.EQ.0 )
197  $ WRITE( nout, fmt = 9997 ) 'grid'
198  kskip = kskip + 1
199  GO TO 30
200  END IF
201 *
202 * Define process grid
203 *
204  CALL blacs_get( -1, 0, ictxt )
205  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
206  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
207 *
208 * Go to bottom of loop if this case doesn't use my process
209 *
210  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
211  $ GO TO 30
212 *
213  DO 20 k = 1, nmat
214 *
215  n = nval( k )
216 *
217 * Make sure matrix information is correct
218 *
219  ierr( 1 ) = 0
220  IF( n.LT.1 ) THEN
221  IF( iam.EQ.0 )
222  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
223  ierr( 1 ) = 1
224  END IF
225 *
226 * Make sure no one had error
227 *
228  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
229 *
230  IF( ierr( 1 ).GT.0 ) THEN
231  IF( iam.EQ.0 )
232  $ WRITE( nout, fmt = 9997 ) 'matrix'
233  kskip = kskip + 1
234  GO TO 20
235  END IF
236 *
237 * Loop over different blocking sizes
238 *
239  DO 10 l = 1, nnb
240 *
241  nb = nbval( l )
242 *
243 * Make sure nb is legal
244 *
245  ierr( 1 ) = 0
246  IF( nb.LT.1 ) THEN
247  ierr( 1 ) = 1
248  IF( iam.EQ.0 )
249  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
250  END IF
251 *
252 * Check all processes for an error
253 *
254  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
255  $ 0 )
256 *
257  IF( ierr( 1 ).GT.0 ) THEN
258  IF( iam.EQ.0 )
259  $ WRITE( nout, fmt = 9997 ) 'NB'
260  kskip = kskip + 1
261  GO TO 10
262  END IF
263 *
264 * Padding constants
265 *
266  np = numroc( n, nb, myrow, 0, nprow )
267  nq = numroc( n, nb, mycol, 0, npcol )
268  IF( check ) THEN
269  iprepad = max( nb, np )
270  imidpad = nb
271  ipostpad = max( nb, nq )
272  ELSE
273  iprepad = 0
274  imidpad = 0
275  ipostpad = 0
276  END IF
277 *
278 * Initialize the array descriptor for the matrix A
279 *
280  CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
281  $ max( 1, np ) + imidpad, ierr( 1 ) )
282 *
283 * Check all processes for an error
284 *
285  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
286  $ 0 )
287 *
288  IF( ierr( 1 ).LT.0 ) THEN
289  IF( iam.EQ.0 )
290  $ WRITE( nout, fmt = 9997 ) 'descriptor'
291  kskip = kskip + 1
292  GO TO 10
293  END IF
294 *
295 * Assign pointers into MEM for ScaLAPACK arrays, A is
296 * allocated starting at position MEM( IPREPAD+1 )
297 *
298  ipa = iprepad+1
299 *
300  lcm = ilcm( nprow, npcol )
301  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
302 *
303 * Pivots are needed by LU factorization
304 *
305  ippiv = ipa + desca( lld_ ) * nq + ipostpad +
306  $ iprepad
307  lipiv = iceil( intgsz * ( np + nb ), cplxsz )
308  ipw = ippiv + lipiv + ipostpad + iprepad
309 *
310  lwork = max( 1, np * desca( nb_ ) )
311  workinv = lwork + ipostpad
312 *
313 * Figure the amount of workspace required by the
314 * general matrix inversion
315 *
316  IF( nprow.EQ.npcol ) THEN
317  liwork = nq + desca( nb_ )
318  ELSE
319 *
320 * change the integer workspace needed for PDGETRI
321 * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
322 * $ ICEIL( ICEIL( DESCA( LLD_ ),
323 * $ DESCA( MB_ ) ), LCM / NPROW ) )
324 * $ + NQ
325  liwork = numroc( desca( m_ ) +
326  $ desca( mb_ ) * nprow
327  $ + mod( 1 - 1, desca( mb_ ) ), desca( nb_ ),
328  $ mycol, desca( csrc_ ), npcol ) +
329  $ max( desca( mb_ ) * iceil( iceil(
330  $ numroc( desca( m_ ) + desca( mb_ ) * nprow,
331  $ desca( mb_ ), myrow, desca( rsrc_ ), nprow ),
332  $ desca( mb_ ) ), lcm / nprow ), desca( nb_ ) )
333 *
334  END IF
335  workiinv = iceil( liwork*intgsz, cplxsz ) +
336  $ ipostpad
337  ipiw = ipw + workinv + iprepad
338  worksiz = workinv + iprepad + workiinv
339 *
340  ELSE
341 *
342 * No pivots or workspace needed for triangular or
343 * Hermitian positive definite matrices.
344 *
345  ipw = ipa + desca( lld_ ) * nq + ipostpad + iprepad
346  worksiz = 1 + ipostpad
347 *
348  END IF
349 *
350  IF( check ) THEN
351 *
352 * Figure amount of work space for the norm
353 * computations
354 *
355  IF( lsamen( 3, mtyp, 'GEN' ).OR.
356  $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
357  itemp = nq
358  ELSE
359  itemp = 2 * nq + np
360  IF( nprow.NE.npcol ) THEN
361  itemp = itemp +
362  $ nb * iceil( iceil( np, nb ),
363  $ lcm / nprow )
364  END IF
365  END IF
366  worksiz = max( worksiz-ipostpad,
367  $ iceil( realsz * itemp, cplxsz ) )
368 *
369 * Figure the amount of workspace required by the
370 * checking routine
371 *
372  worksiz = max( worksiz, 2 * nb * max( 1, np ) ) +
373  $ ipostpad
374 *
375  END IF
376 *
377 * Check for adequate memory for problem size
378 *
379  ierr( 1 ) = 0
380  IF( ipw+worksiz.GT.memsiz ) THEN
381  IF( iam.EQ.0 )
382  $ WRITE( nout, fmt = 9996 ) 'inversion',
383  $ ( ipw + worksiz ) * cplxsz
384  ierr( 1 ) = 1
385  END IF
386 *
387 * Check all processes for an error
388 *
389  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
390  $ 0 )
391 *
392  IF( ierr( 1 ).GT.0 ) THEN
393  IF( iam.EQ.0 )
394  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
395  kskip = kskip + 1
396  GO TO 10
397  END IF
398 *
399  IF( lsamen( 3, mtyp, 'GEN' ).OR.
400  $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
401 *
402 * Generate a general diagonally dominant matrix A
403 *
404  CALL pcmatgen( ictxt, 'N', 'D', desca( m_ ),
405  $ desca( n_ ), desca( mb_ ),
406  $ desca( nb_ ), mem( ipa ),
407  $ desca( lld_ ), desca( rsrc_ ),
408  $ desca( csrc_ ), iaseed, 0, np, 0,
409  $ nq, myrow, mycol, nprow, npcol )
410 *
411  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
412 *
413 * Generate a Hermitian positive definite matrix A
414 *
415  CALL pcmatgen( ictxt, 'H', 'D', desca( m_ ),
416  $ desca( n_ ), desca( mb_ ),
417  $ desca( nb_ ), mem( ipa ),
418  $ desca( lld_ ), desca( rsrc_ ),
419  $ desca( csrc_ ), iaseed, 0, np, 0,
420  $ nq, myrow, mycol, nprow, npcol )
421 *
422  END IF
423 *
424 * Zeros not-referenced part of A, if any.
425 *
426  IF( lsamen( 1, mtyp, 'U' ) ) THEN
427 *
428  uplo = 'U'
429  CALL pclaset( 'Lower', n-1, n-1, zero, zero,
430  $ mem( ipa ), 2, 1, desca )
431 *
432  ELSE IF( lsamen( 1, mtyp, 'L' ) ) THEN
433 *
434  uplo = 'L'
435  CALL pclaset( 'Upper', n-1, n-1, zero, zero,
436  $ mem( ipa ), 1, 2, desca )
437 *
438  ELSE
439 *
440  uplo = 'G'
441 *
442  END IF
443 *
444 * Need 1-norm of A for checking
445 *
446  IF( check ) THEN
447 *
448  CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
449  $ desca( lld_ ), iprepad, ipostpad,
450  $ padval )
451  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
452  $ mem( ipw-iprepad ),
453  $ worksiz-ipostpad, iprepad,
454  $ ipostpad, padval )
455 *
456  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
457 *
458  CALL pcfillpad( ictxt, lipiv, 1,
459  $ mem( ippiv-iprepad ), lipiv,
460  $ iprepad, ipostpad, padval )
461  anorm = pclange( '1', n, n, mem( ipa ), 1, 1,
462  $ desca, mem( ipw ) )
463  CALL pcchekpad( ictxt, 'PCLANGE', np, nq,
464  $ mem( ipa-iprepad ),
465  $ desca( lld_ ),
466  $ iprepad, ipostpad, padval )
467  CALL pcchekpad( ictxt, 'PCLANGE',
468  $ worksiz-ipostpad, 1,
469  $ mem( ipw-iprepad ),
470  $ worksiz-ipostpad,
471  $ iprepad, ipostpad, padval )
472  CALL pcfillpad( ictxt, workinv-ipostpad, 1,
473  $ mem( ipw-iprepad ),
474  $ workinv-ipostpad,
475  $ iprepad, ipostpad, padval )
476  CALL pcfillpad( ictxt, workiinv-ipostpad, 1,
477  $ mem( ipiw-iprepad ),
478  $ workiinv-ipostpad, iprepad,
479  $ ipostpad, padval )
480  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
481 *
482  anorm = pclantr( '1', uplo, 'Non unit', n, n,
483  $ mem( ipa ), 1, 1, desca,
484  $ mem( ipw ) )
485  CALL pcchekpad( ictxt, 'PCLANTR', np, nq,
486  $ mem( ipa-iprepad ),
487  $ desca( lld_ ),
488  $ iprepad, ipostpad, padval )
489  CALL pcchekpad( ictxt, 'PCLANTR',
490  $ worksiz-ipostpad, 1,
491  $ mem( ipw-iprepad ),
492  $ worksiz-ipostpad,
493  $ iprepad, ipostpad, padval )
494 *
495  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
496 *
497  anorm = pclanhe( '1', uplo, n, mem( ipa ), 1, 1,
498  $ desca, mem( ipw ) )
499  CALL pcchekpad( ictxt, 'PCLANHE', np, nq,
500  $ mem( ipa-iprepad ),
501  $ desca( lld_ ),
502  $ iprepad, ipostpad, padval )
503  CALL pcchekpad( ictxt, 'PCLANHE',
504  $ worksiz-ipostpad, 1,
505  $ mem( ipw-iprepad ),
506  $ worksiz-ipostpad,
507  $ iprepad, ipostpad, padval )
508 *
509  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'SY' ) ) THEN
510 *
511  CALL pcfillpad( ictxt, lipiv, 1,
512  $ mem( ippiv-iprepad ), lipiv,
513  $ iprepad, ipostpad, padval )
514  anorm = pclansy( '1', uplo, n, mem( ipa ), 1, 1,
515  $ desca, mem( ipw ) )
516  CALL pcchekpad( ictxt, 'PCLANSY', np, nq,
517  $ mem( ipa-iprepad ),
518  $ desca( lld_ ),
519  $ iprepad, ipostpad, padval )
520  CALL pcchekpad( ictxt, 'PCLANSY',
521  $ worksiz-ipostpad, 1,
522  $ mem( ipw-iprepad ),
523  $ worksiz-ipostpad,
524  $ iprepad,ipostpad, padval )
525 *
526  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'HE' ) ) THEN
527  CALL pcfillpad( ictxt, lipiv, 1,
528  $ mem( ippiv-iprepad ), lipiv,
529  $ iprepad, ipostpad, padval )
530  anorm = pclanhe( '1', uplo, n, mem( ipa ), 1, 1,
531  $ desca, mem( ipw ) )
532  CALL pcchekpad( ictxt, 'PCLANHE', np, nq,
533  $ mem( ipa-iprepad ),
534  $ desca( lld_ ),
535  $ iprepad, ipostpad, padval )
536  CALL pcchekpad( ictxt, 'PCLANHE',
537  $ worksiz-ipostpad, 1,
538  $ mem( ipw-iprepad ),
539  $ worksiz-ipostpad,
540  $ iprepad, ipostpad, padval )
541 *
542  END IF
543 *
544  END IF
545 *
546  CALL slboot()
547  CALL blacs_barrier( ictxt, 'All' )
548 *
549  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
550 *
551 * Perform LU factorization
552 *
553  CALL sltimer( 1 )
554  CALL pcgetrf( n, n, mem( ipa ), 1, 1, desca,
555  $ mem( ippiv ), info )
556  CALL sltimer( 1 )
557 *
558  IF( check ) THEN
559 *
560 * Check for memory overwrite
561 *
562  CALL pcchekpad( ictxt, 'PCGETRF', np, nq,
563  $ mem( ipa-iprepad ),
564  $ desca( lld_ ),
565  $ iprepad, ipostpad, padval )
566  CALL pcchekpad( ictxt, 'PCGETRF', lipiv, 1,
567  $ mem( ippiv-iprepad ), lipiv,
568  $ iprepad, ipostpad, padval )
569  END IF
570 *
571 * Perform the general matrix inversion
572 *
573  CALL sltimer( 2 )
574  CALL pcgetri( n, mem( ipa ), 1, 1, desca,
575  $ mem( ippiv ), mem( ipw ), lwork,
576  $ mem( ipiw ), liwork, info )
577  CALL sltimer( 2 )
578 *
579  IF( check ) THEN
580 *
581 * Check for memory overwrite
582 *
583  CALL pcchekpad( ictxt, 'PCGETRI', np, nq,
584  $ mem( ipa-iprepad ),
585  $ desca( lld_ ),
586  $ iprepad, ipostpad, padval )
587  CALL pcchekpad( ictxt, 'PCGETRI', lipiv, 1,
588  $ mem( ippiv-iprepad ), lipiv,
589  $ iprepad, ipostpad, padval )
590  CALL pcchekpad( ictxt, 'PCGETRI',
591  $ workiinv-ipostpad, 1,
592  $ mem( ipiw-iprepad ),
593  $ workiinv-ipostpad,
594  $ iprepad, ipostpad, padval )
595  CALL pcchekpad( ictxt, 'PCGETRI',
596  $ workinv-ipostpad, 1,
597  $ mem( ipw-iprepad ),
598  $ workinv-ipostpad,
599  $ iprepad, ipostpad, padval )
600  END IF
601 *
602  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
603 *
604 * Perform the general matrix inversion
605 *
606  CALL sltimer( 2 )
607  CALL pctrtri( uplo, 'Non unit', n, mem( ipa ), 1,
608  $ 1, desca, info )
609  CALL sltimer( 2 )
610 *
611  IF( check ) THEN
612 *
613 * Check for memory overwrite
614 *
615  CALL pcchekpad( ictxt, 'PCTRTRI', np, nq,
616  $ mem( ipa-iprepad ),
617  $ desca( lld_ ),
618  $ iprepad, ipostpad, padval )
619  END IF
620 *
621  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
622 *
623 * Perform Cholesky factorization
624 *
625  CALL sltimer( 1 )
626  CALL pcpotrf( uplo, n, mem( ipa ), 1, 1, desca,
627  $ info )
628  CALL sltimer( 1 )
629 *
630  IF( check ) THEN
631 *
632 * Check for memory overwrite
633 *
634  CALL pcchekpad( ictxt, 'PCPOTRF', np, nq,
635  $ mem( ipa-iprepad ),
636  $ desca( lld_ ),
637  $ iprepad, ipostpad, padval )
638  END IF
639 *
640 * Perform the Hermitian positive definite matrix
641 * inversion
642 *
643  CALL sltimer( 2 )
644  CALL pcpotri( uplo, n, mem( ipa ), 1, 1, desca,
645  $ info )
646  CALL sltimer( 2 )
647 *
648  IF( check ) THEN
649 *
650 * Check for memory overwrite
651 *
652  CALL pcchekpad( ictxt, 'PCPOTRI', np, nq,
653  $ mem( ipa-iprepad ),
654  $ desca( lld_ ),
655  $ iprepad, ipostpad, padval )
656  END IF
657 *
658  END IF
659 *
660  IF( check ) THEN
661 *
662  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
663  $ mem( ipw-iprepad ),
664  $ worksiz-ipostpad, iprepad,
665  $ ipostpad, padval )
666 *
667 * Compute fresid = || inv(A)*A-I ||
668 *
669  CALL pcinvchk( mtyp, n, mem( ipa ), 1, 1, desca,
670  $ iaseed, anorm, fresid, rcond,
671  $ mem( ipw ) )
672 *
673 * Check for memory overwrite
674 *
675  CALL pcchekpad( ictxt, 'PCINVCHK', np, nq,
676  $ mem( ipa-iprepad ),
677  $ desca( lld_ ),
678  $ iprepad, ipostpad, padval )
679  CALL pcchekpad( ictxt, 'PCINVCHK',
680  $ worksiz-ipostpad, 1,
681  $ mem( ipw-iprepad ),
682  $ worksiz-ipostpad, iprepad,
683  $ ipostpad, padval )
684 *
685 * Test residual and detect NaN result
686 *
687  IF( fresid.LE.thresh .AND. info.EQ.0 .AND.
688  $ ( (fresid-fresid) .EQ. 0.0e+0 ) ) THEN
689  kpass = kpass + 1
690  passed = 'PASSED'
691  ELSE
692  kfail = kfail + 1
693  IF( info.GT.0 ) THEN
694  passed = 'SINGUL'
695  ELSE
696  passed = 'FAILED'
697  END IF
698  END IF
699 *
700  ELSE
701 *
702 * Don't perform the checking, only the timing
703 * operation
704 *
705  kpass = kpass + 1
706  fresid = fresid - fresid
707  passed = 'BYPASS'
708 *
709  END IF
710 *
711 * Gather maximum of all CPU and WALL clock timings
712 *
713  CALL slcombine( ictxt, 'All', '>', 'W', 2, 1, wtime )
714  CALL slcombine( ictxt, 'All', '>', 'C', 2, 1, ctime )
715 *
716 * Print results
717 *
718  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
719 *
720  IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
721 *
722 * 8/3 N^3 - N^2 flops for LU factorization
723 *
724  nops = ( 8.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) -
725  $ dble( n )**2
726 *
727 * 16/3 N^3 for matrix inversion
728 *
729  nops = nops +
730  $ ( 16.0d+0 / 3.0d+0 ) * ( dble( n )**3 )
731 *
732  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
733 *
734 * 4/3 N^3 + 2 N^2 for triangular matrix inversion
735 *
736  ctime(1) = 0.0d+0
737  wtime(1) = 0.0d+0
738  nops = ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
739  $ 2.0d+0 * ( dble( n )**2 )
740 *
741  ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
742 *
743 * 4/3 N^3 + 3 N^2 flops for Cholesky factorization
744 *
745  nops = ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
746  $ 2.0d+0 * ( dble( n )**2 )
747 *
748 * 8/3 N^3 + 5 N^2 flops for Cholesky inversion
749 *
750  nops = nops +
751  $ ( 8.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
752  $ 5.0d+0 * ( dble( n )**2 )
753 *
754  END IF
755 *
756 * Figure total megaflops -- factorization and
757 * inversion, for WALL and CPU time, and print
758 * output.
759 *
760 * Print WALL time if machine supports it
761 *
762  IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
763  tmflops = nops /
764  $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
765  ELSE
766  tmflops = 0.0d+0
767  END IF
768 *
769  IF( wtime( 2 ) .GE. 0.0d+0 )
770  $ WRITE( nout, fmt = 9993 ) 'WALL', n, nb, nprow,
771  $ npcol, wtime( 1 ), wtime( 2 ), tmflops,
772  $ rcond, fresid, passed
773 *
774 * Print CPU time if machine supports it
775 *
776  IF( ctime( 1 ) + ctime( 2 ) .GT. 0.0d+0 ) THEN
777  tmflops = nops /
778  $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
779  ELSE
780  tmflops = 0.0d+0
781  END IF
782 *
783  IF( ctime( 2 ) .GE. 0.0d+0 )
784  $ WRITE( nout, fmt = 9993 ) 'CPU ', n, nb, nprow,
785  $ npcol, ctime( 1 ), ctime( 2 ), tmflops,
786  $ rcond, fresid, passed
787  END IF
788 *
789  10 CONTINUE
790 *
791  20 CONTINUE
792 *
793  CALL blacs_gridexit( ictxt )
794 *
795  30 CONTINUE
796 *
797  40 CONTINUE
798 *
799 * Print out ending messages and close output file
800 *
801  IF( iam.EQ.0 ) THEN
802  ktests = kpass + kfail + kskip
803  WRITE( nout, fmt = * )
804  WRITE( nout, fmt = 9992 ) ktests
805  IF( check ) THEN
806  WRITE( nout, fmt = 9991 ) kpass
807  WRITE( nout, fmt = 9989 ) kfail
808  ELSE
809  WRITE( nout, fmt = 9990 ) kpass
810  END IF
811  WRITE( nout, fmt = 9988 ) kskip
812  WRITE( nout, fmt = * )
813  WRITE( nout, fmt = * )
814  WRITE( nout, fmt = 9987 )
815  IF( nout.NE.6 .AND. nout.NE.0 )
816  $ CLOSE ( nout )
817  END IF
818 *
819  CALL blacs_exit( 0 )
820 *
821  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
822  $ '; It should be at least 1' )
823  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
824  $ i4 )
825  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
826  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
827  $ i11 )
828  9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ',
829  $ ' MFLOPS Cond Resid CHECK' )
830  9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
831  $ '----------- ------- ------- ------' )
832  9993 FORMAT( a4, 1x, i5, 1x, i3, 1x, i5, 1x, i5, 1x, f8.2, 1x, f8.2,
833  $ 1x, f11.2, 1x, f7.1, 1x, f7.2, 1x, a6 )
834  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
835  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
836  9990 FORMAT( i5, ' tests completed without checking.' )
837  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
838  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
839  9987 FORMAT( 'END OF TESTS.' )
840  9986 FORMAT( a )
841 *
842  stop
843 *
844 * End of PCINVDRIVER
845 *
846  END
max
#define max(A, B)
Definition: pcgemr.c:180
pcgetrf
subroutine pcgetrf(M, N, A, IA, JA, DESCA, IPIV, INFO)
Definition: pcgetrf.f:2
pcgetri
subroutine pcgetri(N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pcgetri.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pclanhe
real function pclanhe(NORM, UPLO, N, A, IA, JA, DESCA, WORK)
Definition: pclanhe.f:3
pcpotri
subroutine pcpotri(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pcpotri.f:2
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pcpotrf
subroutine pcpotrf(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pcpotrf.f:2
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
pcinvinfo
subroutine pcinvinfo(SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcinvinfo.f:5
pclantr
real function pclantr(NORM, UPLO, DIAG, M, N, A, IA, JA, DESCA, WORK)
Definition: pclantr.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pclaset
subroutine pclaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pcblastst.f:7508
slboot
subroutine slboot()
Definition: sltimer.f:2
pctrtri
subroutine pctrtri(UPLO, DIAG, N, A, IA, JA, DESCA, INFO)
Definition: pctrtri.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
lsamen
logical function lsamen(N, CA, CB)
Definition: pblastst.f:1457
pcinvdriver
program pcinvdriver
Definition: pcinvdriver.f:1
pclansy
real function pclansy(NORM, UPLO, N, A, IA, JA, DESCA, WORK)
Definition: pclansy.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2
pcinvchk
subroutine pcinvchk(MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, RCOND, WORK)
Definition: pcinvchk.f:3