SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzinvdriver.f
Go to the documentation of this file.
1 PROGRAM pzinvdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* =======
10*
11* PZINVDRIVER is the main test program for the COMPLEX*16
12* SCALAPACK matrix inversion routines. This test driver computes the
13* inverse of different kind of matrix and tests the results.
14*
15* The program must be driven by a short data file. An annotated example
16* of a data file can be obtained by deleting the first 3 characters
17* from the following 14 lines:
18* 'ScaLAPACK Matrix Inversion Testing input file'
19* 'PVM machine.'
20* 'INV.out' output file name (if any)
21* 6 device out
22* 5 number of matrix types (next line)
23* 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24* 4 number of problems sizes
25* 1000 2000 3000 4000 values of N
26* 3 number of NB's
27* 4 30 35 values of NB
28* 2 number of process grids (ordered P & Q)
29* 4 2 values of P
30* 4 4 values of Q
31* 1.0 threshold
32*
33* Internal Parameters
34* ===================
35*
36* TOTMEM INTEGER, default = 2000000
37* TOTMEM is a machine-specific parameter indicating the
38* maximum amount of available memory in bytes.
39* The user should customize TOTMEM to his platform. Remember
40* to leave room in memory for the operating system, the BLACS
41* buffer, etc. For example, on a system with 8 MB of memory
42* per process (e.g., one processor on an Intel iPSC/860), the
43* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44* code, BLACS buffer, etc). However, for PVM, we usually set
45* TOTMEM = 2000000. Some experimenting with the maximum value
46* of TOTMEM may be required.
47*
48* INTGSZ INTEGER, default = 4 bytes.
49* DBLESZ INTEGER, default = 8 bytes.
50* ZPLXSZ INTEGER, default = 16 bytes.
51* INTGSZ, DBLESZ and ZPLXSZ indicate the length in bytes on
52* the given platform for an integer, a double precision real
53* and a double precision complex.
54* MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ )
55*
56* All arrays used by SCALAPACK routines are allocated from
57* this array and referenced by pointers. The integer IPA,
58* for example, is a pointer to the starting element of MEM for
59* the matrix A.
60*
61* =====================================================================
62*
63* .. Parameters ..
64 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65 $ lld_, mb_, m_, nb_, n_, rsrc_
66 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69 INTEGER dblesz, intgsz, memsiz, ntests, totmem, zplxsz
70 COMPLEX*16 padval, zero
71 parameter( dblesz = 8, intgsz = 4, totmem = 2000000,
72 $ zplxsz = 16, memsiz = totmem / zplxsz,
73 $ ntests = 20,
74 $ padval = ( -9923.0d+0, -9923.0d+0 ),
75 $ zero = ( 0.0d+0, 0.0d+0 ) )
76* ..
77* .. Local Scalars ..
78 CHARACTER uplo
79 CHARACTER*3 mtyp
80 CHARACTER*6 passed
81 CHARACTER*80 outfile
82 LOGICAL check
83 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
84 $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
85 $ k, ktests, kpass, kfail, kskip, l, lcm, lipiv,
86 $ liwork, lwork, mycol, myrow, n, nb, ngrids,
87 $ nmat, nmtyp, nnb, nout, np, npcol, nprocs,
88 $ nprow, nq, workiinv, workinv, worksiz
89 REAL thresh
90 DOUBLE PRECISION anorm, fresid, nops, rcond, tmflops
91* ..
92* .. Local Arrays ..
93 CHARACTER*3 mattyp( ntests )
94 INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
95 $ nval( ntests ), pval( ntests ),
96 $ qval( ntests )
97 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
98 COMPLEX*16 mem( memsiz )
99* ..
100* .. External Subroutines ..
101 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
102 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
103 $ blacs_pinfo, descinit, igsum2d, pzchekpad,
108* ..
109* .. External Functions ..
110 LOGICAL lsamen
111 INTEGER iceil, ilcm, numroc
112 DOUBLE PRECISION pzlange, pzlanhe, pzlansy, pzlantr
113 EXTERNAL iceil, ilcm, lsamen, numroc, pzlange,
115* ..
116* .. Intrinsic Functions ..
117 INTRINSIC dble, max
118* ..
119* .. Data Statements ..
120 DATA ktests, kpass, kfail, kskip /4*0/
121* ..
122* .. Executable Statements ..
123*
124* Get starting information
125*
126 CALL blacs_pinfo( iam, nprocs )
127 iaseed = 100
128 CALL pzinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
129 $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
130 $ qval, ntests, thresh, mem, iam, nprocs )
131 check = ( thresh.GE.0.0e+0 )
132*
133* Loop over the different matrix types
134*
135 DO 40 i = 1, nmtyp
136*
137 mtyp = mattyp( i )
138*
139* Print headings
140*
141 IF( iam.EQ.0 ) THEN
142 WRITE( nout, fmt = * )
143 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
144 WRITE( nout, fmt = 9986 )
145 $ 'A is a general matrix.'
146 ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
147 WRITE( nout, fmt = 9986 )
148 $ 'A is an upper triangular matrix.'
149 ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
150 WRITE( nout, fmt = 9986 )
151 $ 'A is a lower triangular matrix.'
152 ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
153 WRITE( nout, fmt = 9986 )
154 $ 'A is a Hermitian positive definite matrix.'
155 WRITE( nout, fmt = 9986 )
156 $ 'Only the upper triangular part will be '//
157 $ 'referenced.'
158 ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
159 WRITE( nout, fmt = 9986 )
160 $ 'A is a Hermitian positive definite matrix.'
161 WRITE( nout, fmt = 9986 )
162 $ 'Only the lower triangular part will be '//
163 $ 'referenced.'
164 END IF
165 WRITE( nout, fmt = * )
166 WRITE( nout, fmt = 9995 )
167 WRITE( nout, fmt = 9994 )
168 WRITE( nout, fmt = * )
169 END IF
170*
171* Loop over different process grids
172*
173 DO 30 j = 1, ngrids
174*
175 nprow = pval( j )
176 npcol = qval( j )
177*
178* Make sure grid information is correct
179*
180 ierr( 1 ) = 0
181 IF( nprow.LT.1 ) THEN
182 IF( iam.EQ.0 )
183 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
184 ierr( 1 ) = 1
185 ELSE IF( npcol.LT.1 ) THEN
186 IF( iam.EQ.0 )
187 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
188 ierr( 1 ) = 1
189 ELSE IF( nprow*npcol.GT.nprocs ) THEN
190 IF( iam.EQ.0 )
191 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
192 ierr( 1 ) = 1
193 END IF
194*
195 IF( ierr( 1 ).GT.0 ) THEN
196 IF( iam.EQ.0 )
197 $ WRITE( nout, fmt = 9997 ) 'grid'
198 kskip = kskip + 1
199 GO TO 30
200 END IF
201*
202* Define process grid
203*
204 CALL blacs_get( -1, 0, ictxt )
205 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
206 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
207*
208* Go to bottom of loop if this case doesn't use my process
209*
210 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
211 $ GO TO 30
212*
213 DO 20 k = 1, nmat
214*
215 n = nval( k )
216*
217* Make sure matrix information is correct
218*
219 ierr( 1 ) = 0
220 IF( n.LT.1 ) THEN
221 IF( iam.EQ.0 )
222 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
223 ierr( 1 ) = 1
224 END IF
225*
226* Make sure no one had error
227*
228 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
229*
230 IF( ierr( 1 ).GT.0 ) THEN
231 IF( iam.EQ.0 )
232 $ WRITE( nout, fmt = 9997 ) 'matrix'
233 kskip = kskip + 1
234 GO TO 20
235 END IF
236*
237* Loop over different blocking sizes
238*
239 DO 10 l = 1, nnb
240*
241 nb = nbval( l )
242*
243* Make sure nb is legal
244*
245 ierr( 1 ) = 0
246 IF( nb.LT.1 ) THEN
247 ierr( 1 ) = 1
248 IF( iam.EQ.0 )
249 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
250 END IF
251*
252* Check all processes for an error
253*
254 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
255 $ 0 )
256*
257 IF( ierr( 1 ).GT.0 ) THEN
258 IF( iam.EQ.0 )
259 $ WRITE( nout, fmt = 9997 ) 'NB'
260 kskip = kskip + 1
261 GO TO 10
262 END IF
263*
264* Padding constants
265*
266 np = numroc( n, nb, myrow, 0, nprow )
267 nq = numroc( n, nb, mycol, 0, npcol )
268 IF( check ) THEN
269 iprepad = max( nb, np )
270 imidpad = nb
271 ipostpad = max( nb, nq )
272 ELSE
273 iprepad = 0
274 imidpad = 0
275 ipostpad = 0
276 END IF
277*
278* Initialize the array descriptor for the matrix A
279*
280 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
281 $ max( 1, np ) + imidpad, ierr( 1 ) )
282*
283* Check all processes for an error
284*
285 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
286 $ 0 )
287*
288 IF( ierr( 1 ).LT.0 ) THEN
289 IF( iam.EQ.0 )
290 $ WRITE( nout, fmt = 9997 ) 'descriptor'
291 kskip = kskip + 1
292 GO TO 10
293 END IF
294*
295* Assign pointers into MEM for ScaLAPACK arrays, A is
296* allocated starting at position MEM( IPREPAD+1 )
297*
298 ipa = iprepad+1
299*
300 lcm = ilcm( nprow, npcol )
301 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
302*
303* Pivots are needed by LU factorization
304*
305 ippiv = ipa + desca( lld_ ) * nq + ipostpad +
306 $ iprepad
307 lipiv = iceil( intgsz * ( np + nb ), zplxsz )
308 ipw = ippiv + lipiv + ipostpad + iprepad
309*
310 lwork = max( 1, np * desca( nb_ ) )
311 workinv = lwork + ipostpad
312*
313* Figure the amount of workspace required by the
314* general matrix inversion
315*
316 IF( nprow.EQ.npcol ) THEN
317 liwork = nq + desca( nb_ )
318 ELSE
319*
320* change the integer workspace needed for PDGETRI
321* LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
322* $ ICEIL( ICEIL( DESCA( LLD_ ),
323* $ DESCA( MB_ ) ), LCM / NPROW ) )
324* $ + NQ
325 liwork = numroc( desca( m_ ) +
326 $ desca( mb_ ) * nprow
327 $ + mod( 1 - 1, desca( mb_ ) ), desca( nb_ ),
328 $ mycol, desca( csrc_ ), npcol ) +
329 $ max( desca( mb_ ) * iceil( iceil(
330 $ numroc( desca( m_ ) + desca( mb_ ) * nprow,
331 $ desca( mb_ ), myrow, desca( rsrc_ ), nprow ),
332 $ desca( mb_ ) ), lcm / nprow ), desca( nb_ ) )
333*
334 END IF
335 workiinv = iceil( liwork*intgsz, zplxsz ) +
336 $ ipostpad
337 ipiw = ipw + workinv + iprepad
338 worksiz = workinv + iprepad + workiinv
339*
340 ELSE
341*
342* No pivots or workspace needed for triangular or
343* Hermitian positive definite matrices.
344*
345 ipw = ipa + desca( lld_ ) * nq + ipostpad + iprepad
346 worksiz = 1 + ipostpad
347*
348 END IF
349*
350 IF( check ) THEN
351*
352* Figure amount of work space for the norm
353* computations
354*
355 IF( lsamen( 3, mtyp, 'GEN' ).OR.
356 $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
357 itemp = nq
358 ELSE
359 itemp = 2 * nq + np
360 IF( nprow.NE.npcol ) THEN
361 itemp = itemp +
362 $ nb * iceil( iceil( np, nb ),
363 $ lcm / nprow )
364 END IF
365 END IF
366 worksiz = max( worksiz-ipostpad,
367 $ iceil( dblesz * itemp, zplxsz ) )
368*
369* Figure the amount of workspace required by the
370* checking routine
371*
372 worksiz = max( worksiz, 2 * nb * max( 1, np ) ) +
373 $ ipostpad
374*
375 END IF
376*
377* Check for adequate memory for problem size
378*
379 ierr( 1 ) = 0
380 IF( ipw+worksiz.GT.memsiz ) THEN
381 IF( iam.EQ.0 )
382 $ WRITE( nout, fmt = 9996 ) 'inversion',
383 $ ( ipw + worksiz ) * zplxsz
384 ierr( 1 ) = 1
385 END IF
386*
387* Check all processes for an error
388*
389 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
390 $ 0 )
391*
392 IF( ierr( 1 ).GT.0 ) THEN
393 IF( iam.EQ.0 )
394 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
395 kskip = kskip + 1
396 GO TO 10
397 END IF
398*
399 IF( lsamen( 3, mtyp, 'GEN' ).OR.
400 $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
401*
402* Generate a general diagonally dominant matrix A
403*
404 CALL pzmatgen( ictxt, 'N', 'D', desca( m_ ),
405 $ desca( n_ ), desca( mb_ ),
406 $ desca( nb_ ), mem( ipa ),
407 $ desca( lld_ ), desca( rsrc_ ),
408 $ desca( csrc_ ), iaseed, 0, np, 0,
409 $ nq, myrow, mycol, nprow, npcol )
410*
411 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
412*
413* Generate a Hermitian positive definite matrix A
414*
415 CALL pzmatgen( ictxt, 'H', 'D', desca( m_ ),
416 $ desca( n_ ), desca( mb_ ),
417 $ desca( nb_ ), mem( ipa ),
418 $ desca( lld_ ), desca( rsrc_ ),
419 $ desca( csrc_ ), iaseed, 0, np, 0,
420 $ nq, myrow, mycol, nprow, npcol )
421*
422 END IF
423*
424* Zeros not-referenced part of A, if any.
425*
426 IF( lsamen( 1, mtyp, 'U' ) ) THEN
427*
428 uplo = 'U'
429 CALL pzlaset( 'Lower', n-1, n-1, zero, zero,
430 $ mem( ipa ), 2, 1, desca )
431*
432 ELSE IF( lsamen( 1, mtyp, 'L' ) ) THEN
433*
434 uplo = 'L'
435 CALL pzlaset( 'Upper', n-1, n-1, zero, zero,
436 $ mem( ipa ), 1, 2, desca )
437*
438 ELSE
439*
440 uplo = 'G'
441*
442 END IF
443*
444* Need 1-norm of A for checking
445*
446 IF( check ) THEN
447*
448 CALL pzfillpad( ictxt, np, nq, mem( ipa-iprepad ),
449 $ desca( lld_ ), iprepad, ipostpad,
450 $ padval )
451 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
452 $ mem( ipw-iprepad ),
453 $ worksiz-ipostpad, iprepad,
454 $ ipostpad, padval )
455*
456 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
457*
458 CALL pzfillpad( ictxt, lipiv, 1,
459 $ mem( ippiv-iprepad ), lipiv,
460 $ iprepad, ipostpad, padval )
461 anorm = pzlange( '1', n, n, mem( ipa ), 1, 1,
462 $ desca, mem( ipw ) )
463 CALL pzchekpad( ictxt, 'PZLANGE', np, nq,
464 $ mem( ipa-iprepad ),
465 $ desca( lld_ ),
466 $ iprepad, ipostpad, padval )
467 CALL pzchekpad( ictxt, 'PZLANGE',
468 $ worksiz-ipostpad, 1,
469 $ mem( ipw-iprepad ),
470 $ worksiz-ipostpad,
471 $ iprepad, ipostpad, padval )
472 CALL pzfillpad( ictxt, workinv-ipostpad, 1,
473 $ mem( ipw-iprepad ),
474 $ workinv-ipostpad,
475 $ iprepad, ipostpad, padval )
476 CALL pzfillpad( ictxt, workiinv-ipostpad, 1,
477 $ mem( ipiw-iprepad ),
478 $ workiinv-ipostpad, iprepad,
479 $ ipostpad, padval )
480 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
481*
482 anorm = pzlantr( '1', uplo, 'Non unit', n, n,
483 $ mem( ipa ), 1, 1, desca,
484 $ mem( ipw ) )
485 CALL pzchekpad( ictxt, 'PZLANTR', np, nq,
486 $ mem( ipa-iprepad ),
487 $ desca( lld_ ),
488 $ iprepad, ipostpad, padval )
489 CALL pzchekpad( ictxt, 'PZLANTR',
490 $ worksiz-ipostpad, 1,
491 $ mem( ipw-iprepad ),
492 $ worksiz-ipostpad,
493 $ iprepad, ipostpad, padval )
494*
495 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
496*
497 anorm = pzlanhe( '1', uplo, n, mem( ipa ), 1, 1,
498 $ desca, mem( ipw ) )
499 CALL pzchekpad( ictxt, 'PZLANHE', np, nq,
500 $ mem( ipa-iprepad ),
501 $ desca( lld_ ),
502 $ iprepad, ipostpad, padval )
503 CALL pzchekpad( ictxt, 'PZLANHE',
504 $ worksiz-ipostpad, 1,
505 $ mem( ipw-iprepad ),
506 $ worksiz-ipostpad,
507 $ iprepad, ipostpad, padval )
508*
509 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'SY' ) ) THEN
510*
511 CALL pzfillpad( ictxt, lipiv, 1,
512 $ mem( ippiv-iprepad ), lipiv,
513 $ iprepad, ipostpad, padval )
514 anorm = pzlansy( '1', uplo, n, mem( ipa ), 1, 1,
515 $ desca, mem( ipw ) )
516 CALL pzchekpad( ictxt, 'PZLANSY', np, nq,
517 $ mem( ipa-iprepad ),
518 $ desca( lld_ ),
519 $ iprepad, ipostpad, padval )
520 CALL pzchekpad( ictxt, 'PZLANSY',
521 $ worksiz-ipostpad, 1,
522 $ mem( ipw-iprepad ),
523 $ worksiz-ipostpad,
524 $ iprepad,ipostpad, padval )
525*
526 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'HE' ) ) THEN
527 CALL pzfillpad( ictxt, lipiv, 1,
528 $ mem( ippiv-iprepad ), lipiv,
529 $ iprepad, ipostpad, padval )
530 anorm = pzlanhe( '1', uplo, n, mem( ipa ), 1, 1,
531 $ desca, mem( ipw ) )
532 CALL pzchekpad( ictxt, 'PZLANHE', np, nq,
533 $ mem( ipa-iprepad ),
534 $ desca( lld_ ),
535 $ iprepad, ipostpad, padval )
536 CALL pzchekpad( ictxt, 'PZLANHE',
537 $ worksiz-ipostpad, 1,
538 $ mem( ipw-iprepad ),
539 $ worksiz-ipostpad,
540 $ iprepad, ipostpad, padval )
541*
542 END IF
543*
544 END IF
545*
546 CALL slboot()
547 CALL blacs_barrier( ictxt, 'All' )
548*
549 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
550*
551* Perform LU factorization
552*
553 CALL sltimer( 1 )
554 CALL pzgetrf( n, n, mem( ipa ), 1, 1, desca,
555 $ mem( ippiv ), info )
556 CALL sltimer( 1 )
557*
558 IF( check ) THEN
559*
560* Check for memory overwrite
561*
562 CALL pzchekpad( ictxt, 'PZGETRF', np, nq,
563 $ mem( ipa-iprepad ),
564 $ desca( lld_ ),
565 $ iprepad, ipostpad, padval )
566 CALL pzchekpad( ictxt, 'PZGETRF', lipiv, 1,
567 $ mem( ippiv-iprepad ), lipiv,
568 $ iprepad, ipostpad, padval )
569 END IF
570*
571* Perform the general matrix inversion
572*
573 CALL sltimer( 2 )
574 CALL pzgetri( n, mem( ipa ), 1, 1, desca,
575 $ mem( ippiv ), mem( ipw ), lwork,
576 $ mem( ipiw ), liwork, info )
577 CALL sltimer( 2 )
578*
579 IF( check ) THEN
580*
581* Check for memory overwrite
582*
583 CALL pzchekpad( ictxt, 'PZGETRI', np, nq,
584 $ mem( ipa-iprepad ),
585 $ desca( lld_ ),
586 $ iprepad, ipostpad, padval )
587 CALL pzchekpad( ictxt, 'PZGETRI', lipiv, 1,
588 $ mem( ippiv-iprepad ), lipiv,
589 $ iprepad, ipostpad, padval )
590 CALL pzchekpad( ictxt, 'PZGETRI',
591 $ workiinv-ipostpad, 1,
592 $ mem( ipiw-iprepad ),
593 $ workiinv-ipostpad,
594 $ iprepad, ipostpad, padval )
595 CALL pzchekpad( ictxt, 'PZGETRI',
596 $ workinv-ipostpad, 1,
597 $ mem( ipw-iprepad ),
598 $ workinv-ipostpad,
599 $ iprepad, ipostpad, padval )
600 END IF
601*
602 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
603*
604* Perform the general matrix inversion
605*
606 CALL sltimer( 2 )
607 CALL pztrtri( uplo, 'Non unit', n, mem( ipa ), 1,
608 $ 1, desca, info )
609 CALL sltimer( 2 )
610*
611 IF( check ) THEN
612*
613* Check for memory overwrite
614*
615 CALL pzchekpad( ictxt, 'PZTRTRI', np, nq,
616 $ mem( ipa-iprepad ),
617 $ desca( lld_ ),
618 $ iprepad, ipostpad, padval )
619 END IF
620*
621 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
622*
623* Perform Cholesky factorization
624*
625 CALL sltimer( 1 )
626 CALL pzpotrf( uplo, n, mem( ipa ), 1, 1, desca,
627 $ info )
628 CALL sltimer( 1 )
629*
630 IF( check ) THEN
631*
632* Check for memory overwrite
633*
634 CALL pzchekpad( ictxt, 'PZPOTRF', np, nq,
635 $ mem( ipa-iprepad ),
636 $ desca( lld_ ),
637 $ iprepad, ipostpad, padval )
638 END IF
639*
640* Perform the Hermitian positive definite matrix
641* inversion
642*
643 CALL sltimer( 2 )
644 CALL pzpotri( uplo, n, mem( ipa ), 1, 1, desca,
645 $ info )
646 CALL sltimer( 2 )
647*
648 IF( check ) THEN
649*
650* Check for memory overwrite
651*
652 CALL pzchekpad( ictxt, 'PZPOTRI', np, nq,
653 $ mem( ipa-iprepad ),
654 $ desca( lld_ ),
655 $ iprepad, ipostpad, padval )
656 END IF
657*
658 END IF
659*
660 IF( check ) THEN
661*
662 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
663 $ mem( ipw-iprepad ),
664 $ worksiz-ipostpad, iprepad,
665 $ ipostpad, padval )
666*
667* Compute fresid = || inv(A)*A-I ||
668*
669 CALL pzinvchk( mtyp, n, mem( ipa ), 1, 1, desca,
670 $ iaseed, anorm, fresid, rcond,
671 $ mem( ipw ) )
672*
673* Check for memory overwrite
674*
675 CALL pzchekpad( ictxt, 'PZINVCHK', np, nq,
676 $ mem( ipa-iprepad ),
677 $ desca( lld_ ),
678 $ iprepad, ipostpad, padval )
679 CALL pzchekpad( ictxt, 'PZINVCHK',
680 $ worksiz-ipostpad, 1,
681 $ mem( ipw-iprepad ),
682 $ worksiz-ipostpad, iprepad,
683 $ ipostpad, padval )
684*
685* Test residual and detect NaN result
686*
687 IF( fresid.LE.thresh .AND. info.EQ.0 .AND.
688 $ ( (fresid-fresid) .EQ. 0.0d+0 ) ) THEN
689 kpass = kpass + 1
690 passed = 'PASSED'
691 ELSE
692 kfail = kfail + 1
693 IF( info.GT.0 ) THEN
694 passed = 'SINGUL'
695 ELSE
696 passed = 'FAILED'
697 END IF
698 END IF
699*
700 ELSE
701*
702* Don't perform the checking, only the timing
703* operation
704*
705 kpass = kpass + 1
706 fresid = fresid - fresid
707 passed = 'BYPASS'
708*
709 END IF
710*
711* Gather maximum of all CPU and WALL clock timings
712*
713 CALL slcombine( ictxt, 'All', '>', 'W', 2, 1, wtime )
714 CALL slcombine( ictxt, 'All', '>', 'C', 2, 1, ctime )
715*
716* Print results
717*
718 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
719*
720 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
721*
722* 8/3 N^3 - N^2 flops for LU factorization
723*
724 nops = ( 8.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) -
725 $ dble( n )**2
726*
727* 16/3 N^3 for matrix inversion
728*
729 nops = nops +
730 $ ( 16.0d+0 / 3.0d+0 ) * ( dble( n )**3 )
731*
732 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
733*
734* 4/3 N^3 + 2 N^2 for triangular matrix inversion
735*
736 ctime(1) = 0.0d+0
737 wtime(1) = 0.0d+0
738 nops = ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
739 $ 2.0d+0 * ( dble( n )**2 )
740*
741 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'PD' ) ) THEN
742*
743* 4/3 N^3 + 3 N^2 flops for Cholesky factorization
744*
745 nops = ( 4.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
746 $ 2.0d+0 * ( dble( n )**2 )
747*
748* 8/3 N^3 + 5 N^2 flops for Cholesky inversion
749*
750 nops = nops +
751 $ ( 8.0d+0 / 3.0d+0 ) * ( dble( n )**3 ) +
752 $ 5.0d+0 * ( dble( n )**2 )
753*
754 END IF
755*
756* Figure total megaflops -- factorization and
757* inversion, for WALL and CPU time, and print
758* output.
759*
760* Print WALL time if machine supports it
761*
762 IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
763 tmflops = nops /
764 $ ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
765 ELSE
766 tmflops = 0.0d+0
767 END IF
768*
769 IF( wtime( 2 ) .GE. 0.0d+0 )
770 $ WRITE( nout, fmt = 9993 ) 'WALL', n, nb, nprow,
771 $ npcol, wtime( 1 ), wtime( 2 ), tmflops,
772 $ rcond, fresid, passed
773*
774* Print CPU time if machine supports it
775*
776 IF( ctime( 1 ) + ctime( 2 ) .GT. 0.0d+0 ) THEN
777 tmflops = nops /
778 $ ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
779 ELSE
780 tmflops = 0.0d+0
781 END IF
782*
783 IF( ctime( 2 ) .GE. 0.0d+0 )
784 $ WRITE( nout, fmt = 9993 ) 'CPU ', n, nb, nprow,
785 $ npcol, ctime( 1 ), ctime( 2 ), tmflops,
786 $ rcond, fresid, passed
787 END IF
788*
789 10 CONTINUE
790*
791 20 CONTINUE
792*
793 CALL blacs_gridexit( ictxt )
794*
795 30 CONTINUE
796*
797 40 CONTINUE
798*
799* Print out ending messages and close output file
800*
801 IF( iam.EQ.0 ) THEN
802 ktests = kpass + kfail + kskip
803 WRITE( nout, fmt = * )
804 WRITE( nout, fmt = 9992 ) ktests
805 IF( check ) THEN
806 WRITE( nout, fmt = 9991 ) kpass
807 WRITE( nout, fmt = 9989 ) kfail
808 ELSE
809 WRITE( nout, fmt = 9990 ) kpass
810 END IF
811 WRITE( nout, fmt = 9988 ) kskip
812 WRITE( nout, fmt = * )
813 WRITE( nout, fmt = * )
814 WRITE( nout, fmt = 9987 )
815 IF( nout.NE.6 .AND. nout.NE.0 )
816 $ CLOSE ( nout )
817 END IF
818*
819 CALL blacs_exit( 0 )
820*
821 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
822 $ '; It should be at least 1' )
823 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
824 $ i4 )
825 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
826 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
827 $ i11 )
828 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ',
829 $ ' MFLOPS Cond Resid CHECK' )
830 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
831 $ '----------- ------- ------- ------' )
832 9993 FORMAT( a4, 1x, i5, 1x, i3, 1x, i5, 1x, i5, 1x, f8.2, 1x, f8.2,
833 $ 1x, f11.2, 1x, f7.1, 1x, f7.2, 1x, a6 )
834 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
835 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
836 9990 FORMAT( i5, ' tests completed without checking.' )
837 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
838 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
839 9987 FORMAT( 'END OF TESTS.' )
840 9986 FORMAT( a )
841*
842 stop
843*
844* End of PZINVDRIVER
845*
846 END
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457
#define max(A, B)
Definition pcgemr.c:180
subroutine pzlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pzblastst.f:7509
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
subroutine pzgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition pzgetrf.f:2
subroutine pzgetri(n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
Definition pzgetri.f:3
subroutine pzinvchk(mattyp, n, a, ia, ja, desca, iaseed, anorm, fresid, rcond, work)
Definition pzinvchk.f:3
program pzinvdriver
Definition pzinvdriver.f:1
subroutine pzinvinfo(summry, nout, nmtyp, mattyp, ldmtyp, nmat, nval, ldnval, nnb, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pzinvinfo.f:5
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition pzlange.f:3
double precision function pzlanhe(norm, uplo, n, a, ia, ja, desca, work)
Definition pzlanhe.f:3
double precision function pzlansy(norm, uplo, n, a, ia, ja, desca, work)
Definition pzlansy.f:3
double precision function pzlantr(norm, uplo, diag, m, n, a, ia, ja, desca, work)
Definition pzlantr.f:3
subroutine pzpotrf(uplo, n, a, ia, ja, desca, info)
Definition pzpotrf.f:2
subroutine pzpotri(uplo, n, a, ia, ja, desca, info)
Definition pzpotri.f:2
subroutine pztrtri(uplo, diag, n, a, ia, ja, desca, info)
Definition pztrtri.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267