dd/d90/pdludriver_8f_source.html

      PROGRAM pdludriver

*

*  -- ScaLAPACK testing driver (version 1.7) --

*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,

*     and University of California, Berkeley.

*     May 1, 1997

*

*  Purpose

*  ========

*

*  PDLUDRIVER is the main test program for the DOUBLE PRECISION

*  SCALAPACK LU routines.  This test driver performs an LU factorization

*  and solve. If the input matrix is non-square, only the factorization

*  is performed.  Condition estimation and iterative refinement are

*  optionally performed.

*

*  The program must be driven by a short data file.  An annotated

*  example of a data file can be obtained by deleting the first 3

*  characters from the following 18 lines:

*  'SCALAPACK, Version 2.0,  LU factorization input file'

*  'Intel iPSC/860 hypercube, gamma model.'

*  'LU.out'             output file name (if any)

*  6                    device out

*  1                    number of problem sizes

*  31 201               values of M

*  31 201               values of N

*  1                    number of NB's

*  2                    values of NB

*  1                    number of NRHS's

*  1                    values of NRHS

*  1                    number of NBRHS's

*  1                    values of NBRHS

*  1                    number of process grids (ordered pairs of P & Q)

*  2 1 4 2 3 8          values of P

*  2 4 1 3 2 1          values of Q

*  1.0                  threshold

*  T                    (T or F) Test Cond. Est. and Iter. Ref. Routines

*

*

*  Internal Parameters

*  ===================

*

*  TOTMEM   INTEGER, default = 4000000

*           TOTMEM is a machine-specific parameter indicating the

*           maximum amount of available memory in bytes.

*           The user should customize TOTMEM to his platform.  Remember

*           to leave room in memory for the operating system, the BLACS

*           buffer, etc.  For example, on a system with 8 MB of memory

*           per process (e.g., one processor on an Intel iPSC/860), the

*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,

*           code, BLACS buffer, etc).  However, for PVM, we usually set

*           TOTMEM = 2000000.  Some experimenting with the maximum value

*           of TOTMEM may be required.

*

*  INTGSZ   INTEGER, default = 4 bytes.

*  DBLESZ   INTEGER, default = 8 bytes.

*           INTGSZ and DBLESZ indicate the length in bytes on the

*           given platform for an integer and a double precision real.

*  MEM      DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )

*

*           All arrays used by SCALAPACK routines are allocated from

*           this array and referenced by pointers.  The integer IPA,

*           for example, is a pointer to the starting element of MEM for

*           the matrix A.

*

*  =====================================================================

*

*     .. Parameters ..

      INTEGER            block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,

     $                   lld_, mb_, m_, nb_, n_, rsrc_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      INTEGER            dblesz, intgsz, memsiz, ntests, totmem

      DOUBLE PRECISION   padval, zero

      parameter( dblesz = 8, intgsz = 4, totmem = 4000000,

     $                     memsiz = totmem / dblesz, ntests = 20,

     $                     padval = -9923.0d+0, zero = 0.0d+0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            check, est

      CHARACTER*6        passed

      CHARACTER*80       outfile

      INTEGER            hh, i, iam, iaseed, ibseed, ictxt, imidpad,

     $                   info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,

     $                   ipostpad, ippiv, iprepad, ipw, ipw2, j, k,

     $                   kfail, kk, kpass, kskip, ktests, lcm, lcmq,

     $                   lipiv, liwork, lwork, lw2, m, maxmn,

     $                   minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,

     $                   ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,

     $                   nprocs, nprow, nq, nrhs, worksiz

      REAL               thresh

      DOUBLE PRECISION   anorm, anorm1, fresid, nops, rcond,

     $                   sresid, sresid2, tmflops

*     ..

*     .. Local Arrays ..

      INTEGER            desca( dlen_ ), descb( dlen_ ), ierr( 1 ),

     $                   mval( ntests ), nbrval( ntests ),

     $                   nbval( ntests ), nrval( ntests ),

     $                   nval( ntests ), pval( ntests ),

     $                   qval( ntests )

      DOUBLE PRECISION   ctime( 2 ), mem( memsiz ), wtime( 2 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_barrier, blacs_exit, blacs_get,

     $                   blacs_gridexit, blacs_gridinfo, blacs_gridinit,

     $                   blacs_pinfo, descinit, igsum2d, pdchekpad,

     $                   pdfillpad, pdgecon, pdgerfs,

     $                   pdgetrf, pdgetrrv, pdgetrs,

     $                   pdlafchk, pdlaschk, pdluinfo,

     $                   pdmatgen, slboot, slcombine, sltimer

*     ..

*     .. External Functions ..

      INTEGER            iceil, ilcm, numroc

      DOUBLE PRECISION   pdlange

      EXTERNAL           iceil, ilcm, numroc, pdlange

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          dble, max, min

*     ..

*     .. Data Statements ..

      DATA               kfail, kpass, kskip, ktests / 4*0 /

*     ..

*     .. Executable Statements ..

*

*     Get starting information

*

      CALL blacs_pinfo( iam, nprocs )

      iaseed = 100

      ibseed = 200

      CALL pdluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,

     $               nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,

     $               ntests, ngrids, pval, ntests, qval, ntests, thresh,

     $               est, mem, iam, nprocs )

      check = ( thresh.GE.0.0e+0 )

*

*     Print headings

*

      IF( iam.EQ.0 ) THEN

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = 9995 )

         WRITE( nout, fmt = 9994 )

         WRITE( nout, fmt = * )

      END IF

*

*     Loop over different process grids

*

      DO 50 i = 1, ngrids

*

         nprow = pval( i )

         npcol = qval( i )

*

*        Make sure grid information is correct

*

         ierr( 1 ) = 0

         IF( nprow.LT.1 ) THEN

            IF( iam.EQ.0 )

     $         WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow

            ierr( 1 ) = 1

         ELSE IF( npcol.LT.1 ) THEN

            IF( iam.EQ.0 )

     $         WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol

            ierr( 1 ) = 1

         ELSE IF( nprow*npcol.GT.nprocs ) THEN

            IF( iam.EQ.0 )

     $         WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs

            ierr( 1 ) = 1

         END IF

*

         IF( ierr( 1 ).GT.0 ) THEN

            IF( iam.EQ.0 )

     $         WRITE( nout, fmt = 9997 ) 'grid'

            kskip = kskip + 1

            GO TO 50

         END IF

*

*        Define process grid

*

         CALL blacs_get( -1, 0, ictxt )

         CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )

         CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

*

*        Go to bottom of process grid loop if this case doesn't use my

*        process

*

         IF( myrow.GE.nprow .OR. mycol.GE.npcol )

     $      GO TO 50

*

         DO 40 j = 1, nmat

*

            m = mval( j )

            n = nval( j )

*

*           Make sure matrix information is correct

*

            ierr( 1 ) = 0

            IF( m.LT.1 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m

               ierr( 1 ) = 1

            ELSE IF( n.LT.1 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n

               ierr( 1 ) = 1

            END IF

*

*           Check all processes for an error

*

            CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )

*

            IF( ierr( 1 ).GT.0 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9997 ) 'matrix'

               kskip = kskip + 1

               GO TO 40

            END IF

*

            DO 30 k = 1, nnb

*

               nb = nbval( k )

*

*              Make sure nb is legal

*

               ierr( 1 ) = 0

               IF( nb.LT.1 ) THEN

                  ierr( 1 ) = 1

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb

               END IF

*

*              Check all processes for an error

*

               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )

*

               IF( ierr( 1 ).GT.0 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9997 ) 'NB'

                  kskip = kskip + 1

                  GO TO 30

               END IF

*

*              Padding constants

*

               mp = numroc( m, nb, myrow, 0, nprow )

               np = numroc( n, nb, myrow, 0, nprow )

               nq = numroc( n, nb, mycol, 0, npcol )

               IF( check ) THEN

                  iprepad  = max( nb, mp )

                  imidpad  = nb

                  ipostpad = max( nb, nq )

               ELSE

                  iprepad  = 0

                  imidpad  = 0

                  ipostpad = 0

               END IF

*

*              Initialize the array descriptor for the matrix A

*

               CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,

     $                        max( 1, mp )+imidpad, ierr( 1 ) )

*

*              Check all processes for an error

*

               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )

*

               IF( ierr( 1 ).LT.0 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9997 ) 'descriptor'

                  kskip = kskip + 1

                  GO TO 30

               END IF

*

*              Assign pointers into MEM for SCALAPACK arrays, A is

*              allocated starting at position MEM( IPREPAD+1 )

*

               ipa = iprepad+1

               IF( est .AND. m.EQ.n ) THEN

                  ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad

                  ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad

               ELSE

                  ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad

               END IF

               lipiv = iceil( intgsz*( mp+nb ), dblesz )

               ipw = ippiv + lipiv + ipostpad + iprepad

*

               IF( check ) THEN

*

*                 Calculate the amount of workspace required by the

*                 checking routines PDLANGE, PDGETRRV, and

*                 PDLAFCHK

*

                  worksiz = max( 2, nq )

*

                  worksiz = max( worksiz, mp*desca( nb_ )+

     $                      nq*desca( mb_ ) )

*

                  worksiz = max( worksiz, mp * desca( nb_ ) )

*

                  worksiz = worksiz + ipostpad

*

               ELSE

*

                  worksiz = ipostpad

*

               END IF

*

*              Check for adequate memory for problem size

*

               ierr( 1 ) = 0

               IF( ipw+worksiz.GT.memsiz ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9996 ) 'factorization',

     $                      ( ipw+worksiz )*dblesz

                  ierr( 1 ) = 1

               END IF

*

*              Check all processes for an error

*

               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )

*

               IF( ierr( 1 ).GT.0 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9997 ) 'MEMORY'

                  kskip = kskip + 1

                  GO TO 30

               END IF

*

*              Generate matrix A of Ax = b

*

               CALL pdmatgen( ictxt, 'No transpose', 'No transpose',

     $                        desca( m_ ), desca( n_ ), desca( mb_ ),

     $                        desca( nb_ ), mem( ipa ), desca( lld_ ),

     $                        desca( rsrc_ ), desca( csrc_ ), iaseed, 0,

     $                        mp, 0, nq, myrow, mycol, nprow, npcol )

*

*              Calculate inf-norm of A for residual error-checking and

*              1-norm of A for condition estimation

*

               IF( check ) THEN

                  CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),

     $                            desca( lld_ ), iprepad, ipostpad,

     $                            padval )

                  CALL pdfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),

     $                            lipiv, iprepad, ipostpad, padval )

                  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,

     $                            mem( ipw-iprepad ), worksiz-ipostpad,

     $                            iprepad, ipostpad, padval )

                  anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1, desca,

     $                             mem( ipw ) )

                  anorm1 = pdlange( '1', m, n, mem( ipa ), 1, 1, desca,

     $                             mem( ipw ) )

                  CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,

     $                            mem( ipa-iprepad ), desca( lld_ ),

     $                            iprepad, ipostpad, padval )

                  CALL pdchekpad( ictxt, 'PDLANGE', worksiz-ipostpad,

     $                            1, mem( ipw-iprepad ),

     $                            worksiz-ipostpad, iprepad, ipostpad,

     $                            padval )

               END IF

*

               IF( est .AND. m.EQ.n ) THEN

                  CALL pdmatgen( ictxt, 'No transpose', 'No transpose',

     $                           desca( m_ ), desca( n_ ), desca( mb_ ),

     $                           desca( nb_ ), mem( ipa0 ),

     $                           desca( lld_ ), desca( rsrc_ ),

     $                           desca( csrc_ ), iaseed, 0, mp, 0, nq,

     $                           myrow, mycol, nprow, npcol )

                  IF( check )

     $               CALL pdfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),

     $                               desca( lld_ ), iprepad, ipostpad,

     $                               padval )

               END IF

*

               CALL slboot()

               CALL blacs_barrier( ictxt, 'All' )

               CALL sltimer( 1 )

*

*              Perform LU factorization

*

               CALL pdgetrf( m, n, mem( ipa ), 1, 1, desca,

     $                       mem( ippiv ), info )

*

               CALL sltimer( 1 )

*

               IF( info.NE.0 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = * ) 'PDGETRF INFO=', info

                  kfail = kfail + 1

                  rcond = zero

                  GO TO 30

               END IF

*

               IF( check ) THEN

*

*                 Check for memory overwrite in LU factorization

*

                  CALL pdchekpad( ictxt, 'PDGETRF', mp, nq,

     $                            mem( ipa-iprepad ), desca( lld_ ),

     $                            iprepad, ipostpad, padval )

                  CALL pdchekpad( ictxt, 'PDGETRF', lipiv, 1,

     $                            mem( ippiv-iprepad ), lipiv, iprepad,

     $                            ipostpad, padval )

               END IF

*

               IF( m.NE.n ) THEN

*

*                 For non-square matrices, factorization only

*

                  nrhs = 0

                  nbrhs = 0

*

                  IF( check ) THEN

*

*                    Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)

*

                     CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,

     $                              mem( ippiv ), mem( ipw ) )

                     CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,

     $                              desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

*

*                    Check for memory overwrite

*

                     CALL pdchekpad( ictxt, 'PDGETRRV', mp, nq,

     $                               mem( ipa-iprepad ), desca( lld_ ),

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, 'PDGETRRV', lipiv, 1,

     $                               mem( ippiv-iprepad ), lipiv,

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, 'PDGETRRV',

     $                               worksiz-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               worksiz-ipostpad, iprepad,

     $                               ipostpad, padval )

*

*                    Test residual and detect NaN result

*

                     IF( ( fresid.LE.thresh          ) .AND.

     $                   ( (fresid-fresid).EQ.0.0d+0 ) ) THEN

                        kpass = kpass + 1

                        passed = 'PASSED'

                     ELSE

                        kfail = kfail + 1

                        passed = 'FAILED'

                        IF( myrow.EQ.0 .AND. mycol.EQ.0 )

     $                     WRITE( nout, fmt = 9986 ) fresid

                     END IF

*

                  ELSE

*

*                    Don't perform the checking, only timing

*

                     kpass = kpass + 1

                     fresid = fresid - fresid

                     passed = 'BYPASS'

*

                  END IF

*

*                 Gather maximum of all CPU and WALL clock timings

*

                  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,

     $                            wtime )

                  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,

     $                            ctime )

*

*                 Print results

*

                  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN

*

                     maxmn = max( m, n )

                     minmn = min( m, n )

*

*                    M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU

*                    factorization when M >= N

*

                     nops = dble( maxmn )*( dble( minmn )**2 ) -

     $                      (1.0d+0 / 3.0d+0)*( dble( minmn )**3 ) -

     $                      (1.0d+0 / 2.0d+0)*( dble( minmn )**2 )

*

*                    Calculate total megaflops -- factorization only,

*                    -- for WALL and CPU time, and print output

*

*                    Print WALL time if machine supports it

*

                     IF( wtime( 1 ).GT.0.0d+0 ) THEN

                        tmflops = nops / ( wtime( 1 ) * 1.0d+6 )

                     ELSE

                        tmflops = 0.0d+0

                     END IF

*

                     wtime( 2 ) = 0.0d+0

                     IF( wtime( 1 ).GE.0.0d+0 )

     $                  WRITE( nout, fmt = 9993 ) 'WALL', m, n, nb,

     $                         nrhs, nbrhs, nprow, npcol, wtime( 1 ),

     $                         wtime( 2 ), tmflops, passed

*

*                    Print CPU time if machine supports it

*

                     IF( ctime( 1 ).GT.0.0d+0 ) THEN

                        tmflops = nops / ( ctime( 1 ) * 1.0d+6 )

                     ELSE

                        tmflops = 0.0d+0

                     END IF

*

                     ctime( 2 ) = 0.0d+0

                     IF( ctime( 1 ).GE.0.0d+0 )

     $                  WRITE( nout, fmt = 9993 ) 'CPU ', m, n, nb,

     $                         nrhs, nbrhs, nprow, npcol, ctime( 1 ),

     $                         ctime( 2 ), tmflops, passed

                  END IF

*

               ELSE

*

*                 If M = N

*

                  IF( est ) THEN

*

*                    Calculate workspace required for PDGECON

*

                     lwork = max( 1, 2*np ) + max( 1, 2*nq ) +

     $                       max( 2, desca( nb_ )*

     $                       max( 1, iceil( nprow-1, npcol ) ),

     $                       nq + desca( nb_ )*

     $                       max( 1, iceil( npcol-1, nprow ) ) )

                     ipw2  = ipw + lwork + ipostpad + iprepad

                     liwork = max( 1, np )

                     lw2 = iceil( liwork*intgsz, dblesz ) + ipostpad

*

                     ierr( 1 ) = 0

                     IF( ipw2+lw2.GT.memsiz ) THEN

                        IF( iam.EQ.0 )

     $                     WRITE( nout, fmt = 9996 )'cond est',

     $                     ( ipw2+lw2 )*dblesz

                        ierr( 1 ) = 1

                     END IF

*

*                    Check all processes for an error

*

                     CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,

     $                             -1, 0 )

*

                     IF( ierr( 1 ).GT.0 ) THEN

                        IF( iam.EQ.0 )

     $                     WRITE( nout, fmt = 9997 ) 'MEMORY'

                        kskip = kskip + 1

                        GO TO 30

                     END IF

*

                     IF( check ) THEN

                        CALL pdfillpad( ictxt, lwork, 1,

     $                                  mem( ipw-iprepad ), lwork,

     $                                  iprepad, ipostpad, padval )

                        CALL pdfillpad( ictxt, lw2-ipostpad, 1,

     $                                  mem( ipw2-iprepad ),

     $                                  lw2-ipostpad, iprepad,

     $                                  ipostpad, padval )

                     END IF

*

*                    Compute condition number of the matrix

*

                     CALL pdgecon( '1', n, mem( ipa ), 1, 1, desca,

     $                             anorm1, rcond, mem( ipw ), lwork,

     $                             mem( ipw2 ), liwork, info )

*

                     IF( check ) THEN

                        CALL pdchekpad( ictxt, 'PDGECON', np, nq,

     $                                  mem( ipa-iprepad ),

     $                                  desca( lld_ ), iprepad,

     $                                  ipostpad, padval )

                        CALL pdchekpad( ictxt, 'PDGECON', lwork, 1,

     $                                  mem( ipw-iprepad ), lwork,

     $                                  iprepad, ipostpad, padval )

                        CALL pdchekpad( ictxt, 'PDGECON',

     $                                  lw2-ipostpad, 1,

     $                                  mem( ipw2-iprepad ),

     $                                  lw2-ipostpad, iprepad,

     $                                  ipostpad, padval )

                     END IF

                  END IF

*

*                 Loop over the different values for NRHS

*

                  DO 20 hh = 1, nnr

*

                     nrhs = nrval( hh )

*

                     DO 10 kk = 1, nnbr

*

                        nbrhs = nbrval( kk )

*

*                       Initialize Array Descriptor for rhs

*

                        CALL descinit( descb, n, nrhs, nb, nbrhs, 0, 0,

     $                                 ictxt, max( 1, np )+imidpad,

     $                                 ierr( 1 ) )

*

*                       Check all processes for an error

*

                        CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,

     $                                -1, 0 )

*

                        IF( ierr( 1 ).LT.0 ) THEN

                           IF( iam.EQ.0 )

     $                        WRITE( nout, fmt = 9997 ) 'descriptor'

                           kskip = kskip + 1

                           GO TO 10

                        END IF

*

*                       move IPW to allow room for RHS

*

                        myrhs = numroc( descb( n_ ), descb( nb_ ),

     $                                  mycol, descb( csrc_ ), npcol )

                        ipb = ipw

*

                        IF( est ) THEN

                           ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +

     $                            iprepad

                           ipferr = ipb0 + descb( lld_ )*myrhs +

     $                              ipostpad + iprepad

                           ipberr = myrhs + ipferr + ipostpad + iprepad

                           ipw = myrhs + ipberr + ipostpad + iprepad

                        ELSE

                           ipw = ipb + descb( lld_ )*myrhs + ipostpad +

     $                           iprepad

                        END IF

*

*                       Set worksiz: the routine requiring the most

*                       workspace is PDLASCHK

*

                        IF( check ) THEN

                           lcm = ilcm( nprow, npcol )

                           lcmq = lcm / npcol

                           worksiz = max( worksiz-ipostpad,

     $                       nq * nbrhs + np * nbrhs +

     $                       max( max( nq*nb, 2*nbrhs ),

     $                       nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,

     $                       0,0,lcmq ) ) )

                           worksiz = ipostpad + worksiz

                        ELSE

                           worksiz = ipostpad

                        END IF

*

                        ierr( 1 ) = 0

                        IF( ipw+worksiz.GT.memsiz ) THEN

                           IF( iam.EQ.0 )

     $                        WRITE( nout, fmt = 9996 )'solve',

     $                        ( ipw+worksiz )*dblesz

                           ierr( 1 ) = 1

                        END IF

*

*                       Check all processes for an error

*

                        CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,

     $                                -1, 0 )

*

                        IF( ierr( 1 ).GT.0 ) THEN

                           IF( iam.EQ.0 )

     $                        WRITE( nout, fmt = 9997 ) 'MEMORY'

                           kskip = kskip + 1

                           GO TO 10

                        END IF

*

*                       Generate RHS

*

                        CALL pdmatgen( ictxt, 'No', 'No', descb( m_ ),

     $                                 descb( n_ ), descb( mb_ ),

     $                                 descb( nb_ ), mem( ipb ),

     $                                 descb( lld_ ), descb( rsrc_ ),

     $                                 descb( csrc_ ), ibseed, 0, np, 0,

     $                                 myrhs, myrow, mycol, nprow,

     $                                 npcol )

*

                        IF( check )

     $                     CALL pdfillpad( ictxt, np, myrhs,

     $                                     mem( ipb-iprepad ),

     $                                     descb( lld_ ), iprepad,

     $                                     ipostpad, padval )

*

                        IF( est ) THEN

                           CALL pdmatgen( ictxt, 'No', 'No',

     $                                    descb( m_ ), descb( n_ ),

     $                                    descb( mb_ ), descb( nb_ ),

     $                                    mem( ipb0 ), descb( lld_ ),

     $                                    descb( rsrc_ ),

     $                                    descb( csrc_ ), ibseed, 0, np,

     $                                    0, myrhs, myrow, mycol, nprow,

     $                                    npcol )

                           IF( check ) THEN

                              CALL pdfillpad( ictxt, np, myrhs,

     $                                        mem( ipb0-iprepad ),

     $                                        descb( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdfillpad( ictxt, 1, myrhs,

     $                                        mem( ipferr-iprepad ), 1,

     $                                        iprepad, ipostpad,

     $                                        padval )

                              CALL pdfillpad( ictxt, 1, myrhs,

     $                                        mem( ipberr-iprepad ), 1,

     $                                        iprepad, ipostpad,

     $                                        padval )

                           END IF

                        END IF

*

                        CALL blacs_barrier( ictxt, 'All' )

                        CALL sltimer( 2 )

*

*                       Solve linear system via LU factorization

*

                        CALL pdgetrs( 'No', n, nrhs, mem( ipa ), 1, 1,

     $                                desca, mem( ippiv ), mem( ipb ),

     $                                1, 1, descb, info )

*

                        CALL sltimer( 2 )

*

                        IF( check ) THEN

*

*                          check for memory overwrite

*

                           CALL pdchekpad( ictxt, 'PDGETRS', np, nq,

     $                                     mem( ipa-iprepad ),

     $                                     desca( lld_ ), iprepad,

     $                                     ipostpad, padval )

                           CALL pdchekpad( ictxt, 'PDGETRS', lipiv, 1,

     $                                     mem( ippiv-iprepad ), lipiv,

     $                                     iprepad, ipostpad, padval )

                           CALL pdchekpad( ictxt, 'PDGETRS', np,

     $                                     myrhs, mem( ipb-iprepad ),

     $                                     descb( lld_ ), iprepad,

     $                                     ipostpad, padval )

*

                           CALL pdfillpad( ictxt, worksiz-ipostpad,

     $                                     1, mem( ipw-iprepad ),

     $                                     worksiz-ipostpad, iprepad,

     $                                     ipostpad, padval )

*

*                          check the solution to rhs

*

                           CALL pdlaschk( 'No', 'N', n, nrhs,

     $                                    mem( ipb ), 1, 1, descb,

     $                                    iaseed, 1, 1, desca, ibseed,

     $                                    anorm, sresid, mem( ipw ) )

*

                           IF( iam.EQ.0 .AND. sresid.GT.thresh )

     $                        WRITE( nout, fmt = 9985 ) sresid

*

*                          check for memory overwrite

*

                           CALL pdchekpad( ictxt, 'PDLASCHK', np,

     $                                     myrhs, mem( ipb-iprepad ),

     $                                     descb( lld_ ), iprepad,

     $                                     ipostpad, padval )

                           CALL pdchekpad( ictxt, 'PDLASCHK',

     $                                     worksiz-ipostpad, 1,

     $                                     mem( ipw-iprepad ),

     $                                     worksiz-ipostpad,

     $                                     iprepad, ipostpad, padval )

*

*                          The second test is a NaN trap

*

                           IF( sresid.LE.thresh .AND.

     $                         ( sresid-sresid ).EQ.0.0d+0 ) THEN

                              kpass = kpass + 1

                              passed = 'PASSED'

                           ELSE

                              kfail = kfail + 1

                              passed = 'FAILED'

                           END IF

                        ELSE

                           kpass = kpass + 1

                           sresid = sresid - sresid

                           passed = 'BYPASS'

                        END IF

*

                        IF( est ) THEN

*

*                          Calculate workspace required for PDGERFS

*

                           lwork = max( 1, 3*np )

                           ipw2  = ipw + lwork + ipostpad + iprepad

                           liwork = max( 1, np )

                           lw2 = iceil( liwork*intgsz, dblesz ) +

     $                           ipostpad

*

                           ierr( 1 ) = 0

                           IF( ipw2+lw2.GT.memsiz ) THEN

                              IF( iam.EQ.0 )

     $                           WRITE( nout, fmt = 9996 )

     $                           'iter ref', ( ipw2+lw2 )*dblesz

                              ierr( 1 ) = 1

                           END IF

*

*                          Check all processes for an error

*

                           CALL igsum2d( ictxt, 'All', ' ', 1, 1,

     $                                   ierr, 1, -1, 0 )

*

                           IF( ierr( 1 ).GT.0 ) THEN

                              IF( iam.EQ.0 )

     $                           WRITE( nout, fmt = 9997 )

     $                           'MEMORY'

                              kskip = kskip + 1

                              GO TO 10

                           END IF

*

                           IF( check ) THEN

                              CALL pdfillpad( ictxt, lwork, 1,

     $                                        mem( ipw-iprepad ),

     $                                        lwork, iprepad, ipostpad,

     $                                        padval )

                              CALL pdfillpad( ictxt, lw2-ipostpad, 1,

     $                                        mem( ipw2-iprepad ),

     $                                        lw2-ipostpad, iprepad,

     $                                        ipostpad, padval )

                           END IF

*

*                          Use iterative refinement to improve the

*                          computed solution

*

                           CALL pdgerfs( 'No', n, nrhs, mem( ipa0 ), 1,

     $                                   1, desca, mem( ipa ), 1, 1,

     $                                   desca, mem( ippiv ),

     $                                   mem( ipb0 ), 1, 1, descb,

     $                                   mem( ipb ), 1, 1, descb,

     $                                   mem( ipferr ), mem( ipberr ),

     $                                   mem( ipw ), lwork, mem( ipw2 ),

     $                                   liwork, info )

*

                           IF( check ) THEN

                              CALL pdchekpad( ictxt, 'PDGERFS', np,

     $                                        nq, mem( ipa0-iprepad ),

     $                                        desca( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', np,

     $                                        nq, mem( ipa-iprepad ),

     $                                        desca( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', lipiv,

     $                                        1, mem( ippiv-iprepad ),

     $                                        lipiv, iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', np,

     $                                        myrhs, mem( ipb-iprepad ),

     $                                        descb( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', np,

     $                                        myrhs,

     $                                        mem( ipb0-iprepad ),

     $                                        descb( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', 1,

     $                                        myrhs,

     $                                        mem( ipferr-iprepad ), 1,

     $                                        iprepad, ipostpad,

     $                                        padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', 1,

     $                                        myrhs,

     $                                        mem( ipberr-iprepad ), 1,

     $                                        iprepad, ipostpad,

     $                                        padval )

                              CALL pdchekpad( ictxt, 'PDGERFS', lwork,

     $                                        1, mem( ipw-iprepad ),

     $                                        lwork, iprepad, ipostpad,

     $                                        padval )

                              CALL pdchekpad( ictxt, 'PDGERFS',

     $                                        lw2-ipostpad, 1,

     $                                        mem( ipw2-iprepad ),

     $                                        lw2-ipostpad, iprepad,

     $                                        ipostpad, padval )

*

                              CALL pdfillpad( ictxt, worksiz-ipostpad,

     $                                        1, mem( ipw-iprepad ),

     $                                        worksiz-ipostpad, iprepad,

     $                                        ipostpad, padval )

*

*                             check the solution to rhs

*

                              CALL pdlaschk( 'No', 'N', n, nrhs,

     $                                       mem( ipb ), 1, 1, descb,

     $                                       iaseed, 1, 1, desca,

     $                                       ibseed, anorm, sresid2,

     $                                       mem( ipw ) )

*

                              IF( iam.EQ.0 .AND. sresid2.GT.thresh )

     $                           WRITE( nout, fmt = 9985 ) sresid2

*

*                             check for memory overwrite

*

                              CALL pdchekpad( ictxt, 'PDLASCHK', np,

     $                                        myrhs, mem( ipb-iprepad ),

     $                                        descb( lld_ ), iprepad,

     $                                        ipostpad, padval )

                              CALL pdchekpad( ictxt, 'PDLASCHK',

     $                                        worksiz-ipostpad, 1,

     $                                        mem( ipw-iprepad ),

     $                                        worksiz-ipostpad, iprepad,

     $                                        ipostpad, padval )

                           END IF

                        END IF

*

*                       Gather max. of all CPU and WALL clock timings

*

                        CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,

     $                                  wtime )

                        CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,

     $                                  ctime )

*

*                       Print results

*

                        IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN

*

*                          2/3 N^3 - 1/2 N^2 flops for LU factorization

*

                           nops = (2.0d+0/3.0d+0)*( dble(n)**3 ) -

     $                            (1.0d+0/2.0d+0)*( dble(n)**2 )

*

*                          nrhs * 2 N^2 flops for LU solve.

*

                           nops = nops + 2.0d+0*(dble(n)**2)*dble(nrhs)

*

*                          Calculate total megaflops -- factorization

*                          and solve -- for WALL and CPU time, and print

*                          output

*

*                          Compute megaflops from WALL time

*

                           IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 )

     $                        THEN

                              tmflops = nops /

     $                            ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )

                           ELSE

                              tmflops = 0.0d+0

                           END IF

*

*                          Print WALL time if supported

*

                           IF( wtime( 2 ).GE.0.0d+0 )

     $                        WRITE( nout, fmt = 9993 ) 'WALL', m, n,

     $                               nb, nrhs, nbrhs, nprow, npcol,

     $                               wtime( 1 ), wtime( 2 ), tmflops,

     $                               passed

*

*                          Print CPU time if supported

*

                           IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 )

     $                        THEN

                              tmflops = nops /

     $                            ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )

                           ELSE

                              tmflops = 0.0d+0

                           END IF

*

                           IF( ctime( 2 ).GE.0.0d+0 )

     $                        WRITE( nout, fmt = 9993 ) 'CPU ', m, n,

     $                               nb, nrhs, nbrhs, nprow, npcol,

     $                               ctime( 1 ), ctime( 2 ), tmflops,

     $                               passed

                        END IF

   10                CONTINUE

   20             CONTINUE

*

                  IF( check.AND.( sresid.GT.thresh ) ) THEN

*

*                    Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)

*

                     CALL pdgetrrv( m, n, mem( ipa ), 1, 1, desca,

     $                              mem( ippiv ), mem( ipw ) )

                     CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                              1, desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

*

*                    Check for memory overwrite

*

                     CALL pdchekpad( ictxt, 'PDGETRRV', np, nq,

     $                               mem( ipa-iprepad ), desca( lld_ ),

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, 'PDGETRRV', lipiv,

     $                               1, mem( ippiv-iprepad ), lipiv,

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, 'PDGETRRV',

     $                               worksiz-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               worksiz-ipostpad, iprepad,

     $                               ipostpad, padval )

*

                     IF( myrow.EQ.0 .AND. mycol.EQ.0 )

     $                  WRITE( nout, fmt = 9986 ) fresid

                  END IF

               END IF

   30       CONTINUE

   40    CONTINUE

         CALL blacs_gridexit( ictxt )

   50 CONTINUE

*

*     Print ending messages and close output file

*

   60 CONTINUE

      IF( iam.EQ.0 ) THEN

         ktests = kpass + kfail + kskip

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = 9992 ) ktests

         IF( check ) THEN

            WRITE( nout, fmt = 9991 ) kpass

            WRITE( nout, fmt = 9989 ) kfail

         ELSE

            WRITE( nout, fmt = 9990 ) kpass

         END IF

         WRITE( nout, fmt = 9988 ) kskip

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = 9987 )

         IF( nout.NE.6 .AND. nout.NE.0 )

     $      CLOSE( nout )

      END IF

*

      CALL blacs_exit( 0 )

*

 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,

     $        '; It should be at least 1' )

 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',

     $        i4 )

 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )

 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',

     $        i11 )

 9995 FORMAT( 'TIME     M     N  NB NRHS NBRHS    P    Q  LU Time ',

     $        'Sol Time  MFLOPS  CHECK' )

 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',

     $        '-------- -------- ------' )

 9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,

     $        i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )

 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )

 9991 FORMAT( i5, ' tests completed and passed residual checks.' )

 9990 FORMAT( i5, ' tests completed without checking.' )

 9989 FORMAT( i5, ' tests completed and failed residual checks.' )

 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )

 9987 FORMAT( 'END OF TESTS.' )

 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )

 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )

*

      stop

*

*     End of PDLUDRIVER

*

      END

pdlafchk
subroutine pdlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pdlafchk.f:3

pdmatgen
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4

descinit
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3

iceil
integer function iceil(inum, idenom)
Definition iceil.f:2

ilcm
integer function ilcm(m, n)
Definition ilcm.f:2

numroc
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pdchekpad
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3

pdfillpad
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2

pdgecon
subroutine pdgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition pdgecon.f:3

pdgerfs
subroutine pdgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition pdgerfs.f:5

pdgetrf
subroutine pdgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition pdgetrf.f:2

pdgetrrv
subroutine pdgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition pdgetrrv.f:2

pdgetrs
subroutine pdgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pdgetrs.f:3

pdlange
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3

pdlaschk
subroutine pdlaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pdlaschk.f:4

pdludriver
program pdludriver
Definition pdludriver.f:1

pdluinfo
subroutine pdluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition pdluinfo.f:5

slboot
subroutine slboot()
Definition sltimer.f:2

sltimer
subroutine sltimer(i)
Definition sltimer.f:47

slcombine
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267