d7/d9a/pcptdriver_8f_source.html

      PROGRAM pcptdriver
*
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     November 15, 1997
*
*  Purpose
*  =======
*
*  PCPTDRIVER is a test program for the ScaLAPACK complex positive
*  definite tridiagonal routines PCPTTRF and PCPTTRS.  For every
*  combination of process grid, problem size N, block size NB and
*  number of right hand sides NRHS read from the input file, the
*  driver performs an
*  A = L*L**H factorization
*  and solves a linear system with the factors for 1 or more RHS.
*  Both phases are timed and, when THRESH is non-negative, the
*  scaled residual of the solution is compared against THRESH.
*
*  The program must be driven by a short data file.
*  Here's an example file:
*'ScaLAPACK, Version 1.2, banded linear systems input file'
*'PVM.'
*''                              output file name (if any)
*6                               device out
*'L'                             define Lower or Upper
*9                               number of problem sizes
*1 5 17 28 37 121 200 1023 2048 3073     values of N
*6                               number of bandwidths
*1 2 4 10 31 64                values of BW
*1                               number of NB's
*-1 3 4 5                        values of NB (-1 for automatic choice)
*1                               number of NRHS's (must be 1)
*8                               values of NRHS
*1                               number of NBRHS's (ignored)
*1                               values of NBRHS (ignored)
*6                               number of process grids
*1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
*3.0                             threshold
*
*  Notes on the input
*  ==================
*
*  The bandwidth values are read but not used: this driver always
*  runs with BW = 1.  The loop over the NBW bandwidth entries is
*  still executed, so each configuration is repeated NBW times.
*
*  A negative threshold disables residual checking.  Such runs are
*  timed only; they are reported as 'BYPASS' and counted as
*  "completed without checking" in the final summary.
*
*  Internal Parameters
*  ===================
*
*  TOTMEM   INTEGER, default = 3000000.
*           TOTMEM is a machine-specific parameter indicating the
*           maximum amount of available memory in bytes.
*           The user should customize TOTMEM to his platform.  Remember
*           to leave room in memory for the operating system, the BLACS
*           buffer, etc.  For example, on a system with 8 MB of memory
*           per process (e.g., one processor on an Intel iPSC/860), the
*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
*           code, BLACS buffer, etc).  However, for PVM, we usually set
*           TOTMEM = 2000000.  Some experimenting with the maximum value
*           of TOTMEM may be required.
*
*  INTGSZ   INTEGER, default = 4 bytes.
*  CPLXSZ   INTEGER, default = 8 bytes.
*           INTGSZ and CPLXSZ indicate the length in bytes on the
*           given platform for an integer and a single precision
*           complex.  (Only CPLXSZ is declared in this driver.)
*  MEM      COMPLEX array, dimension ( TOTMEM/CPLXSZ )
*           All arrays used by ScaLAPACK routines are allocated from
*           this array and referenced by pointers.  The integer IPB,
*           for example, is a pointer to the starting element of MEM for
*           the solution vector(s) B.
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*  This version released: August, 2001.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            totmem
      parameter( totmem = 3000000 )
      INTEGER            block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
     $                   lld_, mb_, m_, nb_, n_, rsrc_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )
*
      REAL               zero
      INTEGER            cplxsz, memsiz, ntests
      COMPLEX            padval
      parameter( cplxsz = 8,
     $                     memsiz = totmem / cplxsz, ntests = 20,
     $                     padval = ( -9923.0e+0, -9923.0e+0 ),
     $                     zero = 0.0e+0 )
      INTEGER            int_one
      parameter( int_one = 1 )
*     ..
*     .. Local Scalars ..
      LOGICAL            check
      CHARACTER          uplo
      CHARACTER*6        passed
      CHARACTER*80       outfile
      INTEGER            bw, bw_num, fillin_size, free_ptr, h, hh, i,
     $                   iam, iaseed, ibseed, ictxt, ictxtb, imidpad,
     $                   info, int_temp, ipa, ipb, ipostpad, iprepad,
     $                   ipw, ipw_size, ipw_solve, ipw_solve_size,
     $                   ip_driver_w, ip_fillin, j, k, kfail, kpass,
     $                   kskip, ktests, mycol, myrhs_size, myrow, n,
     $                   nb, nbw, ngrids, nmat, nnb, nnbr, nnr, nout,
     $                   np, npcol, nprocs, nprocs_real, nprow, nq,
     $                   nrhs, n_first, n_last, worksiz
      REAL               anorm, sresid, thresh
      DOUBLE PRECISION   nops, nops2, tmflops, tmflops2
*     ..
*     .. Local Arrays ..
      INTEGER            bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
     $                   descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
     $                   nbrval( ntests ), nbval( ntests ),
     $                   nrval( ntests ), nval( ntests ),
     $                   pval( ntests ), qval( ntests )
      DOUBLE PRECISION   ctime( 2 ), wtime( 2 )
      COMPLEX            mem( memsiz )
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_barrier, blacs_exit, blacs_get,
     $                   blacs_gridexit, blacs_gridinfo, blacs_gridinit,
     $                   blacs_pinfo, descinit, igsum2d, pcbmatgen,
     $                   pcchekpad, pcfillpad, pcmatgen, pcptinfo,
     $                   pcptlaschk, pcpttrf, pcpttrs, slboot,
     $                   slcombine, sltimer
*     ..
*     .. External Functions ..
      INTEGER            numroc
      LOGICAL            lsame
      REAL               pclange
      EXTERNAL           lsame, numroc, pclange
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          dble, max, min, mod
*     ..
*     .. Data Statements ..
      DATA               kfail, kpass, kskip, ktests / 4*0 /
*     ..
*
*     .. Executable Statements ..
*
*     Get starting information
*
      CALL blacs_pinfo( iam, nprocs )
      iaseed = 100
      ibseed = 200
*
*     Read the test configuration (MEM is passed as scratch space)
*
      CALL pcptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
     $               bwval, ntests, nnb, nbval, ntests, nnr, nrval,
     $               ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
     $               qval, ntests, thresh, mem, iam, nprocs )
*
*     Residual checking is enabled only for a non-negative threshold
*
      check = ( thresh.GE.0.0e+0 )
*
*     Print headings
*
      IF( iam.EQ.0 ) THEN
         WRITE( nout, fmt = * )
         WRITE( nout, fmt = 9995 )
         WRITE( nout, fmt = 9994 )
         WRITE( nout, fmt = * )
      END IF
*
*     Loop over different process grids
*
      DO 60 i = 1, ngrids
*
         nprow = pval( i )
         npcol = qval( i )
*
*        Make sure grid information is correct
*
         ierr( 1 ) = 0
         IF( nprow.LT.1 ) THEN
            IF( iam.EQ.0 )
     $         WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
            ierr( 1 ) = 1
         ELSE IF( npcol.LT.1 ) THEN
            IF( iam.EQ.0 )
     $         WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
            ierr( 1 ) = 1
         ELSE IF( nprow*npcol.GT.nprocs ) THEN
            IF( iam.EQ.0 )
     $         WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
            ierr( 1 ) = 1
         END IF
*
         IF( ierr( 1 ).GT.0 ) THEN
            IF( iam.EQ.0 )
     $         WRITE( nout, fmt = 9997 ) 'grid'
            kskip = kskip + 1
            GO TO 50
         END IF
*
*        Define process grid
*
         CALL blacs_get( -1, 0, ictxt )
         CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
*
*        Define transpose process grid
*
         CALL blacs_get( -1, 0, ictxtb )
         CALL blacs_gridinit( ictxtb, 'Column-major', npcol, nprow )
*
*        Go to bottom of process grid loop if this case doesn't use my
*        process
*
         CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
         IF( myrow.LT.0 .OR. mycol.LT.0 ) THEN
            GO TO 50
         ENDIF
*
*        Loop over the problem sizes
*
         DO 40 j = 1, nmat
*
           ierr( 1 ) = 0
*
           n = nval( j )
*
*          Make sure matrix information is correct
*
           IF( n.LT.1 ) THEN
               IF( iam.EQ.0 )
     $            WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
               ierr( 1 ) = 1
           END IF
*
*          Check all processes for an error
*
           CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
     $                    -1, 0 )
*
           IF( ierr( 1 ).GT.0 ) THEN
               IF( iam.EQ.0 )
     $            WRITE( nout, fmt = 9997 ) 'size'
               kskip = kskip + 1
               GO TO 40
           END IF
*
*          Loop over the bandwidth entries of the input file
*
           DO 45 bw_num = 1, nbw
*
             ierr( 1 ) = 0
*
*            The bandwidth is fixed at 1 here; BWVAL( BW_NUM ) is
*            not consulted, so no range check on a negative value
*            is needed.
*
             bw = 1
*
*            A matrix of order N cannot have bandwidth above N-1
*
             IF( bw.GT.n-1 ) THEN
               ierr( 1 ) = 1
             END IF
*
*            Check all processes for an error
*
             CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
     $                    -1, 0 )
*
             IF( ierr( 1 ).GT.0 ) THEN
               kskip = kskip + 1
               GO TO 45
             END IF
*
*            Loop over the block sizes
*
             DO 30 k = 1, nnb
*
               ierr( 1 ) = 0
*
*              A negative NB requests an automatic choice derived
*              from N and NPCOL, clamped to the range [2, N]
*
               nb = nbval( k )
               IF( nb.LT.0 ) THEN
                  nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
     $               + int_one
                  nb = max( nb, 2*int_one )
                  nb = min( n, nb )
               END IF
*
*              Make sure NB is legal
*
               ierr( 1 ) = 0
               IF( nb.LT.min( 2*int_one, n ) ) THEN
                  ierr( 1 ) = 1
               ENDIF
*
*              Check all processes for an error
*
               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1,
     $                       -1, 0 )
*
               IF( ierr( 1 ).GT.0 ) THEN
                  kskip = kskip + 1
                  GO TO 30
               END IF
*
*              Padding constants
*
               np = numroc( (2), (2),
     $                      myrow, 0, nprow )
               nq = numroc( n, nb, mycol, 0, npcol )
*
*              Guard zones are only laid out when checking is on
*
               IF( check ) THEN
                  iprepad  = ((2)+10)
                  imidpad  = 10
                  ipostpad = ((2)+10)
               ELSE
                  iprepad  = 0
                  imidpad  = 0
                  ipostpad = 0
               END IF
*
*              Initialize the array descriptor for the matrix A
*
               CALL descinit( desca2d, n, (2),
     $                       nb, 1, 0, 0,
     $                       ictxtb, nb+10, ierr( 1 ) )
*
*              Convert this to 1D descriptor
*
               desca( 1 ) = 501
               desca( 3 ) = n
               desca( 4 ) = nb
               desca( 5 ) = 0
               desca( 2 ) = ictxt
               desca( 6 ) = ((2)+10)
               desca( 7 ) = 0
*
*              Keep only a negative (error) status from DESCINIT so
*              that the global sum below is negative if any process
*              failed
*
               ierr( 1 ) = min( 0, ierr( 1 ) )
*
*              Check all processes for an error
*
               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
*
               IF( ierr( 1 ).LT.0 ) THEN
                  IF( iam.EQ.0 )
     $               WRITE( nout, fmt = 9997 ) 'descriptor'
                  kskip = kskip + 1
                  GO TO 30
               END IF
*
*              Assign pointers into MEM for SCALAPACK arrays, A is
*              allocated starting at position MEM( IPREPAD+1 )
*
               free_ptr = 1
               ipb = 0
*
*              Save room for prepadding
               free_ptr = free_ptr + iprepad
*
               ipa = free_ptr
               free_ptr = free_ptr + (nb+10)*(2)
     $                     + ipostpad
*
*              Add memory for fillin
*              Fillin space needs to store:
*                Fillin spike:
*                Contribution to previous proc's diagonal block of
*                  reduced system:
*                Off-diagonal block of reduced system:
*                Diagonal block of reduced system:
*
               fillin_size =
     $            (12*npcol + 3*nb)
*
*              Claim memory for fillin
*
               free_ptr = free_ptr + iprepad
               ip_fillin = free_ptr
               free_ptr = free_ptr + fillin_size
*
*              Workspace needed by computational routines:
*
               ipw_size = 0
*
*              factorization:
*
               ipw_size = 8*npcol
*
*              Claim memory for IPW
*
               ipw = free_ptr
               free_ptr = free_ptr + ipw_size
*
*              Check for adequate memory for problem size
*
               ierr( 1 ) = 0
               IF( free_ptr.GT.memsiz ) THEN
                  IF( iam.EQ.0 )
     $               WRITE( nout, fmt = 9996 )
     $               'divide and conquer factorization',
     $               (free_ptr )*cplxsz
                  ierr( 1 ) = 1
               END IF
*
*              Check all processes for an error
*
               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
     $                       1, -1, 0 )
*
               IF( ierr( 1 ).GT.0 ) THEN
                  IF( iam.EQ.0 )
     $               WRITE( nout, fmt = 9997 ) 'MEMORY'
                  kskip = kskip + 1
                  GO TO 30
               END IF
*
*              Worksize needed for LAPRNT
               worksiz = max( ((2)+10), nb )
*
               IF( check ) THEN
*
*                 Calculate the amount of workspace required by
*                 the checking routines.
*
*                 PCLANGE
                  worksiz = max( worksiz, desca2d( nb_ ) )
*
*                 PCPTLASCHK
                  worksiz = max( worksiz,
     $                   max(5,nb)+2*nb )
               END IF
*
               free_ptr = free_ptr + iprepad
               ip_driver_w = free_ptr
               free_ptr = free_ptr + worksiz + ipostpad
*
*              Check for adequate memory for problem size
*
               ierr( 1 ) = 0
               IF( free_ptr.GT.memsiz ) THEN
                  IF( iam.EQ.0 )
     $               WRITE( nout, fmt = 9996 ) 'factorization',
     $               ( free_ptr )*cplxsz
                  ierr( 1 ) = 1
               END IF
*
*              Check all processes for an error
*
               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
     $                       1, -1, 0 )
*
               IF( ierr( 1 ).GT.0 ) THEN
                  IF( iam.EQ.0 )
     $               WRITE( nout, fmt = 9997 ) 'MEMORY'
                  kskip = kskip + 1
                  GO TO 30
               END IF
*
*              Generate the test matrix and surround A and the
*              driver workspace with the pad value
*
               CALL pcbmatgen( ictxt, uplo, 'T', bw, bw, n, (2), nb,
     $                         mem( ipa ), nb+10, 0, 0, iaseed, myrow,
     $                         mycol, nprow, npcol )
               CALL pcfillpad( ictxt, nq, np, mem( ipa-iprepad ),
     $                          nb+10, iprepad, ipostpad,
     $                          padval )
*
               CALL pcfillpad( ictxt, worksiz, 1,
     $                          mem( ip_driver_w-iprepad ), worksiz,
     $                          iprepad, ipostpad, padval )
*
*              Calculate norm of A for residual error-checking
*
               IF( check ) THEN
*
                  anorm = pclange( 'I', n,
     $                            (2), mem( ipa ), 1, 1,
     $                            desca2d, mem( ip_driver_w ) )
                  CALL pcchekpad( ictxt, 'PCLANGE', nq, np,
     $                         mem( ipa-iprepad ), nb+10,
     $                         iprepad, ipostpad, padval )
                  CALL pcchekpad( ictxt, 'PCLANGE',
     $                            worksiz, 1,
     $                            mem( ip_driver_w-iprepad ), worksiz,
     $                            iprepad, ipostpad, padval )
               END IF
*
*              INT_TEMP is the offset of the diagonal inside the
*              local storage of A: 0 for lower, LLD for upper
*
               IF( lsame( uplo, 'L' ) ) THEN
                  int_temp = 0
               ELSE
                  int_temp = desca2d( lld_ )
               ENDIF
*
*              For SPD Tridiagonal complex matrices, diagonal is stored
*                as a real. Thus, compact D into half the space
*              Each pair of consecutive entries is merged into one
*              complex value (first + i*second).  NQ is the local
*              count NUMROC( N, NB, MYCOL, 0, NPCOL ) computed above.
*
               DO 10 h = 1, nq/2
                  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
     $               +mem( ipa+int_temp+2*h-1 )*( 0.0e+0, 1.0e+0 )
   10          CONTINUE
*
*              An odd local count leaves one unpaired entry to move
*
               IF( 2*( nq/2 ).NE.nq ) THEN
                  h = nq/2 + 1
                  mem( ipa+int_temp+h-1 ) = mem( ipa+int_temp+2*h-2 )
               ENDIF
*
               CALL slboot()
               CALL blacs_barrier( ictxt, 'All' )
*
*              Perform factorization
*
               CALL sltimer( 1 )
*
               CALL pcpttrf( n, mem( ipa+int_temp ),
     $                       mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
     $                       mem( ip_fillin ), fillin_size, mem( ipw ),
     $                       ipw_size, info )
*
               CALL sltimer( 1 )
*
               IF( info.NE.0 ) THEN
                  IF( iam.EQ.0 ) THEN
                    WRITE( nout, fmt = * ) 'PCPTTRF INFO=', info
                  ENDIF
                  kfail = kfail + 1
                  GO TO 30
               END IF
*
               IF( check ) THEN
*
*                 Check for memory overwrite in factorization
*
                  CALL pcchekpad( ictxt, 'PCPTTRF', nq,
     $                         np, mem( ipa-iprepad ), nb+10,
     $                         iprepad, ipostpad, padval )
               END IF
*
*              Loop over the different values for NRHS
*
               DO 20 hh = 1, nnr
*
                  ierr( 1 ) = 0
*
                  nrhs = nrval( hh )
*
*                    Initialize Array Descriptor for rhs
*
                     CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
     $                             ictxtb, nb+10, ierr( 1 ) )
*
*                    Convert this to 1D descriptor
*
                     descb( 1 ) = 502
                     descb( 3 ) = n
                     descb( 4 ) = nb
                     descb( 5 ) = 0
                     descb( 2 ) = ictxt
                     descb( 6 ) = descb2d( lld_ )
                     descb( 7 ) = 0
*
*                    reset free_ptr to reuse space for right hand sides
*
                     IF( ipb .GT. 0 ) THEN
                       free_ptr = ipb
                     ENDIF
*
                     free_ptr = free_ptr + iprepad
                     ipb = free_ptr
                     free_ptr = free_ptr + nrhs*descb2d( lld_ )
     $                          + ipostpad
*
*                    Allocate workspace for workspace in TRS routine:
*
                     ipw_solve_size = (10+2*min(100,nrhs))*npcol+4*nrhs
*
                     ipw_solve = free_ptr
                     free_ptr = free_ptr + ipw_solve_size
*
                     ierr( 1 ) = 0
                     IF( free_ptr.GT.memsiz ) THEN
                        IF( iam.EQ.0 )
     $                     WRITE( nout, fmt = 9996 )'solve',
     $                            ( free_ptr )*cplxsz
                        ierr( 1 ) = 1
                     END IF
*
*                    Check all processes for an error
*
                     CALL igsum2d( ictxt, 'All', ' ', 1, 1,
     $                             ierr, 1, -1, 0 )
*
                     IF( ierr( 1 ).GT.0 ) THEN
                        IF( iam.EQ.0 )
     $                     WRITE( nout, fmt = 9997 ) 'MEMORY'
                        kskip = kskip + 1
*
*                       Give the result column a defined value; the
*                       report section below still runs for this case
*
                        passed = 'SKIP'
                        GO TO 15
                     END IF
*
                     myrhs_size = numroc( n, nb, mycol, 0, npcol )
*
*                    Generate RHS
*
                     CALL pcmatgen(ictxtb, 'No', 'No',
     $                        descb2d( m_ ), descb2d( n_ ),
     $                        descb2d( mb_ ), descb2d( nb_ ),
     $                        mem( ipb ),
     $                        descb2d( lld_ ), descb2d( rsrc_ ),
     $                        descb2d( csrc_ ),
     $                        ibseed, 0, myrhs_size, 0, nrhs, mycol,
     $                        myrow, npcol, nprow )
*
                     IF( check ) THEN
                        CALL pcfillpad( ictxtb, nb, nrhs,
     $                                  mem( ipb-iprepad ),
     $                                  descb2d( lld_ ),
     $                                  iprepad, ipostpad,
     $                                  padval )
                        CALL pcfillpad( ictxt, worksiz, 1,
     $                                  mem( ip_driver_w-iprepad ),
     $                                  worksiz, iprepad,
     $                                  ipostpad, padval )
                     END IF
*
                     CALL blacs_barrier( ictxt, 'All')
                     CALL sltimer( 2 )
*
*                    Solve linear system via factorization
*
                     CALL pcpttrs( uplo, n, nrhs, mem( ipa+int_temp ),
     $                             mem( ipa+1*( nb+10-int_temp ) ), 1,
     $                             desca, mem( ipb ), 1, descb,
     $                             mem( ip_fillin ), fillin_size,
     $                             mem( ipw_solve ), ipw_solve_size,
     $                             info )
*
                     CALL sltimer( 2 )
*
                     IF( info.NE.0 ) THEN
                       IF( iam.EQ.0 )
     $  WRITE( nout, fmt = * ) 'PCPTTRS INFO=', info
                       kfail = kfail + 1
                       passed = 'FAILED'
                       GO TO 20
                     END IF
*
                     IF( check ) THEN
*
*                       check for memory overwrite
*
                        CALL pcchekpad( ictxt, 'PCPTTRS-work',
     $                                  worksiz, 1,
     $                                  mem( ip_driver_w-iprepad ),
     $                                  worksiz, iprepad,
     $                                  ipostpad, padval )
*
*                       check the solution to rhs
*
                        sresid = zero
*
*                       Reset descriptor describing A to 1-by-P grid for
*                          use in banded utility routines
*
                        CALL descinit( desca2d, (2), n,
     $                       (2), nb, 0, 0,
     $                       ictxt, (2), ierr( 1 ) )
                        CALL pcptlaschk( 'H', uplo, n, bw, bw, nrhs,
     $                              mem( ipb ), 1, 1, descb2d,
     $                              iaseed, mem( ipa ), 1, 1, desca2d,
     $                              ibseed, anorm, sresid,
     $                              mem( ip_driver_w ), worksiz )
*
                        IF( iam.EQ.0 ) THEN
                           IF( sresid.GT.thresh )
     $                        WRITE( nout, fmt = 9985 ) sresid
                        END IF
*
*                       The second test is a NaN trap
*
                        IF( ( sresid.LE.thresh          ).AND.
     $                      ( (sresid-sresid).EQ.0.0e+0 ) ) THEN
                           kpass = kpass + 1
                           passed = 'PASSED'
                        ELSE
                           kfail = kfail + 1
                           passed = 'FAILED'
                        END IF
*
                     ELSE
*
*                       Timing-only run: no residual is computed, so
*                       count the test as completed and flag the
*                       result column accordingly
*
                        kpass = kpass + 1
                        passed = 'BYPASS'
*
                     END IF
*
   15                CONTINUE
*                    Skipped tests jump to here with PASSED = 'SKIP'
*
*                    Gather maximum of all CPU and WALL clock timings
*
                     CALL slcombine( ictxt, 'All', '>', 'W', 2, 1,
     $                               wtime )
                     CALL slcombine( ictxt, 'All', '>', 'C', 2, 1,
     $                               ctime )
*
*                    Print results
*
                     IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
*
                        nops = 0
                        nops2 = 0
*
                        n_first = nb
                        nprocs_real = ( n-1 )/nb + 1
                        n_last = mod( n-1, nb ) + 1
*
*                       First calc: operation count as a function of
*                       N, BW and NRHS only
*
                        nops = nops + dble(bw)*( -2.d0 / 3.d0+dble(bw)*
     $                        ( -1.d0+dble(bw)*( -1.d0 / 3.d0 ) ) ) +
     $                        dble(n)*( 1.d0+dble(bw)*( 3.d0 /
     $                        2.d0+dble(bw)*( 1.d0 / 2.d0 ) ) )
                        nops = nops + dble(bw)*( -1.d0 / 6.d0+dble(bw)
     $                        *( -1.d0 /2.d0+dble(bw)
     $                        *( -1.d0 / 3.d0 ) ) ) +
     $                        dble(n)*( dble(bw) /
     $                        2.d0*( 1.d0+dble(bw) ) )
*
                        nops = nops +
     $                         dble(nrhs)*( ( 2*dble(n)-dble(bw) )*
     $                         ( dble(bw)+1.d0 ) )+ dble(nrhs)*
     $                         ( dble(bw)*( 2*dble(n)-
     $                         ( dble(bw)+1.d0 ) ) )
*
*                       Second calc to represent actual hardware speed
*
*                     NB bw^2  flops for LLt factorization in 1st proc
*
                      nops2 = ( (dble(n_first))* dble(bw)**2  )
*
                      IF ( nprocs_real .GT. 1) THEN
*                       4 NB bw^2  flops for LLt factorization and
*                         spike calc in last processor
*
                        nops2 = nops2 +
     $                          4*( (dble(n_last)*dble(bw)**2) )
                      ENDIF
*
                      IF ( nprocs_real .GT. 2) THEN
*                       4 NB bw^2  flops for LLt factorization and
*                         spike calc in other processors
*
                        nops2 = nops2 + (nprocs_real-2)*
     $                          4*( (dble(nb)*dble(bw)**2) )
                      ENDIF
*
*                     Reduced system
*                     Evaluated in double precision: the integer
*                     form bw*bw*bw/3 truncates to zero for bw = 1
*
                      nops2 = nops2 +
     $                  dble( nprocs_real-1 )*( dble(bw)**3 / 3.d0 )
                      IF( nprocs_real .GT. 1 ) THEN
                        nops2 = nops2 +
     $                    dble( nprocs_real-2 )*( 2.d0*dble(bw)**3 )
                      ENDIF
*
*                     nrhs * 4 n_first*bw flops for LLt solve in proc 1.
*
                      nops2 = nops2 +
     $                    ( 4.0d+0*(dble(n_first)*dble(bw))*dble(nrhs) )
*
                      IF ( nprocs_real .GT. 1 ) THEN
*
*                     2*nrhs*4 n_last*bw flops for LLt solve in last.
*
                        nops2 = nops2 +
     $                  2*( 4.0d+0*(dble(n_last)*dble(bw))*dble(nrhs) )
                      ENDIF
*
                      IF ( nprocs_real .GT. 2 ) THEN
*
*                     2 * nrhs * 4 NB*bw flops for LLt solve in others.
*
                        nops2 = nops2 +
     $                    ( nprocs_real-2)*2*
     $                    ( 4.0d+0*(dble(nb)*dble(bw))*dble(nrhs) )
                      ENDIF
*
*                     Reduced system
*
                      nops2 = nops2 +
     $                  nrhs*( nprocs_real-1 ) * ( bw*bw )
                      IF( nprocs_real .GT. 1 ) THEN
                        nops2 = nops2 +
     $                   nrhs*( nprocs_real-2 ) * ( 3 * bw*bw )
                      ENDIF
*
*                     Multiply by 4 to get complex count
*
                      nops2 = nops2 * dble(4)
*
*                       Calculate total megaflops - factorization and/or
*                       solve -- for WALL and CPU time, and print output
*
*                       Print WALL time if machine supports it
*
                        IF( wtime( 1 ) + wtime( 2 ) .GT. 0.0d+0 ) THEN
                           tmflops = nops /
     $                            ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
                        ELSE
                           tmflops = 0.0d+0
                        END IF
*
                        IF( wtime( 1 )+wtime( 2 ).GT.0.0d+0 ) THEN
                           tmflops2 = nops2 /
     $                            ( ( wtime( 1 )+wtime( 2 ) ) * 1.0d+6 )
                        ELSE
                           tmflops2 = 0.0d+0
                        END IF
*
                        IF( wtime( 2 ).GE.0.0d+0 )
     $                     WRITE( nout, fmt = 9993 ) 'WALL', uplo,
     $                            n,
     $                            bw,
     $                            nb, nrhs, nprow, npcol,
     $                            wtime( 1 ), wtime( 2 ), tmflops,
     $                            tmflops2, passed
*
*                       Print CPU time if machine supports it
*
                        IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
                           tmflops = nops /
     $                            ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
                        ELSE
                           tmflops = 0.0d+0
                        END IF
*
                        IF( ctime( 1 )+ctime( 2 ).GT.0.0d+0 ) THEN
                           tmflops2 = nops2 /
     $                            ( ( ctime( 1 )+ctime( 2 ) ) * 1.0d+6 )
                        ELSE
                           tmflops2 = 0.0d+0
                        END IF
*
                        IF( ctime( 2 ).GE.0.0d+0 )
     $                     WRITE( nout, fmt = 9993 ) 'CPU ', uplo,
     $                            n,
     $                            bw,
     $                            nb, nrhs, nprow, npcol,
     $                            ctime( 1 ), ctime( 2 ), tmflops,
     $                            tmflops2, passed
*
                     END IF
   20          CONTINUE
*
*
   30       CONTINUE
*           NNB loop
*
   45      CONTINUE
*          BW[] loop
*
   40   CONTINUE
*       NMAT loop
*
        CALL blacs_gridexit( ictxt )
        CALL blacs_gridexit( ictxtb )
*
   50   CONTINUE
*       NGRIDS DROPOUT
   60 CONTINUE
*     NGRIDS loop
*
*     Print ending messages and close output file
*
      IF( iam.EQ.0 ) THEN
         ktests = kpass + kfail + kskip
         WRITE( nout, fmt = * )
         WRITE( nout, fmt = 9992 ) ktests
         IF( check ) THEN
            WRITE( nout, fmt = 9991 ) kpass
            WRITE( nout, fmt = 9989 ) kfail
         ELSE
            WRITE( nout, fmt = 9990 ) kpass
         END IF
         WRITE( nout, fmt = 9988 ) kskip
         WRITE( nout, fmt = * )
         WRITE( nout, fmt = * )
         WRITE( nout, fmt = 9987 )
         IF( nout.NE.6 .AND. nout.NE.0 )
     $      CLOSE ( nout )
      END IF
*
      CALL blacs_exit( 0 )
*
 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
     $        '; It should be at least 1' )
 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
     $        i4 )
 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
     $        i11 )
 9995 FORMAT( 'TIME UL      N  BW   NB  NRHS  P    Q L*U Time ',
     $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
     $        '--------   ------   ------ ------' )
 9993 FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
     $        i5, 1x, i2, 1x,
     $        i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
 9990 FORMAT( i5, ' tests completed without checking.' )
 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
 9987 FORMAT( 'END OF TESTS.' )
 9986 FORMAT( '||A - ', a4, '|| / (||A|| * N * eps) = ', g25.7 )
 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
*
      stop
*
*     End of PCPTDRIVER
*
      END

*

pcmatgen
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4

descinit
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3

numroc
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2

pcbmatgen
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5

pcchekpad
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3

pcfillpad
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pclange
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3

pcptdriver
program pcptdriver
Definition pcptdriver.f:1

pcptinfo
subroutine pcptinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcptinfo.f:6

pcptlaschk
subroutine pcptlaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcptlaschk.f:4

pcpttrf
subroutine pcpttrf(n, d, e, ja, desca, af, laf, work, lwork, info)
Definition pcpttrf.f:3

pcpttrs
subroutine pcpttrs(uplo, n, nrhs, d, e, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pcpttrs.f:3

slboot
subroutine slboot()
Definition sltimer.f:2

sltimer
subroutine sltimer(i)
Definition sltimer.f:47

slcombine
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267

lsame
logical function lsame(ca, cb)
Definition tools.f:1724