*  da/da6/pdqrdriver_8f_source.html

      PROGRAM pdqrdriver

*

*  -- ScaLAPACK testing driver (version 1.7) --

*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,

*     and University of California, Berkeley.

*     May 28, 2001

*

*  Purpose

*  =======

*

*  PDQRDRIVER is the main test program for the DOUBLE PRECISION

*  SCALAPACK QR factorization routines. This test driver performs a QR,

*  QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ

*  (complete orthogonal factorization) factorization and checks the

*  results.

*

*  The program must be driven by a short data file.  An annotated

*  example of a data file can be obtained by deleting the first 3

*  characters from the following 16 lines:

*  'ScaLAPACK QR factorizations input file'

*  'PVM machine'

*  'QR.out'                      output file name (if any)

*  6                             device out

*  6                             number of factorizations

*  'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ

*  4                             number of problem sizes

*  55 17 31 201                  values of M

*  5 71 31 201                   values of N

*  3                             number of MB's and NB's

*  4 3 5                         values of MB

*  4 7 3                         values of NB

*  7                             number of process grids (ordered P & Q)

*  1 2 1 4 2 3 8                 values of P

*  7 2 4 1 3 2 1                 values of Q

*  1.0                           threshold

*

*  Internal Parameters

*  ===================

*

*  TOTMEM   INTEGER, default = 2000000

*           TOTMEM is a machine-specific parameter indicating the

*           maximum amount of available memory in bytes.

*           The user should customize TOTMEM to his platform.  Remember

*           to leave room in memory for the operating system, the BLACS

*           buffer, etc.  For example, on a system with 8 MB of memory

*           per process (e.g., one processor on an Intel iPSC/860), the

*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,

*           code, BLACS buffer, etc).  However, for PVM, we usually set

*           TOTMEM = 2000000.  Some experimenting with the maximum value

*           of TOTMEM may be required.

*

*  INTGSZ   INTEGER, default = 4 bytes.

*  DBLESZ   INTEGER, default = 8 bytes.

*           INTGSZ and DBLESZ indicate the length in bytes on the

*           given platform for an integer and a double precision real.

*  MEM      DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )

*

*           All arrays used by SCALAPACK routines are allocated from

*           this array and referenced by pointers.  The integer IPA,

*           for example, is a pointer to the starting element of MEM for

*           the matrix A.

*

*  =====================================================================

*

*     .. Parameters ..

      INTEGER            block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,

     $                   lld_, mb_, m_, nb_, n_, rsrc_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      INTEGER            dblesz, intgsz, memsiz, ntests, totmem

      DOUBLE PRECISION   padval

      parameter( dblesz = 8, intgsz = 4, totmem = 2000000,

     $                     memsiz = totmem / dblesz, ntests = 20,

     $                     padval = -9923.0d+0 )

*     ..

*     .. Local Scalars ..

      CHARACTER*2        fact

      CHARACTER*6        passed

      CHARACTER*7        rout

      CHARACTER*8        routchk

      CHARACTER*80       outfile

      LOGICAL            check

      INTEGER            i, iam, iaseed, ictxt, imidpad, info, ipa,

     $                   ipostpad, ippiv, iprepad, iptau, ipw, j, k,

     $                   kfail, kpass, kskip, ktests, l, lipiv, ltau,

     $                   lwork, m, maxmn, mb, minmn, mnp, mnq, mp,

     $                   mycol, myrow, n, nb, nfact, ngrids, nmat, nnb,

     $                   nout, npcol, nprocs, nprow, nq, workfct,

     $                   worksiz

      REAL               thresh

      DOUBLE PRECISION   anorm, fresid, nops, tmflops

*     ..

*     .. Arrays ..

      CHARACTER*2        factor( ntests )

      INTEGER            desca( dlen_ ), ierr( 1 ), mbval( ntests ),

     $                   mval( ntests ), nbval( ntests ),

     $                   nval( ntests ), pval( ntests ), qval( ntests )

      DOUBLE PRECISION   ctime( 1 ), mem( memsiz ), wtime( 1 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_barrier, blacs_exit, blacs_get,

     $                   blacs_gridexit, blacs_gridinfo, blacs_gridinit,

     $                   blacs_pinfo, descinit, igsum2d, pdchekpad,

     $                   pdfillpad, pdgelqf, pdgelqrv,

     $                   pdgeqlf, pdgeqlrv, pdgeqpf,

     $                   pdqppiv, pdgeqrf, pdgeqrrv,

     $                   pdgerqf, pdgerqrv, pdtzrzrv,

     $                   pdmatgen, pdlafchk, pdqrinfo,

     $                   pdtzrzf, slboot, slcombine, sltimer

*     ..

*     .. External Functions ..

      LOGICAL            lsamen

      INTEGER            iceil, numroc

      DOUBLE PRECISION   pdlange

      EXTERNAL           iceil, lsamen, numroc, pdlange

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          dble, max, min

*     ..

*     .. Data Statements ..

      DATA               ktests, kpass, kfail, kskip /4*0/

*     ..

*     .. Executable Statements ..

*

*     Get starting information

*

      CALL blacs_pinfo( iam, nprocs )

      iaseed = 100

      CALL pdqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,

     $               ntests, nval, ntests, nnb, mbval, ntests, nbval,

     $               ntests, ngrids, pval, ntests, qval, ntests,

     $               thresh, mem, iam, nprocs )

      check = ( thresh.GE.0.0e+0 )

*

*     Loop over the different factorization types

*

      DO 40 i = 1, nfact

*

         fact = factor( i )

*

*        Print headings

*

         IF( iam.EQ.0 ) THEN

            WRITE( nout, fmt = * )

            IF( lsamen( 2, fact, 'QR' ) ) THEN

               rout = 'PDGEQRF'

               routchk = 'PDGEQRRV'

               WRITE( nout, fmt = 9986 )

     $                'QR factorization tests.'

            ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN

               rout = 'PDGEQLF'

               routchk = 'PDGEQLRV'

               WRITE( nout, fmt = 9986 )

     $                'QL factorization tests.'

            ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN

               rout = 'PDGELQF'

               routchk = 'PDGELQRV'

               WRITE( nout, fmt = 9986 )

     $                'LQ factorization tests.'

            ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN

               rout = 'PDGERQF'

               routchk = 'PDGERQRV'

               WRITE( nout, fmt = 9986 )

     $                'RQ factorization tests.'

            ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN

               rout = 'PDGEQPF'

               routchk = 'PDGEQRRV'

               WRITE( nout, fmt = 9986 )

     $                'QR factorization with column pivoting tests.'

            ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN

               rout = 'PDTZRZF'

               routchk = 'PDTZRZRV'

               WRITE( nout, fmt = 9986 )

     $                'Complete orthogonal factorization tests.'

            END IF

            WRITE( nout, fmt = * )

            WRITE( nout, fmt = 9995 )

            WRITE( nout, fmt = 9994 )

            WRITE( nout, fmt = * )

         END IF

*

*        Loop over different process grids

*

         DO 30 j = 1, ngrids

*

            nprow = pval( j )

            npcol = qval( j )

*

*           Make sure grid information is correct

*

            ierr( 1 ) = 0

            IF( nprow.LT.1 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow

               ierr( 1 ) = 1

            ELSE IF( npcol.LT.1 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol

               ierr( 1 ) = 1

            ELSE IF( nprow*npcol.GT.nprocs ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs

               ierr( 1 ) = 1

            END IF

*

            IF( ierr( 1 ).GT.0 ) THEN

               IF( iam.EQ.0 )

     $            WRITE( nout, fmt = 9997 ) 'grid'

               kskip = kskip + 1

               GO TO 30

            END IF

*

*           Define process grid

*

            CALL blacs_get( -1, 0, ictxt )

            CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )

            CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

*

*           Go to bottom of loop if this case doesn't use my process

*

            IF( myrow.GE.nprow .OR. mycol.GE.npcol )

     $         GO TO 30

*

            DO 20 k = 1, nmat

*

               m = mval( k )

               n = nval( k )

*

*              Make sure matrix information is correct

*

               ierr(1) = 0

               IF( m.LT.1 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m

                  ierr( 1 ) = 1

               ELSE IF( n.LT.1 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n

                  ierr( 1 ) = 1

               END IF

*

*              Make sure no process had an error

*

               CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )

*

               IF( ierr( 1 ).GT.0 ) THEN

                  IF( iam.EQ.0 )

     $               WRITE( nout, fmt = 9997 ) 'matrix'

                  kskip = kskip + 1

                  GO TO 20

               END IF

*

*              Loop over different blocking sizes

*

               DO 10 l = 1, nnb

*

                  mb = mbval( l )

                  nb = nbval( l )

*

*                 Make sure mb is legal

*

                  ierr( 1 ) = 0

                  IF( mb.LT.1 ) THEN

                     ierr( 1 ) = 1

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb

                  END IF

*

*                 Check all processes for an error

*

                  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,

     $                          0 )

*

                  IF( ierr( 1 ).GT.0 ) THEN

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9997 ) 'MB'

                     kskip = kskip + 1

                     GO TO 10

                  END IF

*

*                 Make sure nb is legal

*

                  ierr( 1 ) = 0

                  IF( nb.LT.1 ) THEN

                     ierr( 1 ) = 1

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb

                  END IF

*

*                 Check all processes for an error

*

                  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,

     $                          0 )

*

                  IF( ierr( 1 ).GT.0 ) THEN

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9997 ) 'NB'

                     kskip = kskip + 1

                     GO TO 10

                  END IF

*

*                 Padding constants

*

                  mp  = numroc( m, mb, myrow, 0, nprow )

                  nq  = numroc( n, nb, mycol, 0, npcol )

                  mnp = numroc( min( m, n ), mb, myrow, 0, nprow )

                  mnq = numroc( min( m, n ), nb, mycol, 0, npcol )

                  IF( check ) THEN

                     iprepad  = max( mb, mp )

                     imidpad  = nb

                     ipostpad = max( nb, nq )

                  ELSE

                     iprepad  = 0

                     imidpad  = 0

                     ipostpad = 0

                  END IF

*

*                 Initialize the array descriptor for the matrix A

*

                  CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,

     $                           max( 1, mp ) + imidpad, ierr( 1 ) )

*

*                 Check all processes for an error

*

                  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,

     $                          0 )

*

                  IF( ierr( 1 ).LT.0 ) THEN

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9997 ) 'descriptor'

                     kskip = kskip + 1

                     GO TO 10

                  END IF

*

*                 Assign pointers into MEM for ScaLAPACK arrays, A is

*                 allocated starting at position MEM( IPREPAD+1 )

*

                  ipa   = iprepad+1

                  iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad

*

                  IF( lsamen( 2, fact, 'QR' ) ) THEN

*

                     ltau = mnq

                     ipw  = iptau + ltau + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the QR

*                    factorization

*

                     lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDGEQRRV and

*                       PDLANGE

*

                        worksiz = lwork + mp*desca( nb_ ) + ipostpad

*

                     END IF

*

                  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN

*

                     ltau = nq

                     ipw = iptau + ltau + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the QL

*                    factorization

*

                     lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDGEQLRV and

*                       PDLANGE

*

                        worksiz = lwork + mp*desca( nb_ ) + ipostpad

*

                     END IF

*

                  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN

*

                     ltau = mnp

                     ipw = iptau + ltau + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the LQ

*                    factorization

*

                     lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDGELQRV and

*                       PDLANGE

*

                        worksiz = lwork +

     $                            max( mp*desca( nb_ ), nq*desca( mb_ )

     $                            ) + ipostpad

*

                     END IF

*

                  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN

*

                     ltau = mp

                     ipw = iptau + ltau + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the RQ

*                    factorization

*

                     lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDGERQRV and

*                       PDLANGE

*

                        worksiz = lwork +

     $                            max( mp*desca( nb_ ), nq*desca( mb_ )

     $                            ) + ipostpad

*

                     END IF

*

                  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN

*

                     ltau = mnq

                     ippiv = iptau + ltau + ipostpad + iprepad

                     lipiv = iceil( intgsz*nq, dblesz )

                     ipw = ippiv + lipiv + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the

*                    factorization, i.e., from IPW on.

*

                     lwork = max( 3, mp + max( 1, nq ) ) + 2 * nq

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDGEQRRV,

*                       PDLANGE.

*

                        worksiz = max( worksiz - ipostpad,

     $                    desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +

     $                    ipostpad

                     END IF

*

                  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN

*

                     ltau = mp

                     ipw = iptau + ltau + ipostpad + iprepad

*

*                    Figure the amount of workspace required by the TZ

*                    factorization

*

                     lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )

                     workfct = lwork + ipostpad

                     worksiz = workfct

*

                     IF( check ) THEN

*

*                       Figure the amount of workspace required by the

*                       checking routines PDLAFCHK, PDTZRZRV and

*                       PDLANGE

*

                        worksiz = lwork +

     $                            max( mp*desca( nb_ ), nq*desca( mb_ )

     $                            ) + ipostpad

*

                     END IF

*

                  END IF

*

*                 Check for adequate memory for problem size

*

                  ierr( 1 ) = 0

                  IF( ipw+worksiz.GT.memsiz ) THEN

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9996 )

     $                         fact // ' factorization',

     $                         ( ipw+worksiz )*dblesz

                     ierr( 1 ) = 1

                  END IF

*

*                 Check all processes for an error

*

                  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,

     $                          0 )

*

                  IF( ierr( 1 ).GT.0 ) THEN

                     IF( iam.EQ.0 )

     $                  WRITE( nout, fmt = 9997 ) 'MEMORY'

                     kskip = kskip + 1

                     GO TO 10

                  END IF

*

*                 Generate the matrix A

*

                  CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ),

     $                           desca( n_ ), desca( mb_ ),

     $                           desca( nb_ ), mem( ipa ),

     $                           desca( lld_ ), desca( rsrc_ ),

     $                           desca( csrc_ ), iaseed, 0, mp, 0, nq,

     $                           myrow, mycol, nprow, npcol )

*

*                 Need the infinity norm of A for checking

*

                  IF( check ) THEN

                     CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),

     $                               desca( lld_ ), iprepad, ipostpad,

     $                               padval )

                     IF( lsamen( 2, fact, 'QP' ) ) THEN

                        CALL pdfillpad( ictxt, lipiv, 1,

     $                                  mem( ippiv-iprepad ), lipiv,

     $                                  iprepad, ipostpad, padval )

                     END IF

                     CALL pdfillpad( ictxt, ltau, 1,

     $                               mem( iptau-iprepad ), ltau,

     $                               iprepad, ipostpad, padval )

                     CALL pdfillpad( ictxt, worksiz-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               worksiz-ipostpad,

     $                               iprepad, ipostpad, padval )

                     anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1,

     $                                desca, mem( ipw ) )

                     CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,

     $                               mem( ipa-iprepad ), desca( lld_ ),

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, 'PDLANGE',

     $                               worksiz-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               worksiz-ipostpad, iprepad,

     $                               ipostpad, padval )

                     CALL pdfillpad( ictxt, workfct-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               workfct-ipostpad,

     $                               iprepad, ipostpad, padval )

                  END IF

*

                  CALL slboot()

                  CALL blacs_barrier( ictxt, 'All' )

*

*                 Perform QR factorizations

*

                  IF( lsamen( 2, fact, 'QR' ) ) THEN

                     CALL sltimer( 1 )

                     CALL pdgeqrf( m, n, mem( ipa ), 1, 1, desca,

     $                             mem( iptau ), mem( ipw ), lwork,

     $                             info )

                     CALL sltimer( 1 )

                  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN

                     CALL sltimer( 1 )

                     CALL pdgeqlf( m, n, mem( ipa ), 1, 1, desca,

     $                             mem( iptau ), mem( ipw ), lwork,

     $                             info )

                     CALL sltimer( 1 )

                  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN

                     CALL sltimer( 1 )

                     CALL pdgelqf( m, n, mem( ipa ), 1, 1, desca,

     $                             mem( iptau ), mem( ipw ), lwork,

     $                             info )

                     CALL sltimer( 1 )

                  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN

                     CALL sltimer( 1 )

                     CALL pdgerqf( m, n, mem( ipa ), 1, 1, desca,

     $                             mem( iptau ), mem( ipw ), lwork,

     $                             info )

                     CALL sltimer( 1 )

                  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN

                     CALL sltimer( 1 )

                     CALL pdgeqpf( m, n, mem( ipa ), 1, 1, desca,

     $                             mem( ippiv ), mem( iptau ),

     $                             mem( ipw ), lwork, info )

                     CALL sltimer( 1 )

                  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN

                     CALL sltimer( 1 )

                     IF( n.GE.m )

     $                  CALL pdtzrzf( m, n, mem( ipa ), 1, 1, desca,

     $                                mem( iptau ), mem( ipw ), lwork,

     $                                info )

                     CALL sltimer( 1 )

                  END IF

*

                  IF( check ) THEN

*

*                    Check for memory overwrite in factorization

*

                     CALL pdchekpad( ictxt, rout, mp, nq,

     $                               mem( ipa-iprepad ), desca( lld_ ),

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, rout, ltau, 1,

     $                               mem( iptau-iprepad ), ltau,

     $                               iprepad, ipostpad, padval )

                     IF( lsamen( 2, fact, 'QP' ) ) THEN

                        CALL pdchekpad( ictxt, rout, lipiv, 1,

     $                                  mem( ippiv-iprepad ), lipiv,

     $                                  iprepad, ipostpad, padval )

                     END IF

                     CALL pdchekpad( ictxt, rout, workfct-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               workfct-ipostpad, iprepad,

     $                               ipostpad, padval )

                     CALL pdfillpad( ictxt, worksiz-ipostpad, 1,

     $                               mem( ipw-iprepad ),

     $                               worksiz-ipostpad,

     $                               iprepad, ipostpad, padval )

*

                     IF( lsamen( 2, fact, 'QR' ) ) THEN

*

*                       Compute residual = ||A-Q*R|| / (||A||*N*eps)

*

                        CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,

     $                                 mem( iptau ), mem( ipw ) )

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                              1, desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

                     ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN

*

*                       Compute residual = ||A-Q*L|| / (||A||*N*eps)

*

                        CALL pdgeqlrv( m, n, mem( ipa ), 1, 1, desca,

     $                                 mem( iptau ), mem( ipw ) )

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                              1, desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

                     ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN

*

*                       Compute residual = ||A-L*Q|| / (||A||*N*eps)

*

                        CALL pdgelqrv( m, n, mem( ipa ), 1, 1, desca,

     $                                 mem( iptau ), mem( ipw ) )

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                              1, desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

                     ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN

*

*                       Compute residual = ||A-R*Q|| / (||A||*N*eps)

*

                        CALL pdgerqrv( m, n, mem( ipa ), 1, 1, desca,

     $                                 mem( iptau ), mem( ipw ) )

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                              1, desca, iaseed, anorm, fresid,

     $                              mem( ipw ) )

                     ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN

*

*                       Compute residual = ||AP-Q*R|| / (||A||*N*eps)

*

                        CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,

     $                                 mem( iptau ), mem( ipw ) )

                     ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN

*

*                       Compute residual = ||A-T*Z|| / (||A||*N*eps)

*

                        IF( n.GE.m ) THEN

                           CALL pdtzrzrv( m, n, mem( ipa ), 1, 1, desca,

     $                                    mem( iptau ), mem( ipw ) )

                        END IF

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                                 1, desca, iaseed, anorm, fresid,

     $                                 mem( ipw ) )

                     END IF

*

*                    Check for memory overwrite

*

                     CALL pdchekpad( ictxt, routchk, mp, nq,

     $                               mem( ipa-iprepad ), desca( lld_ ),

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, routchk, ltau, 1,

     $                               mem( iptau-iprepad ), ltau,

     $                               iprepad, ipostpad, padval )

                     CALL pdchekpad( ictxt, routchk, worksiz-ipostpad,

     $                               1, mem( ipw-iprepad ),

     $                               worksiz-ipostpad, iprepad,

     $                               ipostpad, padval )

*

                     IF( lsamen( 2, fact, 'QP' ) ) THEN

*

                        CALL pdqppiv( m, n, mem( ipa ), 1, 1, desca,

     $                                mem( ippiv ) )

*

*                       Check for memory overwrite

*

                        CALL pdchekpad( ictxt, 'PDQPPIV', mp, nq,

     $                                  mem( ipa-iprepad ),

     $                                  desca( lld_ ),

     $                                  iprepad, ipostpad, padval )

                        CALL pdchekpad( ictxt, 'PDQPPIV', lipiv, 1,

     $                                  mem( ippiv-iprepad ), lipiv,

     $                                  iprepad, ipostpad, padval )

*

                        CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,

     $                                 1, desca, iaseed, anorm, fresid,

     $                                 mem( ipw ) )

*

*                       Check for memory overwrite

*

                        CALL pdchekpad( ictxt, 'PDLAFCHK', mp, nq,

     $                                  mem( ipa-iprepad ),

     $                                  desca( lld_ ),

     $                                  iprepad, ipostpad, padval )

                        CALL pdchekpad( ictxt, 'PDLAFCHK',

     $                                  worksiz-ipostpad, 1,

     $                                  mem( ipw-iprepad ),

     $                                  worksiz-ipostpad, iprepad,

     $                                  ipostpad, padval )

                     END IF

*

*                    Test residual and detect NaN result

*

                     IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN

                        kskip = kskip + 1

                        passed = 'BYPASS'

                     ELSE

                        IF( fresid.LE.thresh .AND.

     $                      (fresid-fresid).EQ.0.0d+0 ) THEN

                           kpass = kpass + 1

                           passed = 'PASSED'

                        ELSE

                           kfail = kfail + 1

                           passed = 'FAILED'

                        END IF

                     END IF

*

                  ELSE

*

*                    Don't perform the checking, only timing

*

                     kpass = kpass + 1

                     fresid = fresid - fresid

                     passed = 'BYPASS'

*

                  END IF

*

*                 Gather maximum of all CPU and WALL clock timings

*

                  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )

                  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )

*

*                 Print results

*

                  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN

*

                     minmn = min( m, n )

                     maxmn = max( m, n )

*

                     IF( lsamen( 2, fact, 'TZ' ) ) THEN

                        IF( m.GE.n ) THEN

                           nops = 0.0d+0

                        ELSE

*

*                          5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for

*                          complete orthogonal factorization (M <= N).

*

                           nops = ( 5.0d+0 * (

     $                              dble( n )*( dble( m )**2 ) -

     $                              dble( m )**3 +

     $                              dble( n )*dble( m ) ) +

     $                              dble( m )**2 ) / 2.0d+0

                        END IF

*

                     ELSE

*

*                       2 M N^2 - 2/3 N^3 + M N + N^2 for QR type

*                       factorization when M >= N.

*

                        nops = 2.0d+0 * ( dble( minmn )**2 ) *

     $                     ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +

     $                     ( dble( maxmn )+dble( minmn ) )*dble( minmn )

                     END IF

*

*                    Print WALL time

*

                     IF( wtime( 1 ).GT.0.0d+0 ) THEN

                        tmflops = nops / ( wtime( 1 ) * 1.0d+6 )

                     ELSE

                        tmflops = 0.0d+0

                     END IF

                     IF( wtime( 1 ).GE.0.0d+0 )

     $                  WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,

     $                         nprow, npcol, wtime( 1 ), tmflops,

     $                         passed, fresid

*

*                    Print CPU time

*

                     IF( ctime( 1 ).GT.0.0d+0 ) THEN

                        tmflops = nops / ( ctime( 1 ) * 1.0d+6 )

                     ELSE

                        tmflops = 0.0d+0

                     END IF

                     IF( ctime( 1 ).GE.0.0d+0 )

     $                  WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,

     $                         nprow, npcol, ctime( 1 ), tmflops,

     $                         passed, fresid

*

                  END IF

*

   10          CONTINUE

*

   20       CONTINUE

*

            CALL blacs_gridexit( ictxt )

*

   30    CONTINUE

*

   40 CONTINUE

*

*     Print out ending messages and close output file

*

      IF( iam.EQ.0 ) THEN

         ktests = kpass + kfail + kskip

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = 9992 ) ktests

         IF( check ) THEN

            WRITE( nout, fmt = 9991 ) kpass

            WRITE( nout, fmt = 9989 ) kfail

         ELSE

            WRITE( nout, fmt = 9990 ) kpass

         END IF

         WRITE( nout, fmt = 9988 ) kskip

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = * )

         WRITE( nout, fmt = 9987 )

         IF( nout.NE.6 .AND. nout.NE.0 )

     $      CLOSE ( nout )

      END IF

*

      CALL blacs_exit( 0 )

*

 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,

     $        '; It should be at least 1' )

 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',

     $        i4 )

 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )

 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',

     $        i11 )

 9995 FORMAT( 'TIME      M      N  MB  NB     P     Q Fact Time ',

     $        '     MFLOPS  CHECK  Residual' )

 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',

     $        '----------- ------  --------' )

 9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,

     $        f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )

 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )

 9991 FORMAT( i5, ' tests completed and passed residual checks.' )

 9990 FORMAT( i5, ' tests completed without checking.' )

 9989 FORMAT( i5, ' tests completed and failed residual checks.' )

 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )

 9987 FORMAT( 'END OF TESTS.' )

 9986 FORMAT( a )

*

      stop

*

*     End of PDQRDRIVER

*

      END

*


      SUBROUTINE pdqppiv( M, N, A, IA, JA, DESCA, IPIV )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            IA, JA, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), IPIV( * )
      DOUBLE PRECISION   A( * )
*     ..
*
*  Purpose
*  =======
*
*  PDQPPIV undoes, on sub( A ) = A(IA:IA+M-1,JA:JA+N-1), the column
*  permutation recorded in IPIV by PDGEQPF.  The pivots are applied in
*  reverse order; the routine exists for checking purposes.
*
*  Method
*  ======
*
*  For every global column J = JA, ..., JA+N-2 :
*
*    1. each process scans its local part of IPIV, from the local
*       position of column J up to its last local column of sub( A ),
*       and remembers the smallest entry and where it sits;
*    2. IGAMN2D combines these candidates along the process row, so
*       that every process column learns the smallest entry and the
*       process column holding it;
*    3. the holder turns its local position into a global column
*       index (INDXL2G) and broadcasts it along the process row;
*    4. the IPIV entry stored at the position of column J is copied
*       into the position where the minimum was found.  A point to
*       point send/receive is used when the two positions live in
*       different process columns, a plain assignment otherwise;
*    5. PDSWAP exchanges that global column with global column J.
*
*  Notes
*  =====
*
*  DESCA is the usual ScaLAPACK array descriptor of the 2D block
*  cyclicly distributed array A.  Only three of its entries are read
*  here:
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- ------------------------------------
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle of the
*                                 process grid A is distributed over.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to
*                                 distribute the columns of the array.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of A is distributed.
*
*  LOCc( K ) denotes the number of columns a process would receive if
*  K columns were distributed over the process columns of the grid;
*  it may be obtained with the ScaLAPACK tool function NUMROC:
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*
*  Arguments
*  =========
*
*  M       (global input) INTEGER
*          The number of rows of the distributed submatrix sub( A ).
*          M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns of the distributed submatrix
*          sub( A ).  N >= 0.  Nothing is done when N <= 1.
*
*  A       (local input/local output) DOUBLE PRECISION pointer into
*          the local memory to an array of dimension
*          (LLD_A, LOCc(JA+N-1)).
*          On entry, the local pieces of the M-by-N distributed
*          matrix sub( A ) which is to be permuted.  On exit, the
*          local pieces of the permuted submatrix sub( A )*Inv( P ).
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  IPIV    (local input/local output) INTEGER array, dimension
*          LOCc(JA+N-1).
*          On entry, the pivot vector: IPIV(I) = K means that the
*          local i-th column of sub( A )*P was the global K-th column
*          of sub( A ).  IPIV is tied to the distributed matrix A.
*          Entries of IPIV are overwritten while the permutation is
*          being undone, so its contents on exit should not be
*          relied upon.
*
*  =====================================================================
*
*     .. Parameters ..
*     Positions of the descriptor entries read by this routine.
      INTEGER            CSRC_, CTXT_, NB_
      PARAMETER          ( CTXT_ = 2, NB_ = 6, CSRC_ = 8 )
*     ..
*     .. Local Scalars ..
      INTEGER            GRID, J, JACOL, JCOL, JFIRST, JLAST, JLOC,
     $                   JOFF, JSWAP, K, KMIN, MYCOL, MYROW, NLOCC,
     $                   NPCOL, NPROW, PCOLMN, PMIN, PROWMN
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGAMN2D, IGEBR2D, IGEBS2D,
     $                   IGERV2D, IGESD2D, INFOG1L, PDSWAP
*     ..
*     .. External Functions ..
      INTEGER            INDXL2G, NUMROC
      EXTERNAL           INDXL2G, NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MOD
*     ..
*     .. Executable Statements ..
*
*     Query the process grid attached to the descriptor
*
      GRID = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( GRID, NPROW, NPCOL, MYROW, MYCOL )
*
*     Local index (JFIRST) and owning process column (JACOL) of the
*     first column of sub( A )
*
      CALL INFOG1L( JA, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ),
     $              JFIRST, JACOL )
*
*     Number of local columns of sub( A ).  The JOFF columns that
*     precede JA inside its block are counted by NUMROC and have to
*     be removed again on the process column owning JA.
*
      JOFF = MOD( JA-1, DESCA( NB_ ) )
      NLOCC = NUMROC( N+JOFF, DESCA( NB_ ), MYCOL, JACOL, NPCOL )
      IF( MYCOL.EQ.JACOL ) THEN
         NLOCC = NLOCC - JOFF
      END IF
*
*     Local index of the last local column of sub( A )
*
      JLAST = JFIRST + NLOCC - 1
*
      DO J = JA, JA + N - 2
*
*        Start values: only local entries strictly below JA+N-1 can
*        become candidates.  JSWAP is overwritten on every process
*        further down, either by INDXL2G or by the broadcast.
*
         PMIN = JA + N - 1
         JSWAP = JA + N
*
*        Local position (JLOC) and owning process column (JCOL) of
*        global column J, then the local search for the smallest
*        remaining pivot
*
         CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ),
     $                 JLOC, JCOL )
         DO K = JLOC, JLAST
            IF( IPIV( K ).LT.PMIN ) THEN
               PMIN = IPIV( K )
               KMIN = K
            END IF
         END DO
*
*        Minimum over the process row; PCOLMN receives the process
*        column that holds it
*
         CALL IGAMN2D( GRID, 'Rowwise', ' ', 1, 1, PMIN, 1, PROWMN,
     $                 PCOLMN, 1, -1, MYCOL )
*
         IF( MYCOL.NE.PCOLMN ) THEN
*
*           Not the holder of the minimum: receive the global index of
*           the column to swap.  The owner of column J additionally
*           ships its pivot entry to the holder.
*
            CALL IGEBR2D( GRID, 'Rowwise', ' ', 1, 1, JSWAP, 1, MYROW,
     $                    PCOLMN )
            IF( MYCOL.EQ.JCOL ) THEN
               IF( PCOLMN.NE.JCOL ) THEN
                  CALL IGESD2D( GRID, 1, 1, IPIV( JLOC ), 1, MYROW,
     $                          PCOLMN )
               END IF
            END IF
*
         ELSE
*
*           Holder of the minimum: publish its global column index,
*           then overwrite the slot of the minimum with the pivot
*           entry of column J (received, or copied when local).
*
            JSWAP = INDXL2G( KMIN, DESCA( NB_ ), MYCOL,
     $                       DESCA( CSRC_ ), NPCOL )
            CALL IGEBS2D( GRID, 'Rowwise', ' ', 1, 1, JSWAP, 1 )
            IF( PCOLMN.NE.JCOL ) THEN
               CALL IGERV2D( GRID, 1, 1, IPIV( KMIN ), 1, MYROW,
     $                       JCOL )
            ELSE IF( MYCOL.EQ.JCOL ) THEN
               IPIV( KMIN ) = IPIV( JLOC )
            END IF
*
         END IF
*
*        Exchange global columns JSWAP and J of A
*
         CALL PDSWAP( M, A, IA, JSWAP, DESCA, 1, A, IA, J, DESCA, 1 )
*
      END DO
*
*     End of PDQPPIV
*
      END

pdlafchk
subroutine pdlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pdlafchk.f:3

pdmatgen
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4

descinit
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3

iceil
integer function iceil(inum, idenom)
Definition iceil.f:2

infog1l
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3

numroc
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2

lsamen
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pdchekpad
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3

pdfillpad
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2

pdgelqf
subroutine pdgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgelqf.f:3

pdgelqrv
subroutine pdgelqrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgelqrv.f:2

pdgeqlf
subroutine pdgeqlf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgeqlf.f:3

pdgeqlrv
subroutine pdgeqlrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgeqlrv.f:2

pdgeqpf
subroutine pdgeqpf(m, n, a, ia, ja, desca, ipiv, tau, work, lwork, info)
Definition pdgeqpf.f:3

pdgeqrf
subroutine pdgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgeqrf.f:3

pdgeqrrv
subroutine pdgeqrrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgeqrrv.f:2

pdgerqf
subroutine pdgerqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgerqf.f:3

pdgerqrv
subroutine pdgerqrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgerqrv.f:2

pdlange
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3

pdqppiv
subroutine pdqppiv(m, n, a, ia, ja, desca, ipiv)
Definition pdqrdriver.f:868

pdqrdriver
program pdqrdriver
Definition pdqrdriver.f:1

pdqrinfo
subroutine pdqrinfo(summry, nout, nfact, factor, ldfact, nmat, mval, ldmval, nval, ldnval, nnb, mbval, ldmbval, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pdqrinfo.f:6

pdtzrzf
subroutine pdtzrzf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdtzrzf.f:3

pdtzrzrv
subroutine pdtzrzrv(m, n, a, ia, ja, desca, tau, work)
Definition pdtzrzrv.f:2

slboot
subroutine slboot()
Definition sltimer.f:2

sltimer
subroutine sltimer(i)
Definition sltimer.f:47

slcombine
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267