SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pcmmch1()

subroutine pcmmch1 ( integer  ictxt,
character*1  uplo,
character*1  trans,
integer  n,
integer  k,
complex  alpha,
complex, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
complex  beta,
complex, dimension( * )  c,
complex, dimension( * )  pc,
integer  ic,
integer  jc,
integer, dimension( * )  descc,
complex, dimension( * )  ct,
real, dimension( * )  g,
real  err,
integer  info 
)

Definition at line 5786 of file pcblastst.f.

5789*
5790* -- PBLAS test routine (version 2.0) --
5791* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5792* and University of California, Berkeley.
5793* April 1, 1998
5794*
5795* .. Scalar Arguments ..
5796 CHARACTER*1 TRANS, UPLO
5797 INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5798 REAL ERR
5799 COMPLEX ALPHA, BETA
5800* ..
5801* .. Array Arguments ..
5802 INTEGER DESCA( * ), DESCC( * )
5803 REAL G( * )
5804 COMPLEX A( * ), C( * ), CT( * ), PC( * )
5805* ..
5806*
5807* Purpose
5808* =======
5809*
5810* PCMMCH1 checks the results of the computational tests.
5811*
5812* Notes
5813* =====
5814*
5815* A description vector is associated with each 2D block-cyclicly dis-
5816* tributed matrix. This vector stores the information required to
5817* establish the mapping between a matrix entry and its corresponding
5818* process and memory location.
5819*
5820* In the following comments, the character _ should be read as
5821* "of the distributed matrix". Let A be a generic term for any 2D
5822* block cyclicly distributed matrix. Its description vector is DESCA:
5823*
5824* NOTATION STORED IN EXPLANATION
5825* ---------------- --------------- ------------------------------------
5826* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5827* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5828* the NPROW x NPCOL BLACS process grid
5829* A is distributed over. The context
5830* itself is global, but the handle
5831* (the integer value) may vary.
5832* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5833* ted matrix A, M_A >= 0.
5834* N_A (global) DESCA( N_ ) The number of columns in the distri-
5835* buted matrix A, N_A >= 0.
5836* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5837* block of the matrix A, IMB_A > 0.
5838* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5839* left block of the matrix A,
5840* INB_A > 0.
5841* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5842* bute the last M_A-IMB_A rows of A,
5843* MB_A > 0.
5844* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5845* bute the last N_A-INB_A columns of
5846* A, NB_A > 0.
5847* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5848* row of the matrix A is distributed,
5849* NPROW > RSRC_A >= 0.
5850* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5851* first column of A is distributed.
5852* NPCOL > CSRC_A >= 0.
5853* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5854* array storing the local blocks of
5855* the distributed matrix A,
5856* IF( Lc( 1, N_A ) > 0 )
5857* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5858* ELSE
5859* LLD_A >= 1.
5860*
5861* Let K be the number of rows of a matrix A starting at the global in-
5862* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5863* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5864* receive if these K rows were distributed over NPROW processes. If K
5865* is the number of columns of a matrix A starting at the global index
5866* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5867* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5868* these K columns were distributed over NPCOL processes.
5869*
5870* The values of Lr() and Lc() may be determined via a call to the func-
5871* tion PB_NUMROC:
5872* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5873* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5874*
5875* Arguments
5876* =========
5877*
5878* ICTXT (local input) INTEGER
5879* On entry, ICTXT specifies the BLACS context handle, indica-
5880* ting the global context of the operation. The context itself
5881* is global, but the value of ICTXT is local.
5882*
5883* UPLO (global input) CHARACTER*1
5884* On entry, UPLO specifies which part of C should contain the
5885* result.
5886*
5887* TRANS (global input) CHARACTER*1
5888* On entry, TRANS specifies whether the matrix A has to be
5889* transposed or not before computing the matrix-matrix product.
5890*
5891* N (global input) INTEGER
5892* On entry, N specifies the order the submatrix operand C. N
5893* must be at least zero.
5894*
5895* K (global input) INTEGER
5896* On entry, K specifies the number of columns (resp. rows) of A
5897* when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5898* zero.
5899*
5900* ALPHA (global input) COMPLEX
5901* On entry, ALPHA specifies the scalar alpha.
5902*
5903* A (local input) COMPLEX array
5904* On entry, A is an array of dimension (DESCA( M_ ),*). This
5905* array contains a local copy of the initial entire matrix PA.
5906*
5907* IA (global input) INTEGER
5908* On entry, IA specifies A's global row index, which points to
5909* the beginning of the submatrix sub( A ).
5910*
5911* JA (global input) INTEGER
5912* On entry, JA specifies A's global column index, which points
5913* to the beginning of the submatrix sub( A ).
5914*
5915* DESCA (global and local input) INTEGER array
5916* On entry, DESCA is an integer array of dimension DLEN_. This
5917* is the array descriptor for the matrix A.
5918*
5919* BETA (global input) COMPLEX
5920* On entry, BETA specifies the scalar beta.
5921*
5922* C (local input/local output) COMPLEX array
5923* On entry, C is an array of dimension (DESCC( M_ ),*). This
5924* array contains a local copy of the initial entire matrix PC.
5925*
5926* PC (local input) COMPLEX array
5927* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5928* array contains the local pieces of the matrix PC.
5929*
5930* IC (global input) INTEGER
5931* On entry, IC specifies C's global row index, which points to
5932* the beginning of the submatrix sub( C ).
5933*
5934* JC (global input) INTEGER
5935* On entry, JC specifies C's global column index, which points
5936* to the beginning of the submatrix sub( C ).
5937*
5938* DESCC (global and local input) INTEGER array
5939* On entry, DESCC is an integer array of dimension DLEN_. This
5940* is the array descriptor for the matrix C.
5941*
5942* CT (workspace) COMPLEX array
5943* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5944* holds a copy of the current column of C.
5945*
5946* G (workspace) REAL array
5947* On entry, G is an array of dimension at least MAX(M,N,K). G
5948* is used to compute the gauges.
5949*
5950* ERR (global output) REAL
5951* On exit, ERR specifies the largest error in absolute value.
5952*
5953* INFO (global output) INTEGER
5954* On exit, if INFO <> 0, the result is less than half accurate.
5955*
5956* -- Written on April 1, 1998 by
5957* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5958*
5959* =====================================================================
5960*
5961* .. Parameters ..
5962 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5963 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5964 $ RSRC_
5965 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
5966 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5967 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5968 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5969 REAL RZERO, RONE
5970 parameter( rzero = 0.0e+0, rone = 1.0e+0 )
5971 COMPLEX ZERO
5972 parameter( zero = ( 0.0e+0, 0.0e+0 ) )
5973* ..
5974* .. Local Scalars ..
5975 LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER
5976 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5977 $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5978 $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5979 REAL EPS, ERRI
5980 COMPLEX Z
5981* ..
5982* .. External Subroutines ..
5983 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5984* ..
5985* .. External Functions ..
5986 LOGICAL LSAME
5987 REAL PSLAMCH
5988 EXTERNAL lsame, pslamch
5989* ..
5990* .. Intrinsic Functions ..
5991 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
5992* ..
5993* .. Statement Functions ..
5994 REAL ABS1
5995 abs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
5996* ..
5997* .. Executable Statements ..
5998*
5999 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6000*
6001 eps = pslamch( ictxt, 'eps' )
6002*
6003 upper = lsame( uplo, 'U' )
6004 notran = lsame( trans, 'N' )
6005 tran = lsame( trans, 'T' )
6006 htran = lsame( trans, 'H' )
6007*
6008 lda = max( 1, desca( m_ ) )
6009 ldc = max( 1, descc( m_ ) )
6010*
6011* Compute expected result in C using data in A, B and C.
6012* Compute gauges in G. This part of the computation is performed
6013* by every process in the grid.
6014*
6015 DO 140 j = 1, n
6016*
6017 IF( upper ) THEN
6018 ibeg = 1
6019 iend = j
6020 ELSE
6021 ibeg = j
6022 iend = n
6023 END IF
6024*
6025 DO 10 i = 1, n
6026 ct( i ) = zero
6027 g( i ) = rzero
6028 10 CONTINUE
6029*
6030 IF( notran ) THEN
6031 DO 30 kk = 1, k
6032 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6033 DO 20 i = ibeg, iend
6034 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6035 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
6036 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6037 $ abs1( a( ioffan ) )
6038 20 CONTINUE
6039 30 CONTINUE
6040 ELSE IF( tran ) THEN
6041 DO 50 kk = 1, k
6042 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6043 DO 40 i = ibeg, iend
6044 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6045 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
6046 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6047 $ abs1( a( ioffan ) )
6048 40 CONTINUE
6049 50 CONTINUE
6050 ELSE IF( htran ) THEN
6051 DO 70 kk = 1, k
6052 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6053 DO 60 i = ibeg, iend
6054 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6055 ct( i ) = ct( i ) + a( ioffan ) *
6056 $ conjg( a( ioffak ) )
6057 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6058 $ abs1( a( ioffan ) )
6059 60 CONTINUE
6060 70 CONTINUE
6061 ELSE
6062 DO 90 kk = 1, k
6063 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6064 DO 80 i = ibeg, iend
6065 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6066 ct( i ) = ct( i ) + conjg( a( ioffan ) ) * a( ioffak )
6067 g( i ) = g( i ) + abs1( conjg( a( ioffan ) ) ) *
6068 $ abs1( a( ioffak ) )
6069 80 CONTINUE
6070 90 CONTINUE
6071 END IF
6072*
6073 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6074*
6075 DO 100 i = ibeg, iend
6076 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
6077 g( i ) = abs1( alpha )*g( i ) +
6078 $ abs1( beta )*abs1( c( ioffc ) )
6079 c( ioffc ) = ct( i )
6080 ioffc = ioffc + 1
6081 100 CONTINUE
6082*
6083* Compute the error ratio for this result.
6084*
6085 err = rzero
6086 info = 0
6087 ldpc = descc( lld_ )
6088 ioffc = ic + ( jc + j - 2 ) * ldc
6089 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6090 $ iic, jjc, icrow, iccol )
6091 icurrow = icrow
6092 rowrep = ( icrow.EQ.-1 )
6093 colrep = ( iccol.EQ.-1 )
6094*
6095 IF( mycol.EQ.iccol .OR. colrep ) THEN
6096*
6097 ibb = descc( imb_ ) - ic + 1
6098 IF( ibb.LE.0 )
6099 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6100 ibb = min( ibb, n )
6101 in = ic + ibb - 1
6102*
6103 DO 110 i = ic, in
6104*
6105 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6106 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6107 $ c( ioffc ) ) / eps
6108 IF( g( i-ic+1 ).NE.rzero )
6109 $ erri = erri / g( i-ic+1 )
6110 err = max( err, erri )
6111 IF( err*sqrt( eps ).GE.rone )
6112 $ info = 1
6113 iic = iic + 1
6114 END IF
6115*
6116 ioffc = ioffc + 1
6117*
6118 110 CONTINUE
6119*
6120 icurrow = mod( icurrow+1, nprow )
6121*
6122 DO 130 i = in+1, ic+n-1, descc( mb_ )
6123 ibb = min( ic+n-i, descc( mb_ ) )
6124*
6125 DO 120 kk = 0, ibb-1
6126*
6127 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6128 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6129 $ c( ioffc ) )/eps
6130 IF( g( i+kk-ic+1 ).NE.rzero )
6131 $ erri = erri / g( i+kk-ic+1 )
6132 err = max( err, erri )
6133 IF( err*sqrt( eps ).GE.rone )
6134 $ info = 1
6135 iic = iic + 1
6136 END IF
6137*
6138 ioffc = ioffc + 1
6139*
6140 120 CONTINUE
6141*
6142 icurrow = mod( icurrow+1, nprow )
6143*
6144 130 CONTINUE
6145*
6146 END IF
6147*
6148* If INFO = 0, all results are at least half accurate.
6149*
6150 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6151 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6152 $ mycol )
6153 IF( info.NE.0 )
6154 $ GO TO 150
6155*
6156 140 CONTINUE
6157*
6158 150 CONTINUE
6159*
6160 RETURN
6161*
6162* End of PCMMCH1
6163*
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: