SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pcmmch()

subroutine pcmmch ( integer  ictxt,
character*1  transa,
character*1  transb,
integer  m,
integer  n,
integer  k,
complex  alpha,
complex, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
complex, dimension( * )  b,
integer  ib,
integer  jb,
integer, dimension( * )  descb,
complex  beta,
complex, dimension( * )  c,
complex, dimension( * )  pc,
integer  ic,
integer  jc,
integer, dimension( * )  descc,
complex, dimension( * )  ct,
real, dimension( * )  g,
real  err,
integer  info 
)

Definition at line 5333 of file pcblastst.f.

5336*
5337* -- PBLAS test routine (version 2.0) --
5338* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5339* and University of California, Berkeley.
5340* April 1, 1998
5341*
5342* .. Scalar Arguments ..
5343 CHARACTER*1 TRANSA, TRANSB
5344 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5345 REAL ERR
5346 COMPLEX ALPHA, BETA
5347* ..
5348* .. Array Arguments ..
5349 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5350 REAL G( * )
5351 COMPLEX A( * ), B( * ), C( * ), CT( * ), PC( * )
5352* ..
5353*
5354* Purpose
5355* =======
5356*
5357* PCMMCH checks the results of the computational tests.
5358*
5359* Notes
5360* =====
5361*
5362* A description vector is associated with each 2D block-cyclicly dis-
5363* tributed matrix. This vector stores the information required to
5364* establish the mapping between a matrix entry and its corresponding
5365* process and memory location.
5366*
5367* In the following comments, the character _ should be read as
5368* "of the distributed matrix". Let A be a generic term for any 2D
5369* block cyclicly distributed matrix. Its description vector is DESCA:
5370*
5371* NOTATION STORED IN EXPLANATION
5372* ---------------- --------------- ------------------------------------
5373* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5374* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5375* the NPROW x NPCOL BLACS process grid
5376* A is distributed over. The context
5377* itself is global, but the handle
5378* (the integer value) may vary.
5379* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5380* ted matrix A, M_A >= 0.
5381* N_A (global) DESCA( N_ ) The number of columns in the distri-
5382* buted matrix A, N_A >= 0.
5383* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5384* block of the matrix A, IMB_A > 0.
5385* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5386* left block of the matrix A,
5387* INB_A > 0.
5388* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5389* bute the last M_A-IMB_A rows of A,
5390* MB_A > 0.
5391* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5392* bute the last N_A-INB_A columns of
5393* A, NB_A > 0.
5394* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5395* row of the matrix A is distributed,
5396* NPROW > RSRC_A >= 0.
5397* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5398* first column of A is distributed.
5399* NPCOL > CSRC_A >= 0.
5400* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5401* array storing the local blocks of
5402* the distributed matrix A,
5403* IF( Lc( 1, N_A ) > 0 )
5404* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5405* ELSE
5406* LLD_A >= 1.
5407*
5408* Let K be the number of rows of a matrix A starting at the global in-
5409* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5410* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5411* receive if these K rows were distributed over NPROW processes. If K
5412* is the number of columns of a matrix A starting at the global index
5413* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5414* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5415* these K columns were distributed over NPCOL processes.
5416*
5417* The values of Lr() and Lc() may be determined via a call to the func-
5418* tion PB_NUMROC:
5419* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5420* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5421*
5422* Arguments
5423* =========
5424*
5425* ICTXT (local input) INTEGER
5426* On entry, ICTXT specifies the BLACS context handle, indica-
5427* ting the global context of the operation. The context itself
5428* is global, but the value of ICTXT is local.
5429*
5430* TRANSA (global input) CHARACTER*1
5431* On entry, TRANSA specifies if the matrix operand A is to be
5432* transposed.
5433*
5434* TRANSB (global input) CHARACTER*1
5435* On entry, TRANSB specifies if the matrix operand B is to be
5436* transposed.
5437*
5438* M (global input) INTEGER
5439* On entry, M specifies the number of rows of C.
5440*
5441* N (global input) INTEGER
5442* On entry, N specifies the number of columns of C.
5443*
5444* K (global input) INTEGER
5445* On entry, K specifies the number of columns (resp. rows) of A
5446* when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5447* PxSYR2K, PxHERK and PxHER2K.
5448*
5449* ALPHA (global input) COMPLEX
5450* On entry, ALPHA specifies the scalar alpha.
5451*
5452* A (local input) COMPLEX array
5453* On entry, A is an array of dimension (DESCA( M_ ),*). This
5454* array contains a local copy of the initial entire matrix PA.
5455*
5456* IA (global input) INTEGER
5457* On entry, IA specifies A's global row index, which points to
5458* the beginning of the submatrix sub( A ).
5459*
5460* JA (global input) INTEGER
5461* On entry, JA specifies A's global column index, which points
5462* to the beginning of the submatrix sub( A ).
5463*
5464* DESCA (global and local input) INTEGER array
5465* On entry, DESCA is an integer array of dimension DLEN_. This
5466* is the array descriptor for the matrix A.
5467*
5468* B (local input) COMPLEX array
5469* On entry, B is an array of dimension (DESCB( M_ ),*). This
5470* array contains a local copy of the initial entire matrix PB.
5471*
5472* IB (global input) INTEGER
5473* On entry, IB specifies B's global row index, which points to
5474* the beginning of the submatrix sub( B ).
5475*
5476* JB (global input) INTEGER
5477* On entry, JB specifies B's global column index, which points
5478* to the beginning of the submatrix sub( B ).
5479*
5480* DESCB (global and local input) INTEGER array
5481* On entry, DESCB is an integer array of dimension DLEN_. This
5482* is the array descriptor for the matrix B.
5483*
5484* BETA (global input) COMPLEX
5485* On entry, BETA specifies the scalar beta.
5486*
5487* C (local input/local output) COMPLEX array
5488* On entry, C is an array of dimension (DESCC( M_ ),*). This
5489* array contains a local copy of the initial entire matrix PC.
5490*
5491* PC (local input) COMPLEX array
5492* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5493* array contains the local pieces of the matrix PC.
5494*
5495* IC (global input) INTEGER
5496* On entry, IC specifies C's global row index, which points to
5497* the beginning of the submatrix sub( C ).
5498*
5499* JC (global input) INTEGER
5500* On entry, JC specifies C's global column index, which points
5501* to the beginning of the submatrix sub( C ).
5502*
5503* DESCC (global and local input) INTEGER array
5504* On entry, DESCC is an integer array of dimension DLEN_. This
5505* is the array descriptor for the matrix C.
5506*
5507* CT (workspace) COMPLEX array
5508* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5509* holds a copy of the current column of C.
5510*
5511* G (workspace) REAL array
5512* On entry, G is an array of dimension at least MAX(M,N,K). G
5513* is used to compute the gauges.
5514*
5515* ERR (global output) REAL
5516* On exit, ERR specifies the largest error in absolute value.
5517*
5518* INFO (global output) INTEGER
5519* On exit, if INFO <> 0, the result is less than half accurate.
5520*
5521* -- Written on April 1, 1998 by
5522* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5523*
5524* =====================================================================
5525*
5526* .. Parameters ..
5527 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5528 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5529 $ RSRC_
5530 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
5531 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5532 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5533 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5534 REAL RZERO, RONE
5535 parameter( rzero = 0.0e+0, rone = 1.0e+0 )
5536 COMPLEX ZERO
5537 parameter( zero = ( 0.0e+0, 0.0e+0 ) )
5538* ..
5539* .. Local Scalars ..
5540 LOGICAL COLREP, CTRANA, CTRANB, ROWREP, TRANA, TRANB
5541 INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5542 $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5543 $ MYCOL, MYROW, NPCOL, NPROW
5544 REAL EPS, ERRI
5545 COMPLEX Z
5546* ..
5547* .. External Subroutines ..
5548 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5549* ..
5550* .. External Functions ..
5551 LOGICAL LSAME
5552 REAL PSLAMCH
5553 EXTERNAL lsame, pslamch
5554* ..
5555* .. Intrinsic Functions ..
5556 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
5557* ..
5558* .. Statement Functions ..
5559 REAL ABS1
5560 abs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
5561* ..
5562* .. Executable Statements ..
5563*
5564 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5565*
5566 eps = pslamch( ictxt, 'eps' )
5567*
5568 trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5569 tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5570 ctrana = lsame( transa, 'C' )
5571 ctranb = lsame( transb, 'C' )
5572*
5573 lda = max( 1, desca( m_ ) )
5574 ldb = max( 1, descb( m_ ) )
5575 ldc = max( 1, descc( m_ ) )
5576*
5577* Compute expected result in C using data in A, B and C.
5578* Compute gauges in G. This part of the computation is performed
5579* by every process in the grid.
5580*
5581 DO 240 j = 1, n
5582*
5583 ioffc = ic + ( jc + j - 2 ) * ldc
5584 DO 10 i = 1, m
5585 ct( i ) = zero
5586 g( i ) = rzero
5587 10 CONTINUE
5588*
5589 IF( .NOT.trana .AND. .NOT.tranb ) THEN
5590 DO 30 kk = 1, k
5591 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5592 DO 20 i = 1, m
5593 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5594 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5595 g( i ) = g( i ) + abs( a( ioffa ) ) *
5596 $ abs( b( ioffb ) )
5597 20 CONTINUE
5598 30 CONTINUE
5599 ELSE IF( trana .AND. .NOT.tranb ) THEN
5600 IF( ctrana ) THEN
5601 DO 50 kk = 1, k
5602 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5603 DO 40 i = 1, m
5604 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5605 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5606 $ b( ioffb )
5607 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5608 $ abs1( b( ioffb ) )
5609 40 CONTINUE
5610 50 CONTINUE
5611 ELSE
5612 DO 70 kk = 1, k
5613 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5614 DO 60 i = 1, m
5615 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5616 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5617 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5618 $ abs1( b( ioffb ) )
5619 60 CONTINUE
5620 70 CONTINUE
5621 END IF
5622 ELSE IF( .NOT.trana .AND. tranb ) THEN
5623 IF( ctranb ) THEN
5624 DO 90 kk = 1, k
5625 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5626 DO 80 i = 1, m
5627 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5628 ct( i ) = ct( i ) + a( ioffa ) *
5629 $ conjg( b( ioffb ) )
5630 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5631 $ abs1( b( ioffb ) )
5632 80 CONTINUE
5633 90 CONTINUE
5634 ELSE
5635 DO 110 kk = 1, k
5636 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5637 DO 100 i = 1, m
5638 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5639 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5640 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5641 $ abs1( b( ioffb ) )
5642 100 CONTINUE
5643 110 CONTINUE
5644 END IF
5645 ELSE IF( trana .AND. tranb ) THEN
5646 IF( ctrana ) THEN
5647 IF( ctranb ) THEN
5648 DO 130 kk = 1, k
5649 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5650 DO 120 i = 1, m
5651 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5652 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5653 $ conjg( b( ioffb ) )
5654 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5655 $ abs1( b( ioffb ) )
5656 120 CONTINUE
5657 130 CONTINUE
5658 ELSE
5659 DO 150 kk = 1, k
5660 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5661 DO 140 i = 1, m
5662 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5663 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5664 $ b( ioffb )
5665 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5666 $ abs1( b( ioffb ) )
5667 140 CONTINUE
5668 150 CONTINUE
5669 END IF
5670 ELSE
5671 IF( ctranb ) THEN
5672 DO 170 kk = 1, k
5673 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5674 DO 160 i = 1, m
5675 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5676 ct( i ) = ct( i ) + a( ioffa ) *
5677 $ conjg( b( ioffb ) )
5678 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5679 $ abs1( b( ioffb ) )
5680 160 CONTINUE
5681 170 CONTINUE
5682 ELSE
5683 DO 190 kk = 1, k
5684 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5685 DO 180 i = 1, m
5686 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5687 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5688 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5689 $ abs1( b( ioffb ) )
5690 180 CONTINUE
5691 190 CONTINUE
5692 END IF
5693 END IF
5694 END IF
5695*
5696 DO 200 i = 1, m
5697 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5698 g( i ) = abs1( alpha )*g( i ) +
5699 $ abs1( beta )*abs1( c( ioffc ) )
5700 c( ioffc ) = ct( i )
5701 ioffc = ioffc + 1
5702 200 CONTINUE
5703*
5704* Compute the error ratio for this result.
5705*
5706 err = rzero
5707 info = 0
5708 ldpc = descc( lld_ )
5709 ioffc = ic + ( jc + j - 2 ) * ldc
5710 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5711 $ iic, jjc, icrow, iccol )
5712 icurrow = icrow
5713 rowrep = ( icrow.EQ.-1 )
5714 colrep = ( iccol.EQ.-1 )
5715*
5716 IF( mycol.EQ.iccol .OR. colrep ) THEN
5717*
5718 ibb = descc( imb_ ) - ic + 1
5719 IF( ibb.LE.0 )
5720 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5721 ibb = min( ibb, m )
5722 in = ic + ibb - 1
5723*
5724 DO 210 i = ic, in
5725*
5726 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5727 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5728 $ c( ioffc ) ) / eps
5729 IF( g( i-ic+1 ).NE.rzero )
5730 $ erri = erri / g( i-ic+1 )
5731 err = max( err, erri )
5732 IF( err*sqrt( eps ).GE.rone )
5733 $ info = 1
5734 iic = iic + 1
5735 END IF
5736*
5737 ioffc = ioffc + 1
5738*
5739 210 CONTINUE
5740*
5741 icurrow = mod( icurrow+1, nprow )
5742*
5743 DO 230 i = in+1, ic+m-1, descc( mb_ )
5744 ibb = min( ic+m-i, descc( mb_ ) )
5745*
5746 DO 220 kk = 0, ibb-1
5747*
5748 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5749 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5750 $ c( ioffc ) )/eps
5751 IF( g( i+kk-ic+1 ).NE.rzero )
5752 $ erri = erri / g( i+kk-ic+1 )
5753 err = max( err, erri )
5754 IF( err*sqrt( eps ).GE.rone )
5755 $ info = 1
5756 iic = iic + 1
5757 END IF
5758*
5759 ioffc = ioffc + 1
5760*
5761 220 CONTINUE
5762*
5763 icurrow = mod( icurrow+1, nprow )
5764*
5765 230 CONTINUE
5766*
5767 END IF
5768*
5769* If INFO = 0, all results are at least half accurate.
5770*
5771 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5772 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5773 $ mycol )
5774 IF( info.NE.0 )
5775 $ GO TO 250
5776*
5777 240 CONTINUE
5778*
5779 250 CONTINUE
5780*
5781 RETURN
5782*
5783* End of PCMMCH
5784*
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: