ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CptrmmB.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CptrmmB( PBTYP_T * TYPE, char * DIRECB, char * SIDE,
21  char * UPLO, char * TRANSA, char * DIAG, int M, int N,
22  char * ALPHA, char * A, int IA, int JA, int * DESCA,
23  char * B, int IB, int JB, int * DESCB )
24 #else
25 void PB_CptrmmB( TYPE, DIRECB, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A,
26  IA, JA, DESCA, B, IB, JB, DESCB )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * DIAG, * DIRECB, * SIDE, * TRANSA, * UPLO;
31  int IA, IB, JA, JB, M, N;
32  char * ALPHA;
33  PBTYP_T * TYPE;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCB;
38  char * A, * B;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_CptrmmB performs one of the matrix-matrix operations
46 *
47 * sub( B ) := alpha * op( sub( A ) ) * sub( B ),
48 *
49 * or
50 *
51 * sub( B ) := alpha * sub( B ) * op( sub( A ) ),
52 *
53 * where
54 *
55 * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L',
56 * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and,
57 *
58 * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
59 *
60 * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a
61 * unit, or non-unit, upper or lower triangular submatrix and op( X ) is
62 * one of
63 *
64 * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ).
65 *
66 * This is the inner-product algorithm using the logical LCM hybrid
67 * and static blocking techniques. The submatrix operand sub( A ) stays
68 * in place.
69 *
70 * Notes
71 * =====
72 *
73 * A description vector is associated with each 2D block-cyclicly dis-
74 * tributed matrix. This vector stores the information required to
75 * establish the mapping between a matrix entry and its corresponding
76 * process and memory location.
77 *
78 * In the following comments, the character _ should be read as
79 * "of the distributed matrix". Let A be a generic term for any 2D
80 * block cyclicly distributed matrix. Its description vector is DESC_A:
81 *
82 * NOTATION STORED IN EXPLANATION
83 * ---------------- --------------- ------------------------------------
84 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
85 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
86 * the NPROW x NPCOL BLACS process grid
87 * A is distributed over. The context
88 * itself is global, but the handle
89 * (the integer value) may vary.
90 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
91 * ted matrix A, M_A >= 0.
92 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
93 * buted matrix A, N_A >= 0.
94 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
95 * block of the matrix A, IMB_A > 0.
96 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
97 * left block of the matrix A,
98 * INB_A > 0.
99 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
100 * bute the last M_A-IMB_A rows of A,
101 * MB_A > 0.
102 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
103 * bute the last N_A-INB_A columns of
104 * A, NB_A > 0.
105 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
106 * row of the matrix A is distributed,
107 * NPROW > RSRC_A >= 0.
108 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
109 * first column of A is distributed.
110 * NPCOL > CSRC_A >= 0.
111 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
112 * array storing the local blocks of
113 * the distributed matrix A,
114 * IF( Lc( 1, N_A ) > 0 )
115 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
116 * ELSE
117 * LLD_A >= 1.
118 *
119 * Let K be the number of rows of a matrix A starting at the global in-
120 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
121 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
122 * receive if these K rows were distributed over NPROW processes. If K
123 * is the number of columns of a matrix A starting at the global index
124 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
125 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
126 * these K columns were distributed over NPCOL processes.
127 *
128 * The values of Lr() and Lc() may be determined via a call to the func-
129 * tion PB_Cnumroc:
130 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
131 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
132 *
133 * Arguments
134 * =========
135 *
136 * TYPE (local input) pointer to a PBTYP_T structure
137 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
138 * that contains type information (See pblas.h).
139 *
140 * DIRECB (global input) pointer to CHAR
141 * On entry, DIRECB specifies the direction in which the rows
142 * or columns of sub( B ) should be looped over as follows:
143 * DIRECB = 'F' or 'f' forward or increasing,
144 * DIRECB = 'B' or 'b' backward or decreasing.
145 *
146 * SIDE (global input) pointer to CHAR
147 * On entry, SIDE specifies whether op( sub( A ) ) multiplies
148 * sub( B ) from the left or right as follows:
149 *
150 * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ),
151 *
152 * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ).
153 *
154 * UPLO (global input) pointer to CHAR
155 * On entry, UPLO specifies whether the submatrix sub( A ) is
156 * an upper or lower triangular submatrix as follows:
157 *
158 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
159 * submatrix,
160 *
161 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
162 * submatrix.
163 *
164 * TRANSA (global input) pointer to CHAR
165 * On entry, TRANSA specifies the form of op( sub( A ) ) to be
166 * used in the matrix multiplication as follows:
167 *
168 * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ),
169 *
170 * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )',
171 *
172 * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ).
173 *
174 * DIAG (global input) pointer to CHAR
175 * On entry, DIAG specifies whether or not sub( A ) is unit
176 * triangular as follows:
177 *
178 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
179 * gular,
180 *
181 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
182 * angular.
183 *
184 * M (global input) INTEGER
185 * On entry, M specifies the number of rows of the submatrix
186 * sub( B ). M must be at least zero.
187 *
188 * N (global input) INTEGER
189 * On entry, N specifies the number of columns of the submatrix
190 * sub( B ). N must be at least zero.
191 *
192 * ALPHA (global input) pointer to CHAR
193 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
194 * supplied as zero then the local entries of the array B
195 * corresponding to the entries of the submatrix sub( B ) need
196 * not be set on input.
197 *
198 * A (local input) pointer to CHAR
199 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
200 * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at
201 * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
202 * contains the local entries of the matrix A.
203 * Before entry with UPLO = 'U' or 'u', this array contains the
204 * local entries corresponding to the entries of the upper tri-
205 * angular submatrix sub( A ), and the local entries correspon-
206 * ding to the entries of the strictly lower triangular part of
207 * the submatrix sub( A ) are not referenced.
208 * Before entry with UPLO = 'L' or 'l', this array contains the
209 * local entries corresponding to the entries of the lower tri-
210 * angular submatrix sub( A ), and the local entries correspon-
211 * ding to the entries of the strictly upper triangular part of
212 * the submatrix sub( A ) are not referenced.
213 * Note that when DIAG = 'U' or 'u', the local entries corres-
214 * ponding to the diagonal elements of the submatrix sub( A )
215 * are not referenced either, but are assumed to be unity.
216 *
217 * IA (global input) INTEGER
218 * On entry, IA specifies A's global row index, which points to
219 * the beginning of the submatrix sub( A ).
220 *
221 * JA (global input) INTEGER
222 * On entry, JA specifies A's global column index, which points
223 * to the beginning of the submatrix sub( A ).
224 *
225 * DESCA (global and local input) INTEGER array
226 * On entry, DESCA is an integer array of dimension DLEN_. This
227 * is the array descriptor for the matrix A.
228 *
229 * B (local input/local output) pointer to CHAR
230 * On entry, B is an array of dimension (LLD_B, Kb), where Kb is
231 * at least Lc( 1, JB+N-1 ). Before entry, this array contains
232 * the local entries of the matrix B.
233 * On exit, the local entries of this array corresponding to the
234 * to the entries of the submatrix sub( B ) are overwritten by
235 * the local entries of the m by n transformed submatrix.
236 *
237 * IB (global input) INTEGER
238 * On entry, IB specifies B's global row index, which points to
239 * the beginning of the submatrix sub( B ).
240 *
241 * JB (global input) INTEGER
242 * On entry, JB specifies B's global column index, which points
243 * to the beginning of the submatrix sub( B ).
244 *
245 * DESCB (global and local input) INTEGER array
246 * On entry, DESCB is an integer array of dimension DLEN_. This
247 * is the array descriptor for the matrix B.
248 *
249 * -- Written on April 1, 1998 by
250 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
251 *
252 * ---------------------------------------------------------------------
253 */
254 /*
255 * .. Local Scalars ..
256 */
257  char Broc, GemmTa, GemmTb, TranOp, WBroc, WCroc, conjg, * one,
258  * talpha, * tbeta, top, * zero;
259  int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq,
260  Alq0, Amb, Amp, An, Anb, Anq, Arow, BcurrocR, Bfwd, BiiD,
261  BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD,
262  BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263  BsrcR, LNorRT, WBfr, WBld, WCfr, WCld, WCpbY, WCsum, ctxt,
264  l, lb, lside, ltmp, mycol, myrow, n, nb, nbb, notran, npcol,
265  nprow, p=0, size, tmp, upper;
266  GEMM_T gemm;
267  GSUM2D_T gsum2d;
268 /*
269 * .. Local Arrays ..
270 */
271  int Ad0[DLEN_], DBUFB[DLEN_], WCd[DLEN_], WBd[DLEN_];
272  char * Aptr = NULL, * Bptr = NULL, * WB = NULL, * WC = NULL;
273 /* ..
274 * .. Executable Statements ..
275 *
276 */
277 /*
278 * Retrieve process grid information
279 */
280  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
281 
282  Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD );
283  lside = ( Mupcase( SIDE [0] ) == CLEFT );
284  upper = ( Mupcase( UPLO [0] ) == CUPPER );
285  notran = ( ( TranOp = Mupcase( TRANSA[0] ) ) == CNOTRAN );
286  LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287 
288  size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
289  gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d;
290  nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
291 /*
292 * Compute local information for sub( A ) and sub( B )
293 */
294  if( lside )
295  {
296  BnD = An = M; BnR = N; Broc = CCOLUMN;
297  BmyprocD = myrow; BnprocsD = nprow;
298  BmyprocR = mycol; BnprocsR = npcol;
299  BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ];
300  BsrcR = DESCB[CSRC_]; Bld = DESCB[LLD_];
301  PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
302  &BiiD, &BiiR, &BrocD, &BrocR );
303  Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD );
304  Binb1R = PB_Cfirstnb( BnR, JB, DESCB[INB_], BnbR );
305  }
306  else
307  {
308  BnD = An = N; BnR = M; Broc = CROW;
309  BmyprocD = mycol; BnprocsD = npcol;
310  BmyprocR = myrow; BnprocsR = nprow;
311  BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ];
312  BsrcR = DESCB[RSRC_]; Bld = DESCB[LLD_];
313  PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
314  &BiiR, &BiiD, &BrocR, &BrocD );
315  Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD );
316  Binb1R = PB_Cfirstnb( BnR, IB, DESCB[IMB_], BnbR );
317  }
318 /*
319 * Compute descriptor Ad0 for sub( A )
320 */
321  PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
322  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
323 
324  Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
325  Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
326  if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size );
327 /*
328 * Compute conjugate of alpha for the conjugate transpose cases
329 */
330  if( TranOp == CCOTRAN )
331  {
332  conjg = CCONJG; talpha = PB_Cmalloc( size );
333  PB_Cconjg( TYPE, ALPHA, talpha );
334  }
335  else { conjg = CNOCONJG; talpha = ALPHA; }
336 /*
337 * Retrieve BLACS combine topology, set the transpose parameters to be passed
338 * to the BLAS matrix multiply routine and finally describe the form of the
339 * input and output operands.
340 */
341  if( LNorRT )
342  {
343  top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
344  GemmTa = CNOTRAN; GemmTb = ( lside ? CTRAN : TranOp );
345  WCroc = CCOLUMN; WBroc = CROW;
346  }
347  else
348  {
349  top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
350  GemmTb = CNOTRAN; GemmTa = ( lside ? TranOp : CTRAN );
351  WCroc = CROW; WBroc = CCOLUMN;
352  }
353 /*
354 * Computational partitioning size is computed as the product of the logical
355 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
356 */
357  Alcmb = 2 * nb * PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
358  ( Acol >= 0 ? npcol : 1 ) );
359 /*
360 * When sub( B ) is not replicated and backward pass on sub( B ), find the
361 * virtual process p owning the last row or column of sub( B ).
362 */
363  if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
364  {
365  tmp = PB_Cindxg2p( BnR-1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
366  p = MModSub( tmp, BrocR, BnprocsR );
367  }
368 /*
369 * Loop over the processes rows or columns owning the BnR rows or columns of
370 * sub( B ) to be processed.
371 */
372  n = BnR;
373 
374  while( n > 0 )
375  {
376 /*
377 * Find out who is the active process row or column as well as the number of
378 * rows or columns of sub( B ) it owns.
379 */
380  BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) );
381  BnpR = PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
382 
383  n -= BnpR;
384 /*
385 * Re-adjust the number of rows or columns to be handled at each step, in order
386 * to average the message sizes and the computational granularity.
387 */
388  if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
389 
390  while( BnpR )
391  {
392  nbb = MIN( nbb, BnpR );
393 /*
394 * Describe the local contiguous panel of sub( B )
395 */
396  if( lside )
397  {
398  PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
399  BcurrocR, ctxt, Bld );
400  if( BisR || ( BmyprocR == BcurrocR ) )
401  Bptr = Mptr( B, BiiD, BiiR, Bld, size );
402  }
403  else
404  {
405  PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
406  BrocD, ctxt, Bld );
407  if( BisR || ( BmyprocR == BcurrocR ) )
408  Bptr = Mptr( B, BiiR, BiiD, Bld, size );
409  }
410 /*
411 * Replicate this panel in the process rows or columns spanned by sub( A ): WB
412 */
413  PB_CInV( TYPE, NOCONJG, &WBroc, An, An, Ad0, nbb, Bptr, 0, 0, DBUFB,
414  &Broc, &WB, WBd, &WBfr );
415 /*
416 * Reuse sub( B ) and/or create vector WC in process columns or rows spanned by
417 * sub( A )
418 */
419  PB_CInOutV( TYPE, &WCroc, An, An, Ad0, nbb, one, Bptr, 0, 0, DBUFB,
420  &Broc, &tbeta, &WC, WCd, &WCfr, &WCsum, &WCpbY );
421 /*
422 * When the input data is first transposed, zero it now for later accumulation
423 */
424  if( notran )
425  PB_Cplapad( TYPE, ALL, NOCONJG, DBUFB[M_], DBUFB[N_], zero, zero,
426  Bptr, 0, 0, DBUFB );
427 /*
428 * Local matrix-matrix multiply iff I own some data
429 */
430  Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
431  Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_];
432  Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
433  Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
434 
435  WCld = WCd[LLD_];
436 
437  if( ( Amp > 0 ) && ( Anq > 0 ) )
438  {
439  WBld = WBd[LLD_];
440 
441  if( upper )
442  {
443 /*
444 * sub( A ) is upper triangular
445 */
446  if( LNorRT )
447  {
448  for( l = 0; l < An; l += Alcmb )
449  {
450  lb = An - l; lb = MIN( lb, Alcmb );
451  Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
452  Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
453  if( Alp > 0 )
454  {
455  Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
456  npcol );
457  gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp,
458  &nbb, &Alq0, talpha, Mptr( Aptr, 0, Alq, Ald,
459  size ), &Ald, Mptr( WB, 0, Alq, WBld, size ),
460  &WBld, one, WC, &WCld );
461  }
462  PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
463  talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld,
464  size ), WBld, Mptr( WC, Alp, 0, WCld, size ),
465  WCld, PB_Ctztrmm );
466  }
467  }
468  else
469  {
470  for( l = 0; l < An; l += Alcmb )
471  {
472  lb = An - l; lb = MIN( lb, Alcmb );
473  Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
474  Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
475  Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
476  if( Alq0 > 0 )
477  gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb,
478  &Alq0, &Alp, talpha, WB, &WBld, Mptr( Aptr, 0,
479  Alq, Ald, size ), &Ald, one, Mptr( WC, 0, Alq,
480  WCld, size ), &WCld );
481  PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
482  talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld,
483  size ), WBld, Mptr( WC, 0, Alq, WCld, size ),
484  WCld, PB_Ctztrmm );
485  }
486  }
487  }
488  else
489  {
490 /*
491 * sub( A ) is lower triangular
492 */
493  if( LNorRT )
494  {
495  for( l = 0; l < An; l += Alcmb )
496  {
497  lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) );
498  Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
499  Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
500  PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
501  talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld,
502  size ), WBld, Mptr( WC, Alp, 0, WCld, size ),
503  WCld, PB_Ctztrmm );
504  Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
505  nprow );
506  Alp0 = Amp - Alp;
507  Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
508  npcol );
509  if( Alp0 > 0 )
510  gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp0,
511  &nbb, &Alq0, talpha, Mptr( Aptr, Alp, Alq, Ald,
512  size ), &Ald, Mptr( WB, 0, Alq, WBld, size ),
513  &WBld, one, Mptr( WC, Alp, 0, WCld, size ),
514  &WCld );
515  }
516  }
517  else
518  {
519  for( l = 0; l < An; l += Alcmb )
520  {
521  lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) );
522  Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
523  Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
524  PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
525  talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld,
526  size ), WBld, Mptr( WC, 0, Alq, WCld, size ),
527  WCld, PB_Ctztrmm );
528  Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
529  nprow );
530  Alp0 = Amp - Alp;
531  Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
532  npcol );
533  if( Alq0 > 0 )
534  gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb,
535  &Alq0, &Alp0, talpha, Mptr( WB, Alp, 0, WBld,
536  size ), &WBld, Mptr( Aptr, Alp, Alq, Ald, size ),
537  &Ald, one, Mptr( WC, 0, Alq, WCld, size ),
538  &WCld );
539  }
540  }
541  }
542  }
543  if( WBfr ) free( WB );
544 
545  if( LNorRT )
546  {
547 /*
548 * Combine the partial column results into WC
549 */
550  if( WCsum && ( Amp > 0 ) )
551  gsum2d( ctxt, ROW, &top, Amp, nbb, WC, WCld, myrow, WCd[CSRC_] );
552 /*
553 * sub( B ) := WC (if necessary)
554 */
555  if( WCpbY )
556  PB_Cpaxpby( TYPE, &conjg, An, nbb, one, WC, 0, 0, WCd, &WCroc,
557  zero, Bptr, 0, 0, DBUFB, &Broc );
558  }
559  else
560  {
561 /*
562 * Combine the partial row results into WC
563 */
564  if( WCsum && ( Anq > 0 ) )
565  gsum2d( ctxt, COLUMN, &top, nbb, Anq, WC, WCld, WCd[RSRC_],
566  mycol );
567 /*
568 * sub( B ) := WC (if necessary)
569 */
570  if( WCpbY )
571  PB_Cpaxpby( TYPE, &conjg, nbb, An, one, WC, 0, 0, WCd, &WCroc,
572  zero, Bptr, 0, 0, DBUFB, &Broc );
573  }
574  if( WCfr ) free( WC );
575 /*
576 * Go to the next contiguous panel if any residing in this process row or column
577 */
578  BnpR -= nbb;
579 
580  if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
581  }
582 /*
583 * Go to next or previous process row or column owning some of sub( B )
584 */
585  if( !BisR )
586  p = ( Bfwd ? MModAdd1( p, BnprocsR ) : MModSub1( p, BnprocsR ) );
587  }
588 
589  if( TranOp == CCOTRAN ) free( talpha );
590 /*
591 * End of PB_CptrmmB
592 */
593 }
M_
#define M_
Definition: PBtools.h:39
CCONJG
#define CCONJG
Definition: PBblas.h:21
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
PB_Cpaxpby
void PB_Cpaxpby()
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
CSRC_
#define CSRC_
Definition: PBtools.h:46
PB_Cconjg
void PB_Cconjg()
CCOTRAN
#define CCOTRAN
Definition: PBblas.h:22
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
PB_CptrmmB
void PB_CptrmmB(PBTYP_T *TYPE, char *DIRECB, char *SIDE, char *UPLO, char *TRANSA, char *DIAG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *B, int IB, int JB, int *DESCB)
Definition: PB_CptrmmB.c:25
PB_Cfirstnb
int PB_Cfirstnb()
DLEN_
#define DLEN_
Definition: PBtools.h:48
GEMM_T
F_VOID_FCT(* GEMM_T)()
Definition: pblas.h:313
PB_Ctztrmm
void PB_Ctztrmm()
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cdescribe
void PB_Cdescribe()
CNOCONJG
#define CNOCONJG
Definition: PBblas.h:19
MModAdd
#define MModAdd(I1, I2, d)
Definition: PBtools.h:97
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
pilaenv_
int pilaenv_()
PB_Cdescset
void PB_Cdescset()
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
PB_Cplapad
void PB_Cplapad()
PB_Cindxg2p
int PB_Cindxg2p()
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
GSUM2D_T
void(* GSUM2D_T)()
Definition: pblas.h:282
PB_Ctop
char * PB_Ctop()
RSRC_
#define RSRC_
Definition: PBtools.h:45
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
COMBINE
#define COMBINE
Definition: PBblacs.h:49
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
PB_CInV
void PB_CInV()
N_
#define N_
Definition: PBtools.h:40
ALL
#define ALL
Definition: PBblas.h:50
PB_CInOutV
void PB_CInOutV()
PB_Cptrm
void PB_Cptrm()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
CCOLUMN
#define CCOLUMN
Definition: PBblacs.h:20
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
CTRAN
#define CTRAN
Definition: PBblas.h:20
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CLEFT
#define CLEFT
Definition: PBblas.h:29
CTXT_
#define CTXT_
Definition: PBtools.h:38
PB_Clcm
int PB_Clcm()