SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cptrsm.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cptrsm( PBTYP_T * TYPE, Int FBCAST, char * SIDE, char * UPLO,
21 char * TRANS, char * DIAG, Int M, Int N, char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * BC,
23 Int LDBC, char * BR, Int LDBR )
24#else
25void PB_Cptrsm( TYPE, FBCAST, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA,
26 A, IA, JA, DESCA, BC, LDBC, BR, LDBR )
27/*
28* .. Scalar Arguments ..
29*/
30 char * ALPHA, * DIAG, * SIDE, * TRANS, * UPLO;
31 Int FBCAST, IA, JA, LDBC, LDBR, M, N;
32 PBTYP_T * TYPE;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESCA;
37 char * A, * BC, * BR;
38#endif
39{
40/*
41* Purpose
42* =======
43*
44* PB_Cptrsm solves one of the matrix equations
45*
46* op( sub( A ) ) * X = B, or X * op( sub( A ) ) = alpha * B,
47*
48* where
49*
50* sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L',
51* A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R'.
52*
53* X and B are m by n submatrices, sub( A ) is a unit, or non-unit,
54* upper or lower triangular submatrix and op( Y ) is one of
55*
56* op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ).
57*
58* The submatrix X is overwritten on B.
59*
60* No test for singularity or near-singularity is included in this
61* routine. Such tests must be performed before calling this routine.
62*
63* Notes
64* =====
65*
66* A description vector is associated with each 2D block-cyclicly dis-
67* tributed matrix. This vector stores the information required to
68* establish the mapping between a matrix entry and its corresponding
69* process and memory location.
70*
71* In the following comments, the character _ should be read as
72* "of the distributed matrix". Let A be a generic term for any 2D
73* block cyclicly distributed matrix. Its description vector is DESC_A:
74*
75* NOTATION STORED IN EXPLANATION
76* ---------------- --------------- ------------------------------------
77* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79* the NPROW x NPCOL BLACS process grid
80* A is distributed over. The context
81* itself is global, but the handle
82* (the integer value) may vary.
83* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84* ted matrix A, M_A >= 0.
85* N_A (global) DESCA[ N_ ] The number of columns in the distri-
86* buted matrix A, N_A >= 0.
87* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88* block of the matrix A, IMB_A > 0.
89* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90* left block of the matrix A,
91* INB_A > 0.
92* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93* bute the last M_A-IMB_A rows of A,
94* MB_A > 0.
95* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96* bute the last N_A-INB_A columns of
97* A, NB_A > 0.
98* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99* row of the matrix A is distributed,
100* NPROW > RSRC_A >= 0.
101* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102* first column of A is distributed.
103* NPCOL > CSRC_A >= 0.
104* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105* array storing the local blocks of
106* the distributed matrix A,
107* IF( Lc( 1, N_A ) > 0 )
108* LLD_A >= MAX( 1, Lr( 1, M_A ) )
109* ELSE
110* LLD_A >= 1.
111*
112* Let K be the number of rows of a matrix A starting at the global in-
113* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115* receive if these K rows were distributed over NPROW processes. If K
116* is the number of columns of a matrix A starting at the global index
117* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119* these K columns were distributed over NPCOL processes.
120*
121* The values of Lr() and Lc() may be determined via a call to the func-
122* tion PB_Cnumroc:
123* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125*
126* Arguments
127* =========
128*
129* TYPE (local input) pointer to a PBTYP_T structure
130* On entry, TYPE is a pointer to a structure of type PBTYP_T,
131* that contains type information (See pblas.h).
132*
133* FBCAST (global input) INTEGER
134* On entry, FBCAST specifies whether the transposed of the vec-
135* tor solution should be broadcast or not when there is a pos-
136* sible ambiguity, i.e. when sub( A ) is just one block. When
137* FBCAST is zero, the solution vector is not broadcast, and the
138* the solution vector is broadcast otherwise.
139*
140* SIDE (global input) pointer to CHAR
141* On entry, SIDE specifies whether op( sub( A ) ) appears on
142* the left or right of X as follows:
143*
144* SIDE = 'L' or 'l' op( sub( A ) ) * X = B,
145*
146* SIDE = 'R' or 'r' X * op( sub( A ) ) = B.
147*
148* UPLO (global input) pointer to CHAR
149* On entry, UPLO specifies whether the submatrix sub( A ) is
150* an upper or lower triangular submatrix as follows:
151*
152* UPLO = 'U' or 'u' sub( A ) is an upper triangular
153* submatrix,
154*
155* UPLO = 'L' or 'l' sub( A ) is a lower triangular
156* submatrix.
157*
158* TRANS (global input) pointer to CHAR
159* On entry, TRANS specifies the operation to be performed as
160* follows:
161*
162* TRANS = 'N' or 'n' sub( A ) * X = B,
163*
164* TRANS = 'T' or 't' sub( A )' * X = B,
165*
166* TRANS = 'C' or 'c' conjg( sub( A )' ) * X = B.
167*
168* DIAG (global input) pointer to CHAR
169* On entry, DIAG specifies whether or not sub( A ) is unit
170* triangular as follows:
171*
172* DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
173* gular,
174*
175* DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
176* angular.
177*
178* M (global input) INTEGER
179* On entry, M specifies the number of rows of the submatrix B.
180* M must be at least zero.
181*
182* N (global input) INTEGER
183* On entry, N specifies the number of columns of the submatrix
184* B. N must be at least zero.
185*
186* A (local input) pointer to CHAR
187* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
188* at least Lc( 0, JA+M-1 ) when SIDE = 'L' or 'l' and is at
189* least Lc( 0, JA+N-1 ) otherwise. Before entry, this array
190* contains the local entries of the matrix A.
191* Before entry with UPLO = 'U' or 'u', this array contains the
192* local entries corresponding to the entries of the upper tri-
193* angular submatrix sub( A ), and the local entries correspon-
194* ding to the entries of the strictly lower triangular part of
195* the submatrix sub( A ) are not referenced.
196* Before entry with UPLO = 'L' or 'l', this array contains the
197* local entries corresponding to the entries of the lower tri-
198* angular submatrix sub( A ), and the local entries correspon-
199* ding to the entries of the strictly upper triangular part of
200* the submatrix sub( A ) are not referenced.
201* Note that when DIAG = 'U' or 'u', the local entries corres-
202* ponding to the diagonal elements of the submatrix sub( A )
203* are not referenced either, but are assumed to be unity.
204*
205* IA (global input) INTEGER
206* On entry, IA specifies A's global row index, which points to
207* the beginning of the submatrix sub( A ).
208*
209* JA (global input) INTEGER
210* On entry, JA specifies A's global column index, which points
211* to the beginning of the submatrix sub( A ).
212*
213* DESCA (global and local input) INTEGER array
214* On entry, DESCA is an integer array of dimension DLEN_. This
215* is the array descriptor for the matrix A.
216*
217* BC (local input/local output) pointer to CHAR
218* On entry, BC is an array of dimension (LDBC,Kbc), where Kbc
219* is at least N when SIDE is 'L' or 'l' and at least M other-
220* wise. Before entry, when SIDE is 'L' or 'l' and TRANS is 'N'
221* or 'n' or SIDE is 'R' or 'r' and TRANS is not 'N' or 'n',
222* this array contains the local entries of the right-hand-side
223* matrix B. Otherwise, the entries of BC should be zero. On
224* exit, this array contains the partial solution matrix X.
225*
226* LDBC (local input) INTEGER
227* On entry, LDBC specifies the leading dimension of the array
228* BC. LDBC must be at least MAX( 1, Lr( IA, M ) ) when SIDE
229* is 'L' or 'l' and at least MAX( 1, Lr( IA, N ) ) otherwise.
230*
231* BR (local input/local output) pointer to CHAR
232* On entry, BR is an array of dimension (LDBR,Kbr), where Kbr
233* is at least Lc( JA, M ) when SIDE is 'L' or 'l' and at least
234* Lc( JA, N ) otherwise. Before entry, when SIDE is 'L' or 'l'
235* and TRANS is 'N' or 'n' or SIDE is 'R' or 'r' and TRANS is
236* not 'N' or 'n', the entries of BR should be zero. Otherwise,
237* this array contains the local entries of the right-hand-side
238* matrix B. On exit, this array contains the partial solution
239* matrix X.
240*
241* LDBR (local input) INTEGER
242* On entry, LDBR specifies the leading dimension of the array
243* BR. LDBR must be at least MAX( 1, N ) when SIDE is 'L' or 'l'
244* and at least MAX( 1, M ) otherwise.
245*
246* -- Written on April 1, 1998 by
247* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
248*
249* ---------------------------------------------------------------------
250*/
251/*
252* .. Local Scalars ..
253*/
254 char btop, * negone, * one, * talpha1, * talpha2, * zero;
255 Int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
256 AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
257 Anq, Arow, Asrc, ChangeRoc=0, LNorRT, Na, Nb, bcst, ctxt,
258 izero=0, k=0, kb, kbprev=0, kbsize, lside, mb1, mycol, myrow,
259 n1, n1last, n1p, n1pprev=0, nb1, nlast, notran, npcol, nprow,
260 rocprev, size, tmp1, tmp2;
261 MMADD_T add, tadd;
262 TZPAD_T pad;
263 GEMM_T gemm;
264 TRSM_T trsm;
265 GESD2D_T send;
266 GERV2D_T recv;
267 GEBS2D_T bsend;
268 GEBR2D_T brecv;
269/*
270* .. Local Arrays ..
271*/
272 char * Aprev = NULL, * Bd = NULL, * Bdprev = NULL,
273 * Bprev = NULL, * work = NULL;
274/* ..
275* .. Executable Statements ..
276*
277*/
278 if( ( M <= 0 ) || ( N <= 0 ) ) return;
279/*
280* Retrieve process grid information
281*/
282 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
283
284 lside = ( Mupcase( SIDE [0] ) == CLEFT );
285 notran = ( Mupcase( TRANS[0] ) == CNOTRAN );
286 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287 if( LNorRT ) { Na = M; Nb = N; } else { Na = N; Nb = M; }
288/*
289* Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
290*/
291 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
292 &Acol );
293/*
294* Determine if sub( A ) spans more than one process row, and/or more than one
295* process column.
296*/
297 Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ];
298 Aimb1 = PB_Cfirstnb( Na, IA, DESCA[IMB_], Amb );
299 Anp = PB_Cnumroc( Na, 0, Aimb1, Amb, myrow, Arow, nprow );
300 Ais1Row = !( PB_Cspan( Na, 0, Aimb1, Amb, Arow, nprow ) );
301 Ainb1 = PB_Cfirstnb( Na, JA, DESCA[INB_], Anb );
302 Anq = PB_Cnumroc( Na, 0, Ainb1, Anb, mycol, Acol, npcol );
303 Ais1Col = !( PB_Cspan( Na, 0, Ainb1, Anb, Acol, npcol ) );
304/*
305* When sub( A ) spans only one process, solve the system locally and return.
306*/
307 if( Ais1Row && Ais1Col )
308 {
309 if( LNorRT )
310 {
311 if( Anq > 0 )
312 {
313 if( Anp > 0 )
314 {
315 TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT :
316 LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
317 C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj,
318 Ald, TYPE->size ), &Ald, BC, &LDBC );
319 TYPE->Fmmtadd( &M, &N, TYPE->one, BC, &LDBC, TYPE->zero, BR,
320 &LDBR );
321 }
322 if( ( Arow >= 0 ) && FBCAST )
323 {
324 btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
325 if( myrow == Arow )
326 TYPE->Cgebs2d( ctxt, COLUMN, &btop, N, M, BR, LDBR );
327 else
328 TYPE->Cgebr2d( ctxt, COLUMN, &btop, N, M, BR, LDBR, Arow,
329 mycol );
330 }
331 }
332 }
333 else
334 {
335 if( Anp > 0 )
336 {
337 if( Anq > 0 )
338 {
339 TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT :
340 LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
341 C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj,
342 Ald, TYPE->size ), &Ald, BR, &LDBR );
343 TYPE->Fmmtadd( &M, &N, TYPE->one, BR, &LDBR, TYPE->zero, BC,
344 &LDBC );
345 }
346 if( ( Acol >= 0 ) && FBCAST )
347 {
348 btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
349 if( mycol == Acol )
350 TYPE->Cgebs2d( ctxt, ROW, &btop, N, M, BC, LDBC );
351 else
352 TYPE->Cgebr2d( ctxt, ROW, &btop, N, M, BC, LDBC, myrow,
353 Acol );
354 }
355 }
356 }
357 return;
358 }
359/*
360* Retrieve from TYPE structure useful BLAS and BLACS functions.
361*/
362 size = TYPE->size;
363 negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero;
364 add = TYPE->Fmmadd; tadd = TYPE->Fmmtadd; pad = TYPE->Ftzpad;
365 gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm;
366 send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d;
367 bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d;
368
369 if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size );
370
371 if( LNorRT )
372 {
373/*
374* Left - No tran or Right - (co)Trans
375*/
376 if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
377 ( myrow != Arow ) ) ) ) return;
378 btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
379 bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
380 AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
381
382 if( Mupcase( UPLO[0] ) == CUPPER )
383 {
384/*
385* Initiate lookahead
386*/
387 nlast = ( npcol - 1 ) * Anb;
388 n1 = MAX( nlast, Anb );
389 nlast += Ainb1;
390 n1last = n1 - Anb + MAX( Ainb1, Anb );
391 work = PB_Cmalloc( Nb * MIN( n1last, Anp ) * size );
392 tmp1 = Na-1;
393 Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
394 Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
395 rocprev = Alcol; Anpprev = Anp; Bprev = BC; Bdprev = BR;
396 Aprev = A = Mptr( A, 0, Anq, Ald, size );
397 mb1 = PB_Clastnb( Na, 0, Aimb1, Amb );
398 nb1 = PB_Clastnb( Na, 0, Ainb1, Anb );
399 tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
400 n1 = ( ( Ais1Col || ( Na - nb1 < nlast ) ) ? n1last : n1 );
401 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
402 Asrc = Arow;
403 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
404 nprow );
405 talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Alcol ) ) ?
406 ALPHA : one );
407 while( Na > 0 )
408 {
409 kbsize = kb * size;
410
411 if( Ais1Col || ( mycol == Alcol ) )
412 { A -= Ald*kbsize; Anq -= kb; Bd = Mptr( BR, 0, Anq, LDBR, size ); }
413 if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
414/*
415* Partial update of previous block
416*/
417 if( n1pprev > 0 )
418 {
419 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
420 {
421 tmp1 = ( Anpprev - n1pprev ) * size;
422 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb,
423 &kbprev, negone, Aprev+tmp1, &Ald, Bdprev, &LDBR,
424 talpha1, Bprev+tmp1, &LDBC );
425 }
426/*
427* Send partial updated result to current column
428*/
429 if( !( Ais1Col ) && ChangeRoc )
430 {
431 if( mycol == rocprev )
432 {
433 send( ctxt, n1pprev, Nb, Mptr( Bprev, Anpprev-n1pprev, 0,
434 LDBC, size ), LDBC, myrow, Alcol );
435 }
436 else if( mycol == Alcol )
437 {
438 recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
439 add( &n1pprev, &Nb, one, work, &n1pprev, one, Mptr( Bprev,
440 Anpprev-n1pprev, 0, LDBC, size ), &LDBC );
441 }
442 }
443 }
444/*
445* Solve current diagonal block
446*/
447 if( Ais1Col || ( mycol == Alcol ) )
448 {
449 if( AisRowRep || ( myrow == Alrow ) )
450 {
451 trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
452 C2F_CHAR( DIAG ), &kb, &Nb, talpha2, Mptr( A, Anp, 0,
453 Ald, size ), &Ald, Mptr( BC, Anp, 0, LDBC, size ),
454 &LDBC );
455 tadd( &kb, &Nb, one, Mptr( BC, Anp, 0, LDBC, size ), &LDBC,
456 zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
457 }
458 if( bcst )
459 {
460 if( myrow == Alrow )
461 bsend( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR,
462 size ), LDBR );
463 else
464 brecv( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR,
465 size ), LDBR, Alrow, mycol );
466 }
467 talpha2 = one;
468 }
469 else
470 {
471 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
472 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero,
473 zero, zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
474 }
475/*
476* Finish previous update
477*/
478 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
479 {
480 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
481 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb,
482 &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
483 Bprev, &LDBC );
484 talpha1 = one;
485 }
486/*
487* Save info of current step and update info for the next step
488*/
489 if( Ais1Col || ( mycol == Alcol ) ) { Bdprev = Bd; Aprev = A; }
490 if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
491
492 n1pprev = n1p;
493 rocprev = Alcol;
494 kbprev = kb;
495 k += kb;
496 Na -= kb;
497
498 mb1 -= kb;
499 if( mb1 == 0 )
500 {
501 if( !( Ais1Row ) && ( Alrow >= 0 ) )
502 Alrow = MModSub1( Alrow, nprow );
503 mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
504 }
505
506 nb1 -= kb;
507 ChangeRoc = ( nb1 == 0 );
508
509 if( ChangeRoc )
510 {
511 if( !( Ais1Col ) && ( Alcol >= 0 ) )
512 Alcol = MModSub1( Alcol, npcol );
513 nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
514 }
515 tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
516 n1 = ( ( Ais1Col || ( Na-nb1 < nlast ) ) ? n1last : n1 );
517 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
518 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
519 nprow );
520 }
521 }
522 else
523 {
524/*
525* Initiate lookahead
526*/
527 n1 = ( MAX( npcol, 2 ) - 1 ) * Anb;
528 work = PB_Cmalloc( Nb*MIN( n1, Anp )*size );
529 Aprev = A; Bprev = BC, Bdprev = BR; Anpprev = Anp;
530 mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
531 tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
532 Asrc = Arow;
533 n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
534 nprow );
535 talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Acol ) ) ?
536 ALPHA : one );
537 while( kb > 0 )
538 {
539 kbsize = kb * size;
540/*
541* Partial update of previous block
542*/
543 if( n1pprev > 0 )
544 {
545 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
546 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb,
547 &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
548 Bprev, &LDBC );
549/*
550* Send partial updated result to current column
551*/
552 if( !( Ais1Col ) && ChangeRoc )
553 {
554 if( mycol == rocprev )
555 {
556 send( ctxt, n1pprev, Nb, Bprev, LDBC, myrow, Acol );
557 }
558 else if( mycol == Acol )
559 {
560 recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
561 add( &n1pprev, &Nb, one, work, &n1pprev, one, Bprev,
562 &LDBC );
563 }
564 }
565 }
566/*
567* Solve current diagonal block
568*/
569 if( Ais1Col || ( mycol == Acol ) )
570 {
571 if( AisRowRep || ( myrow == Arow ) )
572 {
573 trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
574 C2F_CHAR( DIAG ), &kb, &Nb, talpha2, A, &Ald, BC,
575 &LDBC );
576 tadd( &kb, &Nb, one, BC, &LDBC, zero, BR, &LDBR );
577 }
578 if( bcst )
579 {
580 if( myrow == Arow )
581 bsend( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR );
582 else
583 brecv( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR, Arow,
584 mycol );
585 }
586 talpha2 = one;
587 }
588 else
589 {
590 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
591 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero,
592 zero, zero, BC, &LDBC );
593 }
594/*
595* Finish previous update
596*/
597 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
598 {
599 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
600 {
601 tmp2 = n1pprev * size;
602 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb,
603 &kbprev, negone, Aprev+tmp2, &Ald, Bdprev, &LDBR,
604 talpha1, Bprev+tmp2, &LDBC );
605 }
606 Aprev += Ald * kbprev * size; talpha1 = one;
607 }
608/*
609* Save info of current step and update info for the next step
610*/
611 if( Ais1Col || ( mycol == Acol ) )
612 { A += Ald*kbsize; Bdprev = Bd = BR; BR += LDBR*kbsize; }
613 if( AisRowRep || ( myrow == Arow ) )
614 {
615 Bprev = ( BC += kbsize );
616 A += kbsize;
617 Aprev += kbsize;
618 Anpprev = ( Anp -= kb );
619 }
620 n1pprev = n1p;
621 rocprev = Acol;
622 kbprev = kb;
623 k += kb;
624 Na -= kb;
625
626 mb1 -= kb;
627 if( mb1 == 0 )
628 {
629 if( !( Ais1Row ) && ( Arow >= 0 ) )
630 Arow = MModAdd1( Arow, nprow );
631 mb1 = MIN( Amb, Na );
632 }
633
634 nb1 -= kb;
635 ChangeRoc = ( nb1 == 0 );
636
637 if( ChangeRoc )
638 {
639 if( !( Ais1Col ) && ( Acol >= 0 ) )
640 Acol = MModAdd1( Acol, npcol );
641 nb1 = MIN( Anb, Na );
642 }
643 tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
644 n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Aimb1, Amb, myrow,
645 Asrc, nprow );
646 }
647 }
648 }
649 else
650 {
651/*
652* Right - No tran or Left - (co)Trans
653*/
654 if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
655 ( mycol != Acol ) ) ) ) return;
656 btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
657 bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
658 AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
659
660 if( Mupcase( UPLO[0] ) == CUPPER )
661 {
662/*
663* Initiate lookahead
664*/
665 n1 = ( MAX( nprow, 2 ) - 1 ) * Amb;
666 work = PB_Cmalloc( Nb*MIN( n1, Anq )*size );
667 Aprev = A; Bprev = BR, Bdprev = BC; Anpprev = Anq;
668 mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
669 tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
670 Asrc = Acol;
671 n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
672 npcol );
673 talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Arow ) ) ?
674 ALPHA : one );
675 while( kb > 0 )
676 {
677 kbsize = kb * size;
678/*
679* Partial update of previous block
680*/
681 if( n1pprev > 0 )
682 {
683 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
684 gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &n1pprev,
685 &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
686 Bprev, &LDBR );
687/*
688* Send partial updated result to current row
689*/
690 if( !( Ais1Row ) && ChangeRoc )
691 {
692 if( myrow == rocprev )
693 {
694 send( ctxt, Nb, n1pprev, Bprev, LDBR, Arow, mycol );
695 }
696 else if( myrow == Arow )
697 {
698 recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
699 add( &Nb, &n1pprev, one, work, &Nb, one, Bprev, &LDBR );
700 }
701 }
702 }
703/*
704* Solve current diagonal block
705*/
706 if( Ais1Row || ( myrow == Arow ) )
707 {
708 if( AisColRep || ( mycol == Acol ) )
709 {
710 trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
711 C2F_CHAR( DIAG ), &Nb, &kb, talpha2, A, &Ald, BR,
712 &LDBR );
713 tadd( &Nb, &kb, one, BR, &LDBR, zero, BC, &LDBC );
714 }
715 if( bcst )
716 {
717 if( mycol == Acol )
718 bsend( ctxt, ROW, &btop, kb, Nb, BC, LDBC );
719 else
720 brecv( ctxt, ROW, &btop, kb, Nb, BC, LDBC, myrow, Acol );
721 }
722 talpha2 = one;
723 }
724 else
725 {
726 if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
727 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero,
728 zero, zero, BR, &LDBR );
729 }
730/*
731* Finish previous update
732*/
733 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
734 {
735 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
736 {
737 tmp2 = n1pprev * size;
738 gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1,
739 &kbprev, negone, Bdprev, &LDBC, Aprev+Ald*tmp2, &Ald,
740 talpha1, Bprev+LDBR*tmp2, &LDBR );
741 }
742 Aprev += kbprev * size; talpha1 = one;
743 }
744/*
745* Save info of current step and update info for the next step
746*/
747 if( Ais1Row || ( myrow == Arow ) )
748 { A += kbsize; Bdprev = Bd = BC; BC += kbsize; }
749 if( AisColRep || ( mycol == Acol ) )
750 {
751 Bprev = ( BR += LDBR * kbsize );
752 A += Ald * kbsize;
753 Anpprev = ( Anq -= kb );
754 Aprev += Ald * kbsize;
755 }
756 n1pprev = n1p;
757 rocprev = Arow;
758 kbprev = kb;
759 k += kb;
760 Na -= kb;
761
762 nb1 -= kb;
763 if( nb1 == 0 )
764 {
765 if( !( Ais1Col ) && ( Acol >= 0 ) )
766 Acol = MModAdd1( Acol, npcol );
767 nb1 = MIN( Anb, Na );
768 }
769
770 mb1 -= kb;
771 ChangeRoc = ( mb1 == 0 );
772
773 if( ChangeRoc )
774 {
775 if( !( Ais1Row ) && ( Arow >= 0 ) )
776 Arow = MModAdd1( Arow, nprow );
777 mb1 = MIN( Amb, Na );
778 }
779 tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
780 n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Ainb1, Anb, mycol,
781 Asrc, npcol );
782 }
783 }
784 else
785 {
786/*
787* Initiate lookahead
788*/
789 nlast = ( nprow - 1 ) * Amb;
790 n1 = MAX( nlast, Amb );
791 nlast += Aimb1;
792 n1last = n1 - Amb + MAX( Aimb1, Amb );
793 work = PB_Cmalloc( Nb * MIN( n1last, Anq ) * size );
794 tmp1 = Na-1;
795 Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
796 Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
797 rocprev = Alrow; Anpprev = Anq; Bprev = BR; Bdprev = BC;
798 Aprev = A = Mptr( A, Anp, 0, Ald, size );
799 mb1 = PB_Clastnb( Na, 0, Aimb1, Amb );
800 nb1 = PB_Clastnb( Na, 0, Ainb1, Anb );
801 tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
802 n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
803 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
804 Asrc = Acol;
805 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
806 npcol );
807 talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Alrow ) ) ?
808 ALPHA : one );
809 while( Na > 0 )
810 {
811 kbsize = kb * size;
812
813 if( Ais1Row || ( myrow == Alrow ) )
814 { A -= kbsize; Anp -= kb; Bd = Mptr( BC, Anp, 0, LDBC, size ); }
815 if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
816/*
817* Partial update of previous block
818*/
819 if( n1pprev > 0 )
820 {
821 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
822 {
823 tmp1 = ( Anpprev - n1pprev ) * size;
824 TYPE->Fgemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ),
825 &Nb, &n1pprev, &kbprev, negone, Bdprev,
826 &LDBC, Aprev+Ald*tmp1, &Ald, talpha1,
827 Bprev+LDBR*tmp1, &LDBR );
828 }
829/*
830* Send partial updated result to current row
831*/
832 if( !( Ais1Row ) && ChangeRoc )
833 {
834 if( myrow == rocprev )
835 {
836 send( ctxt, Nb, n1pprev, Mptr( Bprev, 0, Anpprev-n1pprev,
837 LDBR, size ), LDBR, Alrow, mycol );
838 }
839 else if( myrow == Alrow )
840 {
841 recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
842 add( &Nb, &n1pprev, one, work, &Nb, one, Mptr( Bprev, 0,
843 Anpprev-n1pprev, LDBR, size ), &LDBR );
844 }
845 }
846 }
847/*
848* Solve current diagonal block
849*/
850 if( Ais1Row || ( myrow == Alrow ) )
851 {
852 if( AisColRep || ( mycol == Alcol ) )
853 {
854 trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
855 C2F_CHAR( DIAG ), &Nb, &kb, talpha2, Mptr( A, 0, Anq,
856 Ald, size ), &Ald, Mptr( BR, 0, Anq, LDBR, size ),
857 &LDBR );
858 tadd( &Nb, &kb, one, Mptr( BR, 0, Anq, LDBR, size ), &LDBR,
859 zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
860 }
861 if( bcst )
862 {
863 if( mycol == Alcol )
864 bsend( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC,
865 size ), LDBC );
866 else
867 brecv( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC,
868 size ), LDBC, myrow, Alcol );
869 }
870 talpha2 = one;
871 }
872 else
873 {
874 if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
875 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero,
876 zero, zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
877 }
878/*
879* Finish previous update
880*/
881 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
882 {
883 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
884 gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1,
885 &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
886 Bprev, &LDBR );
887 talpha1 = one;
888 }
889/*
890* Save info of current step and update info for the next step
891*/
892 if( Ais1Row || ( myrow == Alrow ) ) { Bdprev = Bd; Aprev = A; }
893 if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
894
895 n1pprev = n1p;
896 rocprev = Alrow;
897 kbprev = kb;
898 k += kb;
899 Na -= kb;
900
901 nb1 -= kb;
902 if( nb1 == 0 )
903 {
904 if( !( Ais1Col ) && ( Alcol >= 0 ) )
905 Alcol = MModSub1( Alcol, npcol );
906 nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
907 }
908
909 mb1 -= kb;
910 ChangeRoc = ( mb1 == 0 );
911
912 if( ChangeRoc )
913 {
914 if( !( Ais1Row ) && ( Alrow >= 0 ) )
915 Alrow = MModSub1( Alrow, nprow );
916 mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
917 }
918 tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
919 n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
920 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
921 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
922 npcol );
923 }
924 }
925 }
926 if( work ) free( work );
927/*
928* End of PB_Cptrsm
929*/
930}
#define Int
Definition Bconfig.h:22
void(* GEBR2D_T)()
Definition pblas.h:285
F_VOID_FCT(* GEMM_T)()
Definition pblas.h:317
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define C2F_CHAR(a)
Definition pblas.h:125
void(* GESD2D_T)()
Definition pblas.h:282
void(* GERV2D_T)()
Definition pblas.h:283
void(* GEBS2D_T)()
Definition pblas.h:284
F_VOID_FCT(* TZPAD_T)()
Definition pblas.h:292
F_VOID_FCT(* TRSM_T)()
Definition pblas.h:325
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define NOTRAN
Definition PBblas.h:44
#define ALL
Definition PBblas.h:50
#define CLEFT
Definition PBblas.h:29
#define TRAN
Definition PBblas.h:46
#define LEFT
Definition PBblas.h:55
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define RIGHT
Definition PBblas.h:56
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
Int PB_Clastnb()
char * PB_Ctop()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
void PB_Cptrsm()
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
Int PB_Cspan()
#define TYPE
Definition clamov.c:7