SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cptrsv.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cptrsv( PBTYP_T * TYPE, Int FBCAST, char * UPLO, char * TRANS,
21 char * DIAG, Int N, char * A, Int IA, Int JA,
22 Int * DESCA, char * XC, Int INCXC, char * XR,
23 Int INCXR )
24#else
25void PB_Cptrsv( TYPE, FBCAST, UPLO, TRANS, DIAG, N, A, IA, JA, DESCA,
26 XC, INCXC, XR, INCXR )
27/*
28* .. Scalar Arguments ..
29*/
30 char * DIAG, * TRANS, * UPLO;
31 Int FBCAST, IA, INCXC, INCXR, JA, N;
32 PBTYP_T * TYPE;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESCA;
37 char * A, * XC, * XR;
38#endif
39{
40/*
41* Purpose
42* =======
43*
44* PB_Cptrsv solves one of the systems of equations
45*
46* sub( A )*X = b, or sub( A )'*X = b, or conjg( sub( A )' )*X = b,
47*
48* where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
49*
50* b and X are n element subvectors and sub( A ) is an n by n unit, or
51* non-unit, upper or lower triangular submatrix.
52*
53* No test for singularity or near-singularity is included in this
54* routine. Such tests must be performed before calling this routine.
55*
56* Notes
57* =====
58*
59* A description vector is associated with each 2D block-cyclicly dis-
60* tributed matrix. This vector stores the information required to
61* establish the mapping between a matrix entry and its corresponding
62* process and memory location.
63*
64* In the following comments, the character _ should be read as
65* "of the distributed matrix". Let A be a generic term for any 2D
66* block cyclicly distributed matrix. Its description vector is DESC_A:
67*
68* NOTATION STORED IN EXPLANATION
69* ---------------- --------------- ------------------------------------
70* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
71* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
72* the NPROW x NPCOL BLACS process grid
73* A is distributed over. The context
74* itself is global, but the handle
75* (the integer value) may vary.
76* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
77* ted matrix A, M_A >= 0.
78* N_A (global) DESCA[ N_ ] The number of columns in the distri-
79* buted matrix A, N_A >= 0.
80* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
81* block of the matrix A, IMB_A > 0.
82* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
83* left block of the matrix A,
84* INB_A > 0.
85* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
86* bute the last M_A-IMB_A rows of A,
87* MB_A > 0.
88* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
89* bute the last N_A-INB_A columns of
90* A, NB_A > 0.
91* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
92* row of the matrix A is distributed,
93* NPROW > RSRC_A >= 0.
94* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
95* first column of A is distributed.
96* NPCOL > CSRC_A >= 0.
97* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
98* array storing the local blocks of
99* the distributed matrix A,
100* IF( Lc( 1, N_A ) > 0 )
101* LLD_A >= MAX( 1, Lr( 1, M_A ) )
102* ELSE
103* LLD_A >= 1.
104*
105* Let K be the number of rows of a matrix A starting at the global in-
106* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
107* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
108* receive if these K rows were distributed over NPROW processes. If K
109* is the number of columns of a matrix A starting at the global index
110* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
111* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
112* these K columns were distributed over NPCOL processes.
113*
114* The values of Lr() and Lc() may be determined via a call to the func-
115* tion PB_Cnumroc:
116* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
117* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
118*
119* Arguments
120* =========
121*
122* TYPE (local input) pointer to a PBTYP_T structure
123* On entry, TYPE is a pointer to a structure of type PBTYP_T,
124* that contains type information (See pblas.h).
125*
126* FBCAST (global input) INTEGER
127* On entry, FBCAST specifies whether the transposed of the vec-
128* tor solution should be broadcast or not when there is a pos-
129* sible ambiguity, i.e. when sub( A ) is just one block. When
130* FBCAST is zero, the solution vector is not broadcast, and the
131* the solution vector is broadcast otherwise.
132*
133* UPLO (global input) pointer to CHAR
134* On entry, UPLO specifies whether the submatrix sub( A ) is
135* an upper or lower triangular submatrix as follows:
136*
137* UPLO = 'U' or 'u' sub( A ) is an upper triangular
138* submatrix,
139*
140* UPLO = 'L' or 'l' sub( A ) is a lower triangular
141* submatrix.
142*
143* TRANS (global input) pointer to CHAR
144* On entry, TRANS specifies the operation to be performed as
145* follows:
146*
147* TRANS = 'N' or 'n' sub( A ) * X = b,
148*
149* TRANS = 'T' or 't' sub( A )' * X = b,
150*
151* TRANS = 'C' or 'c' conjg( sub( A )' ) * X = b.
152*
153* DIAG (global input) pointer to CHAR
154* On entry, DIAG specifies whether or not sub( A ) is unit
155* triangular as follows:
156*
157* DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
158* gular,
159*
160* DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
161* angular.
162*
163* N (global input) INTEGER
164* On entry, N specifies the order of the submatrix sub( A ).
165* N must be at least zero.
166*
167* A (local input) pointer to CHAR
168* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
169* at least Lc( 0, JA+N-1 ). Before entry, this array contains
170* the local entries of the matrix A.
171* Before entry with UPLO = 'U' or 'u', this array contains
172* the local entries corresponding to the upper triangular part
173* of the triangular submatrix sub( A ), and the local entries
174* corresponding to the strictly lower triangular of sub( A )
175* are not referenced.
176* Before entry with UPLO = 'L' or 'l', this array contains
177* the local entries corresponding to the lower triangular part
178* of the triangular submatrix sub( A ), and the local entries
179* corresponding to the strictly upper triangular of sub( A )
180* are not referenced.
181* Note that when DIAG = 'U' or 'u', the local entries corres-
182* ponding to the diagonal elements of the submatrix sub( A )
183* are not referenced either, but are assumed to be unity.
184*
185* IA (global input) INTEGER
186* On entry, IA specifies A's global row index, which points to
187* the beginning of the submatrix sub( A ).
188*
189* JA (global input) INTEGER
190* On entry, JA specifies A's global column index, which points
191* to the beginning of the submatrix sub( A ).
192*
193* DESCA (global and local input) INTEGER array
194* On entry, DESCA is an integer array of dimension DLEN_. This
195* is the array descriptor for the matrix A.
196*
197* XC (local input/local output) pointer to CHAR
198* On entry, XC is an array of dimension (LLD_X,Kx), where Kx is
199* at least 1 and LLD_X is at least Lr( IA, N ). Before entry,
200* when TRANS is 'N' or 'n' this array contains the local en-
201* tries of the right-hand-side vector b. When TRANS is not 'N'
202* or 'n', the entries of XC should be zero. On exit, this array
203* contains the partial solution vector x.
204*
205* INCXC (local input) INTEGER
206* On entry, INCXC specifies the local increment of the vector
207* XC.
208*
209* XR (local input/local output) pointer to CHAR
210* On entry, XR is an array of dimension (LLD_X,Kx), where Kx is
211* least Lc( JA, N ) and LLD_X at least 1. Before entry, when
212* TRANS is 'N' or 'n' the entries of XR should be zero. Other-
213* wise this array contains the local entries of the right-hand-
214* side vector b. On exit, this array contains the partial so-
215* lution vector x.
216*
217* INCXR (local input) INTEGER
218* On entry, INCXR specifies the local increment of the vector
219* XR.
220*
221* -- Written on April 1, 1998 by
222* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
223*
224* ---------------------------------------------------------------------
225*/
226/*
227* .. Local Scalars ..
228*/
229 char btop, * negone, * one, * zero;
230 Int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
231 AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
232 Anq, Arow, Asrc, ChangeRoc=0, bcst, ctxt, ione=1, k=0, kb,
233 kbprev=0, kbsize, mb1, mycol, myrow, n1, n1last, n1p,
234 n1pprev=0, nb1, nlast, npcol, nprow, rocprev, size, tmp1,
235 tmp2;
236 AXPY_T axpy;
237 COPY_T copy;
238 VVSET_T set;
239 GEMV_T gemv;
240 TRSV_T trsv;
241 GESD2D_T send;
242 GERV2D_T recv;
243 GEBS2D_T bsend;
244 GEBR2D_T brecv;
245/*
246* .. Local Arrays ..
247*/
248 char * Aprev = NULL, * Xd = NULL, * Xdprev = NULL,
249 * Xprev = NULL, * work = NULL;
250/* ..
251* .. Executable Statements ..
252*
253*/
254 if( N <= 0 ) return;
255/*
256* Retrieve process grid information
257*/
258 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
259/*
260* Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
261*/
262 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
263 &Acol );
264/*
265* Determine if sub( A ) spans more than one process row, and/or more than one
266* process column.
267*/
268 Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ];
269 Aimb1 = PB_Cfirstnb( N, IA, DESCA[IMB_], Amb );
270 Anp = PB_Cnumroc( N, 0, Aimb1, Amb, myrow, Arow, nprow );
271 Ais1Row = !( PB_Cspan( N, 0, Aimb1, Amb, Arow, nprow ) );
272 Ainb1 = PB_Cfirstnb( N, JA, DESCA[INB_], Anb );
273 Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
274 Ais1Col = !( PB_Cspan( N, 0, Ainb1, Anb, Acol, npcol ) );
275/*
276* When sub( A ) spans only one process, solve the system locally and return.
277*/
278 if( Ais1Row && Ais1Col )
279 {
280 if( Mupcase( TRANS[0] ) == CNOTRAN )
281 {
282 if( Anq > 0 )
283 {
284 if( Anp > 0 )
285 {
286 TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ),
287 C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald,
288 TYPE->size ), &Ald, XC, &INCXC );
289 TYPE->Fcopy( &Anp, XC, &INCXC, XR, &INCXR );
290 }
291 if( ( Arow >= 0 ) && FBCAST )
292 {
293 btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
294 if( myrow == Arow )
295 TYPE->Cgebs2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR );
296 else
297 TYPE->Cgebr2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR, Arow,
298 mycol );
299 }
300 }
301 }
302 else
303 {
304 if( Anp > 0 )
305 {
306 if( Anq > 0 )
307 {
308 TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ),
309 C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald,
310 TYPE->size ), &Ald, XR, &INCXR );
311 TYPE->Fcopy( &Anq, XR, &INCXR, XC, &INCXC );
312 }
313 if( Acol >= 0 && FBCAST )
314 {
315 btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
316 if( mycol == Acol )
317 TYPE->Cgebs2d( ctxt, ROW, &btop, Anp, 1, XC, Anp );
318 else
319 TYPE->Cgebr2d( ctxt, ROW, &btop, Anp, 1, XC, Anp, myrow,
320 Acol );
321 }
322 }
323 }
324 return;
325 }
326/*
327* Retrieve from TYPE structure useful BLAS and BLACS functions.
328*/
329 size = TYPE->size;
330 negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero;
331 axpy = TYPE->Faxpy; copy = TYPE->Fcopy; set = TYPE->Fset;
332 gemv = TYPE->Fgemv; trsv = TYPE->Ftrsv;
333 send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d;
334 bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d;
335
336 if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size );
337
338 if( Mupcase( TRANS[0] ) == CNOTRAN )
339 {
340 if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
341 ( myrow != Arow ) ) ) ) return;
342 btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
343 bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
344 AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
345
346 if( Mupcase( UPLO[0] ) == CUPPER )
347 {
348/*
349* Initiate lookahead
350*/
351 nlast = ( npcol - 1 ) * Anb;
352 n1 = MAX( nlast, Anb );
353 nlast += Ainb1;
354 n1last = n1 - Anb + MAX( Ainb1, Anb );
355 work = PB_Cmalloc( MIN( n1last, Anp ) * size );
356 tmp1 = N-1;
357 Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
358 Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
359 rocprev = Alcol; Anpprev = Anp; Xprev = XC; Xdprev = XR;
360 Aprev = A = Mptr( A, 0, Anq, Ald, size );
361 mb1 = PB_Clastnb( N, 0, Aimb1, Amb );
362 nb1 = PB_Clastnb( N, 0, Ainb1, Anb );
363 tmp1 = N - ( kb = MIN( mb1, nb1 ) );
364 n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
365 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
366 Asrc = Arow;
367 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
368 nprow );
369 while( N > 0 )
370 {
371 kbsize = kb * size;
372
373 if( Ais1Col || ( mycol == Alcol ) )
374 {
375 A -= Ald * kbsize;
376 Anq -= kb;
377 Xd = Mptr( XR, 0, Anq, INCXR, size );
378 }
379 if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
380/*
381* Partial update of previous block
382*/
383 if( n1pprev > 0 )
384 {
385 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
386 {
387 tmp1 = ( Anpprev - n1pprev ) * size;
388 gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone,
389 Aprev+tmp1, &Ald, Xdprev, &INCXR, one, Xprev+tmp1,
390 &INCXC );
391 }
392/*
393* Send partial updated result to current column
394*/
395 if( !( Ais1Col ) && ChangeRoc )
396 {
397 if( mycol == rocprev )
398 {
399 send( ctxt, n1pprev, 1, Xprev+(Anpprev-n1pprev)*size,
400 n1pprev, myrow, Alcol );
401 }
402 else if( mycol == Alcol )
403 {
404 recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
405 axpy( &n1pprev, one, work, &ione, Mptr( Xprev,
406 Anpprev-n1pprev, 0, INCXC, size ), &INCXC );
407 }
408 }
409 }
410/*
411* Solve current diagonal block
412*/
413 if( Ais1Col || ( mycol == Alcol ) )
414 {
415 if( AisRowRep || ( myrow == Alrow ) )
416 {
417 trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
418 &kb, Mptr( A, Anp, 0, Ald, size ), &Ald, Mptr( XC, Anp,
419 0, INCXC, size ), &INCXC );
420 copy( &kb, Mptr( XC, Anp, 0, INCXC, size ), &INCXC, Mptr( XR,
421 0, Anq, INCXR, size ), &INCXR );
422 }
423 if( bcst )
424 {
425 if( myrow == Alrow )
426 bsend( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR,
427 size ), INCXR );
428 else
429 brecv( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR,
430 size ), INCXR, Alrow, mycol );
431 }
432 }
433 else
434 {
435 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
436 set( &kb, zero, Mptr( XC, Anp, 0, INCXC, size ), &ione );
437 }
438/*
439* Finish previous update
440*/
441 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) &&
442 ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
443 gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev, &Ald,
444 Xdprev, &INCXR, one, Xprev, &INCXC );
445/*
446* Save info of current step and update info for the next step
447*/
448 if( Ais1Col || ( mycol == Alcol ) ) { Xdprev = Xd; Aprev = A; }
449 if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
450
451 n1pprev = n1p;
452 rocprev = Alcol;
453 kbprev = kb;
454 k += kb;
455 N -= kb;
456
457 mb1 -= kb;
458 if( mb1 == 0 )
459 {
460 if( !( Ais1Row ) && ( Alrow >= 0 ) )
461 Alrow = MModSub1( Alrow, nprow );
462 mb1 = ( N > Aimb1 ? Amb : Aimb1 );
463 }
464
465 nb1 -= kb;
466 ChangeRoc = ( nb1 == 0 );
467
468 if( ChangeRoc )
469 {
470 if( !( Ais1Col ) && ( Alcol >= 0 ) )
471 Alcol = MModSub1( Alcol, npcol );
472 nb1 = ( N > Ainb1 ? Anb : Ainb1 );
473 }
474 tmp1 = N - ( kb = MIN( mb1, nb1 ) );
475 n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
476 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
477 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
478 nprow );
479 }
480 }
481 else
482 {
483/*
484* Initiate lookahead
485*/
486 n1 = ( MAX( npcol, 2 ) - 1 ) * Anb;
487 work = PB_Cmalloc( MIN( n1, Anp ) * size );
488 Aprev = A; Xprev = XC; Xdprev = XR; Anpprev = Anp;
489 mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
490 tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
491 Asrc = Arow;
492 n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
493 nprow );
494 while( kb > 0 )
495 {
496 kbsize = kb * size;
497/*
498* Partial update of previous block
499*/
500 if( n1pprev > 0 )
501 {
502 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
503 gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone, Aprev,
504 &Ald, Xdprev, &INCXR, one, Xprev, &INCXC );
505/*
506* Send partial updated result to current column
507*/
508 if( !( Ais1Col ) && ChangeRoc )
509 {
510 if( mycol == rocprev )
511 {
512 send( ctxt, n1pprev, 1, Xprev, n1pprev, myrow, Acol );
513 }
514 else if( mycol == Acol )
515 {
516 recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
517 axpy( &n1pprev, one, work, &ione, Xprev, &INCXC );
518 }
519 }
520 }
521/*
522* Solve current diagonal block
523*/
524 if( Ais1Col || ( mycol == Acol ) )
525 {
526 if( AisRowRep || ( myrow == Arow ) )
527 {
528 trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
529 &kb, A, &Ald, XC, &INCXC );
530 copy( &kb, XC, &INCXC, XR, &INCXR );
531 }
532 if( bcst )
533 {
534 if( myrow == Arow )
535 bsend( ctxt, COLUMN, &btop, 1, kb, XR, INCXR );
536 else
537 brecv( ctxt, COLUMN, &btop, 1, kb, XR, INCXR, Arow,
538 mycol );
539 }
540 }
541 else
542 {
543 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
544 set( &kb, zero, XC, &INCXC );
545 }
546/*
547* Finish previous update
548*/
549 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
550 {
551 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
552 {
553 tmp2 = n1pprev * size;
554 gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev+tmp2,
555 &Ald, Xdprev, &INCXR, one, Xprev+tmp2, &INCXC );
556 }
557 Aprev += Ald * kbprev * size;
558 }
559/*
560* Save info of current step and update info for the next step
561*/
562 if( Ais1Col || ( mycol == Acol ) )
563 { A += Ald*kbsize; Xdprev = Xd = XR; XR += INCXR*kbsize; }
564 if( AisRowRep || ( myrow == Arow ) )
565 {
566 Xprev = ( XC += kbsize );
567 A += kbsize;
568 Aprev += kbsize;
569 Anpprev = ( Anp -= kb );
570 }
571 n1pprev = n1p;
572 rocprev = Acol;
573 kbprev = kb;
574 k += kb;
575 N -= kb;
576
577 mb1 -= kb;
578 if( mb1 == 0 )
579 {
580 if( !( Ais1Row ) && ( Arow >= 0 ) )
581 Arow = MModAdd1( Arow, nprow );
582 mb1 = MIN( Amb, N );
583 }
584
585 nb1 -= kb;
586 ChangeRoc = ( nb1 == 0 );
587
588 if( ChangeRoc )
589 {
590 if( !( Ais1Col ) && ( Acol >= 0 ) )
591 Acol = MModAdd1( Acol, npcol );
592 nb1 = MIN( Anb, N );
593 }
594 tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
595 n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Aimb1, Amb, myrow, Asrc,
596 nprow );
597 }
598 }
599 }
600 else
601 {
602 if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
603 ( mycol != Acol ) ) ) ) return;
604 btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
605 bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
606 AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
607
608 if( Mupcase( UPLO[0] ) == CUPPER )
609 {
610/*
611* Initiate lookahead
612*/
613 n1 = ( MAX( nprow, 2 ) - 1 ) * Amb;
614 work = PB_Cmalloc( MIN( n1, Anq ) * size );
615 Aprev = A; Xprev = XR; Xdprev = XC; Anpprev = Anq;
616 mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
617 tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
618 Asrc = Acol;
619 n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
620 npcol );
621 while( kb > 0 )
622 {
623 kbsize = kb * size;
624/*
625* Partial update of previous block
626*/
627 if( n1pprev > 0 )
628 {
629 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
630 gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone, Aprev,
631 &Ald, Xdprev, &INCXC, one, Xprev, &INCXR );
632/*
633* Send partial updated result to current row
634*/
635 if( !( Ais1Row ) && ChangeRoc )
636 {
637 if( myrow == rocprev )
638 {
639 send( ctxt, 1, n1pprev, Xprev, INCXR, Arow, mycol );
640 }
641 else if( myrow == Arow )
642 {
643 recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
644 axpy( &n1pprev, one, work, &ione, Xprev, &INCXR );
645 }
646 }
647 }
648/*
649* Solve current diagonal block
650*/
651 if( Ais1Row || ( myrow == Arow ) )
652 {
653 if( AisColRep || ( mycol == Acol ) )
654 {
655 trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
656 &kb, A, &Ald, XR, &INCXR );
657 copy( &kb, XR, &INCXR, XC, &INCXC );
658 }
659 if( bcst )
660 {
661 if( mycol == Acol )
662 bsend( ctxt, ROW, &btop, kb, 1, XC, kb );
663 else
664 brecv( ctxt, ROW, &btop, kb, 1, XC, kb, myrow, Acol );
665 }
666 }
667 else
668 {
669 if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
670 set( &kb, zero, XR, &INCXR );
671 }
672/*
673* Finish previous update
674*/
675 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
676 {
677 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
678 {
679 tmp2 = n1pprev * size;
680 gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone,
681 Aprev+Ald*tmp2, &Ald, Xdprev, &INCXC, one,
682 Xprev+INCXR*tmp2, &INCXR );
683 }
684 Aprev += kbprev * size;
685 }
686/*
687* Save info of current step and update info for the next step
688*/
689 if( Ais1Row || ( myrow == Arow ) )
690 { A += kbsize; Xdprev = Xd = XC; XC += kbsize; }
691 if( AisColRep || ( mycol == Acol ) )
692 {
693 Xprev = ( XR += INCXR * kbsize );
694 A += Ald * kbsize;
695 Anpprev = ( Anq -= kb );
696 Aprev += Ald * kbsize;
697 }
698 n1pprev = n1p;
699 rocprev = Arow;
700 kbprev = kb;
701 k += kb;
702 N -= kb;
703
704 nb1 -= kb;
705 if( nb1 == 0 )
706 {
707 if( !( Ais1Col ) && ( Acol >= 0 ) )
708 Acol = MModAdd1( Acol, npcol );
709 nb1 = MIN( Anb, N );
710 }
711
712 mb1 -= kb;
713 ChangeRoc = ( mb1 == 0 );
714
715 if( ChangeRoc )
716 {
717 if( !( Ais1Row ) && ( Arow >= 0 ) )
718 Arow = MModAdd1( Arow, nprow );
719 mb1 = MIN( Amb, N );
720 }
721 tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
722 n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Ainb1, Anb, mycol, Asrc,
723 npcol );
724 }
725 }
726 else
727 {
728/*
729* Initiate lookahead
730*/
731 nlast = ( nprow - 1 ) * Amb;
732 n1 = MAX( nlast, Amb );
733 nlast += Aimb1;
734 n1last = n1 - Amb + MAX( Aimb1, Amb );
735 work = PB_Cmalloc( MIN( n1last, Anq ) * size );
736 tmp1 = N-1;
737 Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
738 Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
739 rocprev = Alrow; Anpprev = Anq; Xprev = XR; Xdprev = XC;
740 Aprev = A = Mptr( A, Anp, 0, Ald, size );
741 mb1 = PB_Clastnb( N, 0, Aimb1, Amb );
742 nb1 = PB_Clastnb( N, 0, Ainb1, Anb );
743 tmp1 = N - ( kb = MIN( mb1, nb1 ) );
744 n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
745 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
746 Asrc = Acol;
747 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
748 npcol );
749 while( N > 0 )
750 {
751 kbsize = kb * size;
752
753 if( Ais1Row || ( myrow == Alrow ) )
754 {
755 A -= kbsize;
756 Anp -= kb;
757 Xd = Mptr( XC, Anp, 0, INCXC, size );
758 }
759 if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
760/*
761* Partial update of previous block
762*/
763 if( n1pprev > 0 )
764 {
765 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
766 {
767 tmp1 = ( Anpprev - n1pprev ) * size;
768 gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone,
769 Aprev+Ald*tmp1, &Ald, Xdprev, &INCXC, one,
770 Xprev+INCXR*tmp1, &INCXR );
771 }
772/*
773* Send partial updated result to current row
774*/
775 if( !( Ais1Row ) && ChangeRoc )
776 {
777 if( myrow == rocprev )
778 {
779 send( ctxt, 1, n1pprev, Mptr( Xprev, 0, Anpprev-n1pprev,
780 INCXR, size ), INCXR, Alrow, mycol );
781 }
782 else if( myrow == Alrow )
783 {
784 recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
785 axpy( &n1pprev, one, work, &ione, Mptr( Xprev, 0,
786 Anpprev-n1pprev, INCXR, size ), &INCXR );
787 }
788 }
789 }
790/*
791* Solve current diagonal block
792*/
793 if( Ais1Row || ( myrow == Alrow ) )
794 {
795 if( AisColRep || ( mycol == Alcol ) )
796 {
797 trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
798 &kb, Mptr( A, 0, Anq, Ald, size ), &Ald, Mptr( XR, 0,
799 Anq, INCXR, size ), &INCXR );
800 copy( &kb, Mptr( XR, 0, Anq, INCXR, size ), &INCXR, Mptr( XC,
801 0, Anp, INCXC, size ), &INCXC );
802 }
803 if( bcst )
804 {
805 if( mycol == Alcol )
806 bsend( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC,
807 size ), kb );
808 else
809 brecv( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC,
810 size ), kb, myrow, Alcol );
811 }
812 }
813 else
814 {
815 if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
816 set( &kb, zero, Mptr( XR, 0, Anq, INCXR, size ), &INCXR );
817 }
818/*
819* Finish previous update
820*/
821 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) &&
822 ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
823 gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone, Aprev, &Ald,
824 Xdprev, &INCXC, one, Xprev, &INCXR );
825/*
826* Save info of current step and update info for the next step
827*/
828 if( Ais1Row || ( myrow == Alrow ) ) { Xdprev = Xd; Aprev = A; }
829 if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
830
831 n1pprev = n1p;
832 rocprev = Alrow;
833 kbprev = kb;
834 k += kb;
835 N -= kb;
836
837 nb1 -= kb;
838 if( nb1 == 0 )
839 {
840 if( !( Ais1Col ) && ( Alcol >= 0 ) )
841 Alcol = MModSub1( Alcol, npcol );
842 nb1 = ( N > Ainb1 ? Anb : Ainb1 );
843 }
844
845 mb1 -= kb;
846 ChangeRoc = ( mb1 == 0 );
847
848 if( ChangeRoc )
849 {
850 if( !( Ais1Row ) && ( Alrow >= 0 ) )
851 Alrow = MModSub1( Alrow, nprow );
852 mb1 = ( N > Aimb1 ? Amb : Aimb1 );
853 }
854 tmp1 = N - ( kb = MIN( mb1, nb1 ) );
855 n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
856 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
857 n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
858 npcol );
859 }
860 }
861 }
862 if( work ) free( work );
863/*
864* End of PB_Cptrsv
865*/
866}
#define Int
Definition Bconfig.h:22
void(* GEBR2D_T)()
Definition pblas.h:285
F_VOID_FCT(* COPY_T)()
Definition pblas.h:298
#define C2F_CHAR(a)
Definition pblas.h:125
void(* GESD2D_T)()
Definition pblas.h:282
void(* GERV2D_T)()
Definition pblas.h:283
void(* GEBS2D_T)()
Definition pblas.h:284
F_VOID_FCT(* TRSV_T)()
Definition pblas.h:309
F_VOID_FCT(* GEMV_T)()
Definition pblas.h:301
F_VOID_FCT(* AXPY_T)()
Definition pblas.h:297
F_VOID_FCT(* VVSET_T)()
Definition pblas.h:291
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
Int PB_Clastnb()
char * PB_Ctop()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
void PB_Cptrsv()
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
Int PB_Cspan()
#define TYPE
Definition clamov.c:7