ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cptrsv.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
19 #ifdef __STDC__
20 void PB_Cptrsv( PBTYP_T * TYPE, int FBCAST, char * UPLO, char * TRANS,
21  char * DIAG, int N, char * A, int IA, int JA,
22  int * DESCA, char * XC, int INCXC, char * XR,
23  int INCXR )
24 #else
25 void PB_Cptrsv( TYPE, FBCAST, UPLO, TRANS, DIAG, N, A, IA, JA, DESCA,
26  XC, INCXC, XR, INCXR )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * DIAG, * TRANS, * UPLO;
31  int FBCAST, IA, INCXC, INCXR, JA, N;
32  PBTYP_T * TYPE;
33 /*
34 * .. Array Arguments ..
35 */
36  int * DESCA;
37  char * A, * XC, * XR;
38 #endif
39 {
40 /*
41 * Purpose
42 * =======
43 *
44 * PB_Cptrsv solves one of the systems of equations
45 *
46 * sub( A )*X = b, or sub( A )'*X = b, or conjg( sub( A )' )*X = b,
47 *
48 * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
49 *
50 * b and X are n element subvectors and sub( A ) is an n by n unit, or
51 * non-unit, upper or lower triangular submatrix.
52 *
53 * No test for singularity or near-singularity is included in this
54 * routine. Such tests must be performed before calling this routine.
55 *
56 * Notes
57 * =====
58 *
59 * A description vector is associated with each 2D block-cyclicly dis-
60 * tributed matrix. This vector stores the information required to
61 * establish the mapping between a matrix entry and its corresponding
62 * process and memory location.
63 *
64 * In the following comments, the character _ should be read as
65 * "of the distributed matrix". Let A be a generic term for any 2D
66 * block cyclicly distributed matrix. Its description vector is DESC_A:
67 *
68 * NOTATION STORED IN EXPLANATION
69 * ---------------- --------------- ------------------------------------
70 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
71 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
72 * the NPROW x NPCOL BLACS process grid
73 * A is distributed over. The context
74 * itself is global, but the handle
75 * (the integer value) may vary.
76 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
77 * ted matrix A, M_A >= 0.
78 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
79 * buted matrix A, N_A >= 0.
80 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
81 * block of the matrix A, IMB_A > 0.
82 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
83 * left block of the matrix A,
84 * INB_A > 0.
85 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
86 * bute the last M_A-IMB_A rows of A,
87 * MB_A > 0.
88 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
89 * bute the last N_A-INB_A columns of
90 * A, NB_A > 0.
91 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
92 * row of the matrix A is distributed,
93 * NPROW > RSRC_A >= 0.
94 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
95 * first column of A is distributed.
96 * NPCOL > CSRC_A >= 0.
97 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
98 * array storing the local blocks of
99 * the distributed matrix A,
100 * IF( Lc( 1, N_A ) > 0 )
101 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
102 * ELSE
103 * LLD_A >= 1.
104 *
105 * Let K be the number of rows of a matrix A starting at the global in-
106 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
107 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
108 * receive if these K rows were distributed over NPROW processes. If K
109 * is the number of columns of a matrix A starting at the global index
110 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
111 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
112 * these K columns were distributed over NPCOL processes.
113 *
114 * The values of Lr() and Lc() may be determined via a call to the func-
115 * tion PB_Cnumroc:
116 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
117 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
118 *
119 * Arguments
120 * =========
121 *
122 * TYPE (local input) pointer to a PBTYP_T structure
123 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
124 * that contains type information (See pblas.h).
125 *
126 * FBCAST (global input) INTEGER
127 * On entry, FBCAST specifies whether the transposed of the vec-
128 * tor solution should be broadcast or not when there is a pos-
129 * sible ambiguity, i.e. when sub( A ) is just one block. When
130 * FBCAST is zero, the solution vector is not broadcast, and the
131 * the solution vector is broadcast otherwise.
132 *
133 * UPLO (global input) pointer to CHAR
134 * On entry, UPLO specifies whether the submatrix sub( A ) is
135 * an upper or lower triangular submatrix as follows:
136 *
137 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
138 * submatrix,
139 *
140 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
141 * submatrix.
142 *
143 * TRANS (global input) pointer to CHAR
144 * On entry, TRANS specifies the operation to be performed as
145 * follows:
146 *
147 * TRANS = 'N' or 'n' sub( A ) * X = b,
148 *
149 * TRANS = 'T' or 't' sub( A )' * X = b,
150 *
151 * TRANS = 'C' or 'c' conjg( sub( A )' ) * X = b.
152 *
153 * DIAG (global input) pointer to CHAR
154 * On entry, DIAG specifies whether or not sub( A ) is unit
155 * triangular as follows:
156 *
157 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
158 * gular,
159 *
160 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
161 * angular.
162 *
163 * N (global input) INTEGER
164 * On entry, N specifies the order of the submatrix sub( A ).
165 * N must be at least zero.
166 *
167 * A (local input) pointer to CHAR
168 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
169 * at least Lc( 0, JA+N-1 ). Before entry, this array contains
170 * the local entries of the matrix A.
171 * Before entry with UPLO = 'U' or 'u', this array contains
172 * the local entries corresponding to the upper triangular part
173 * of the triangular submatrix sub( A ), and the local entries
174 * corresponding to the strictly lower triangular of sub( A )
175 * are not referenced.
176 * Before entry with UPLO = 'L' or 'l', this array contains
177 * the local entries corresponding to the lower triangular part
178 * of the triangular submatrix sub( A ), and the local entries
179 * corresponding to the strictly upper triangular of sub( A )
180 * are not referenced.
181 * Note that when DIAG = 'U' or 'u', the local entries corres-
182 * ponding to the diagonal elements of the submatrix sub( A )
183 * are not referenced either, but are assumed to be unity.
184 *
185 * IA (global input) INTEGER
186 * On entry, IA specifies A's global row index, which points to
187 * the beginning of the submatrix sub( A ).
188 *
189 * JA (global input) INTEGER
190 * On entry, JA specifies A's global column index, which points
191 * to the beginning of the submatrix sub( A ).
192 *
193 * DESCA (global and local input) INTEGER array
194 * On entry, DESCA is an integer array of dimension DLEN_. This
195 * is the array descriptor for the matrix A.
196 *
197 * XC (local input/local output) pointer to CHAR
198 * On entry, XC is an array of dimension (LLD_X,Kx), where Kx is
199 * at least 1 and LLD_X is at least Lr( IA, N ). Before entry,
200 * when TRANS is 'N' or 'n' this array contains the local en-
201 * tries of the right-hand-side vector b. When TRANS is not 'N'
202 * or 'n', the entries of XC should be zero. On exit, this array
203 * contains the partial solution vector x.
204 *
205 * INCXC (local input) INTEGER
206 * On entry, INCXC specifies the local increment of the vector
207 * XC.
208 *
209 * XR (local input/local output) pointer to CHAR
210 * On entry, XR is an array of dimension (LLD_X,Kx), where Kx is
211 * least Lc( JA, N ) and LLD_X at least 1. Before entry, when
212 * TRANS is 'N' or 'n' the entries of XR should be zero. Other-
213 * wise this array contains the local entries of the right-hand-
214 * side vector b. On exit, this array contains the partial so-
215 * lution vector x.
216 *
217 * INCXR (local input) INTEGER
218 * On entry, INCXR specifies the local increment of the vector
219 * XR.
220 *
221 * -- Written on April 1, 1998 by
222 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
223 *
224 * ---------------------------------------------------------------------
225 */
226 /*
227 * .. Local Scalars ..
228 */
229  char btop, * negone, * one, * zero;
230  int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
231  AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
232  Anq, Arow, Asrc, ChangeRoc=0, bcst, ctxt, ione=1, k=0, kb,
233  kbprev=0, kbsize, mb1, mycol, myrow, n1, n1last, n1p,
234  n1pprev=0, nb1, nlast, npcol, nprow, rocprev, size, tmp1,
235  tmp2;
236  AXPY_T axpy;
237  COPY_T copy;
238  VVSET_T set;
239  GEMV_T gemv;
240  TRSV_T trsv;
241  GESD2D_T send;
242  GERV2D_T recv;
243  GEBS2D_T bsend;
244  GEBR2D_T brecv;
245 /*
246 * .. Local Arrays ..
247 */
248  char * Aprev = NULL, * Xd = NULL, * Xdprev = NULL,
249  * Xprev = NULL, * work = NULL;
250 /* ..
251 * .. Executable Statements ..
252 *
253 */
254  if( N <= 0 ) return;
255 /*
256 * Retrieve process grid information
257 */
258  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
259 /*
260 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
261 */
262  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
263  &Acol );
264 /*
265 * Determine if sub( A ) spans more than one process row, and/or more than one
266 * process column.
267 */
268  Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ];
269  Aimb1 = PB_Cfirstnb( N, IA, DESCA[IMB_], Amb );
270  Anp = PB_Cnumroc( N, 0, Aimb1, Amb, myrow, Arow, nprow );
271  Ais1Row = !( PB_Cspan( N, 0, Aimb1, Amb, Arow, nprow ) );
272  Ainb1 = PB_Cfirstnb( N, JA, DESCA[INB_], Anb );
273  Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
274  Ais1Col = !( PB_Cspan( N, 0, Ainb1, Anb, Acol, npcol ) );
275 /*
276 * When sub( A ) spans only one process, solve the system locally and return.
277 */
278  if( Ais1Row && Ais1Col )
279  {
280  if( Mupcase( TRANS[0] ) == CNOTRAN )
281  {
282  if( Anq > 0 )
283  {
284  if( Anp > 0 )
285  {
286  TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ),
287  C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald,
288  TYPE->size ), &Ald, XC, &INCXC );
289  TYPE->Fcopy( &Anp, XC, &INCXC, XR, &INCXR );
290  }
291  if( ( Arow >= 0 ) && FBCAST )
292  {
293  btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
294  if( myrow == Arow )
295  TYPE->Cgebs2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR );
296  else
297  TYPE->Cgebr2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR, Arow,
298  mycol );
299  }
300  }
301  }
302  else
303  {
304  if( Anp > 0 )
305  {
306  if( Anq > 0 )
307  {
308  TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ),
309  C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald,
310  TYPE->size ), &Ald, XR, &INCXR );
311  TYPE->Fcopy( &Anq, XR, &INCXR, XC, &INCXC );
312  }
313  if( Acol >= 0 && FBCAST )
314  {
315  btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
316  if( mycol == Acol )
317  TYPE->Cgebs2d( ctxt, ROW, &btop, Anp, 1, XC, Anp );
318  else
319  TYPE->Cgebr2d( ctxt, ROW, &btop, Anp, 1, XC, Anp, myrow,
320  Acol );
321  }
322  }
323  }
324  return;
325  }
326 /*
327 * Retrieve from TYPE structure useful BLAS and BLACS functions.
328 */
329  size = TYPE->size;
330  negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero;
331  axpy = TYPE->Faxpy; copy = TYPE->Fcopy; set = TYPE->Fset;
332  gemv = TYPE->Fgemv; trsv = TYPE->Ftrsv;
333  send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d;
334  bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d;
335
336  if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size );
337
338  if( Mupcase( TRANS[0] ) == CNOTRAN )
339  {
340  if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
341  ( myrow != Arow ) ) ) ) return;
342  btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
343  bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
344  AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
345
346  if( Mupcase( UPLO[0] ) == CUPPER )
347  {
348 /*
349 * Initiate lookahead
350 */
351  nlast = ( npcol - 1 ) * Anb;
352  n1 = MAX( nlast, Anb );
353  nlast += Ainb1;
354  n1last = n1 - Anb + MAX( Ainb1, Anb );
355  work = PB_Cmalloc( MIN( n1last, Anp ) * size );
356  tmp1 = N-1;
357  Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
358  Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
359  rocprev = Alcol; Anpprev = Anp; Xprev = XC; Xdprev = XR;
360  Aprev = A = Mptr( A, 0, Anq, Ald, size );
361  mb1 = PB_Clastnb( N, 0, Aimb1, Amb );
362  nb1 = PB_Clastnb( N, 0, Ainb1, Anb );
363  tmp1 = N - ( kb = MIN( mb1, nb1 ) );
364  n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
365  tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
366  Asrc = Arow;
367  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
368  nprow );
369  while( N > 0 )
370  {
371  kbsize = kb * size;
372
373  if( Ais1Col || ( mycol == Alcol ) )
374  {
375  A -= Ald * kbsize;
376  Anq -= kb;
377  Xd = Mptr( XR, 0, Anq, INCXR, size );
378  }
379  if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
380 /*
381 * Partial update of previous block
382 */
383  if( n1pprev > 0 )
384  {
385  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
386  {
387  tmp1 = ( Anpprev - n1pprev ) * size;
388  gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone,
389  Aprev+tmp1, &Ald, Xdprev, &INCXR, one, Xprev+tmp1,
390  &INCXC );
391  }
392 /*
393 * Send partial updated result to current column
394 */
395  if( !( Ais1Col ) && ChangeRoc )
396  {
397  if( mycol == rocprev )
398  {
399  send( ctxt, n1pprev, 1, Xprev+(Anpprev-n1pprev)*size,
400  n1pprev, myrow, Alcol );
401  }
402  else if( mycol == Alcol )
403  {
404  recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
405  axpy( &n1pprev, one, work, &ione, Mptr( Xprev,
406  Anpprev-n1pprev, 0, INCXC, size ), &INCXC );
407  }
408  }
409  }
410 /*
411 * Solve current diagonal block
412 */
413  if( Ais1Col || ( mycol == Alcol ) )
414  {
415  if( AisRowRep || ( myrow == Alrow ) )
416  {
417  trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
418  &kb, Mptr( A, Anp, 0, Ald, size ), &Ald, Mptr( XC, Anp,
419  0, INCXC, size ), &INCXC );
420  copy( &kb, Mptr( XC, Anp, 0, INCXC, size ), &INCXC, Mptr( XR,
421  0, Anq, INCXR, size ), &INCXR );
422  }
423  if( bcst )
424  {
425  if( myrow == Alrow )
426  bsend( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR,
427  size ), INCXR );
428  else
429  brecv( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR,
430  size ), INCXR, Alrow, mycol );
431  }
432  }
433  else
434  {
435  if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
436  set( &kb, zero, Mptr( XC, Anp, 0, INCXC, size ), &ione );
437  }
438 /*
439 * Finish previous update
440 */
441  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) &&
442  ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
443  gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev, &Ald,
444  Xdprev, &INCXR, one, Xprev, &INCXC );
445 /*
446 * Save info of current step and update info for the next step
447 */
448  if( Ais1Col || ( mycol == Alcol ) ) { Xdprev = Xd; Aprev = A; }
449  if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
450
451  n1pprev = n1p;
452  rocprev = Alcol;
453  kbprev = kb;
454  k += kb;
455  N -= kb;
456
457  mb1 -= kb;
458  if( mb1 == 0 )
459  {
460  if( !( Ais1Row ) && ( Alrow >= 0 ) )
461  Alrow = MModSub1( Alrow, nprow );
462  mb1 = ( N > Aimb1 ? Amb : Aimb1 );
463  }
464
465  nb1 -= kb;
466  ChangeRoc = ( nb1 == 0 );
467
468  if( ChangeRoc )
469  {
470  if( !( Ais1Col ) && ( Alcol >= 0 ) )
471  Alcol = MModSub1( Alcol, npcol );
472  nb1 = ( N > Ainb1 ? Anb : Ainb1 );
473  }
474  tmp1 = N - ( kb = MIN( mb1, nb1 ) );
475  n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
476  tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
477  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
478  nprow );
479  }
480  }
481  else
482  {
483 /*
484 * Initiate lookahead
485 */
486  n1 = ( MAX( npcol, 2 ) - 1 ) * Anb;
487  work = PB_Cmalloc( MIN( n1, Anp ) * size );
488  Aprev = A; Xprev = XC; Xdprev = XR; Anpprev = Anp;
489  mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
490  tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
491  Asrc = Arow;
492  n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
493  nprow );
494  while( kb > 0 )
495  {
496  kbsize = kb * size;
497 /*
498 * Partial update of previous block
499 */
500  if( n1pprev > 0 )
501  {
502  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
503  gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone, Aprev,
504  &Ald, Xdprev, &INCXR, one, Xprev, &INCXC );
505 /*
506 * Send partial updated result to current column
507 */
508  if( !( Ais1Col ) && ChangeRoc )
509  {
510  if( mycol == rocprev )
511  {
512  send( ctxt, n1pprev, 1, Xprev, n1pprev, myrow, Acol );
513  }
514  else if( mycol == Acol )
515  {
516  recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
517  axpy( &n1pprev, one, work, &ione, Xprev, &INCXC );
518  }
519  }
520  }
521 /*
522 * Solve current diagonal block
523 */
524  if( Ais1Col || ( mycol == Acol ) )
525  {
526  if( AisRowRep || ( myrow == Arow ) )
527  {
528  trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
529  &kb, A, &Ald, XC, &INCXC );
530  copy( &kb, XC, &INCXC, XR, &INCXR );
531  }
532  if( bcst )
533  {
534  if( myrow == Arow )
535  bsend( ctxt, COLUMN, &btop, 1, kb, XR, INCXR );
536  else
537  brecv( ctxt, COLUMN, &btop, 1, kb, XR, INCXR, Arow,
538  mycol );
539  }
540  }
541  else
542  {
543  if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
544  set( &kb, zero, XC, &INCXC );
545  }
546 /*
547 * Finish previous update
548 */
549  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
550  {
551  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
552  {
553  tmp2 = n1pprev * size;
554  gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev+tmp2,
555  &Ald, Xdprev, &INCXR, one, Xprev+tmp2, &INCXC );
556  }
557  Aprev += Ald * kbprev * size;
558  }
559 /*
560 * Save info of current step and update info for the next step
561 */
562  if( Ais1Col || ( mycol == Acol ) )
563  { A += Ald*kbsize; Xdprev = Xd = XR; XR += INCXR*kbsize; }
564  if( AisRowRep || ( myrow == Arow ) )
565  {
566  Xprev = ( XC += kbsize );
567  A += kbsize;
568  Aprev += kbsize;
569  Anpprev = ( Anp -= kb );
570  }
571  n1pprev = n1p;
572  rocprev = Acol;
573  kbprev = kb;
574  k += kb;
575  N -= kb;
576
577  mb1 -= kb;
578  if( mb1 == 0 )
579  {
580  if( !( Ais1Row ) && ( Arow >= 0 ) )
581  Arow = MModAdd1( Arow, nprow );
582  mb1 = MIN( Amb, N );
583  }
584
585  nb1 -= kb;
586  ChangeRoc = ( nb1 == 0 );
587
588  if( ChangeRoc )
589  {
590  if( !( Ais1Col ) && ( Acol >= 0 ) )
591  Acol = MModAdd1( Acol, npcol );
592  nb1 = MIN( Anb, N );
593  }
594  tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
595  n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Aimb1, Amb, myrow, Asrc,
596  nprow );
597  }
598  }
599  }
600  else
601  {
602  if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
603  ( mycol != Acol ) ) ) ) return;
604  btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
605  bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
606  AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
607
608  if( Mupcase( UPLO[0] ) == CUPPER )
609  {
610 /*
611 * Initiate lookahead
612 */
613  n1 = ( MAX( nprow, 2 ) - 1 ) * Amb;
614  work = PB_Cmalloc( MIN( n1, Anq ) * size );
615  Aprev = A; Xprev = XR; Xdprev = XC; Anpprev = Anq;
616  mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
617  tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
618  Asrc = Acol;
619  n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
620  npcol );
621  while( kb > 0 )
622  {
623  kbsize = kb * size;
624 /*
625 * Partial update of previous block
626 */
627  if( n1pprev > 0 )
628  {
629  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
630  gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone, Aprev,
631  &Ald, Xdprev, &INCXC, one, Xprev, &INCXR );
632 /*
633 * Send partial updated result to current row
634 */
635  if( !( Ais1Row ) && ChangeRoc )
636  {
637  if( myrow == rocprev )
638  {
639  send( ctxt, 1, n1pprev, Xprev, INCXR, Arow, mycol );
640  }
641  else if( myrow == Arow )
642  {
643  recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
644  axpy( &n1pprev, one, work, &ione, Xprev, &INCXR );
645  }
646  }
647  }
648 /*
649 * Solve current diagonal block
650 */
651  if( Ais1Row || ( myrow == Arow ) )
652  {
653  if( AisColRep || ( mycol == Acol ) )
654  {
655  trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
656  &kb, A, &Ald, XR, &INCXR );
657  copy( &kb, XR, &INCXR, XC, &INCXC );
658  }
659  if( bcst )
660  {
661  if( mycol == Acol )
662  bsend( ctxt, ROW, &btop, kb, 1, XC, kb );
663  else
664  brecv( ctxt, ROW, &btop, kb, 1, XC, kb, myrow, Acol );
665  }
666  }
667  else
668  {
669  if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
670  set( &kb, zero, XR, &INCXR );
671  }
672 /*
673 * Finish previous update
674 */
675  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
676  {
677  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
678  {
679  tmp2 = n1pprev * size;
680  gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone,
681  Aprev+Ald*tmp2, &Ald, Xdprev, &INCXC, one,
682  Xprev+INCXR*tmp2, &INCXR );
683  }
684  Aprev += kbprev * size;
685  }
686 /*
687 * Save info of current step and update info for the next step
688 */
689  if( Ais1Row || ( myrow == Arow ) )
690  { A += kbsize; Xdprev = Xd = XC; XC += kbsize; }
691  if( AisColRep || ( mycol == Acol ) )
692  {
693  Xprev = ( XR += INCXR * kbsize );
694  A += Ald * kbsize;
695  Anpprev = ( Anq -= kb );
696  Aprev += Ald * kbsize;
697  }
698  n1pprev = n1p;
699  rocprev = Arow;
700  kbprev = kb;
701  k += kb;
702  N -= kb;
703
704  nb1 -= kb;
705  if( nb1 == 0 )
706  {
707  if( !( Ais1Col ) && ( Acol >= 0 ) )
708  Acol = MModAdd1( Acol, npcol );
709  nb1 = MIN( Anb, N );
710  }
711
712  mb1 -= kb;
713  ChangeRoc = ( mb1 == 0 );
714
715  if( ChangeRoc )
716  {
717  if( !( Ais1Row ) && ( Arow >= 0 ) )
718  Arow = MModAdd1( Arow, nprow );
719  mb1 = MIN( Amb, N );
720  }
721  tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
722  n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Ainb1, Anb, mycol, Asrc,
723  npcol );
724  }
725  }
726  else
727  {
728 /*
729 * Initiate lookahead
730 */
731  nlast = ( nprow - 1 ) * Amb;
732  n1 = MAX( nlast, Amb );
733  nlast += Aimb1;
734  n1last = n1 - Amb + MAX( Aimb1, Amb );
735  work = PB_Cmalloc( MIN( n1last, Anq ) * size );
736  tmp1 = N-1;
737  Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
738  Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
739  rocprev = Alrow; Anpprev = Anq; Xprev = XR; Xdprev = XC;
740  Aprev = A = Mptr( A, Anp, 0, Ald, size );
741  mb1 = PB_Clastnb( N, 0, Aimb1, Amb );
742  nb1 = PB_Clastnb( N, 0, Ainb1, Anb );
743  tmp1 = N - ( kb = MIN( mb1, nb1 ) );
744  n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
745  tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
746  Asrc = Acol;
747  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
748  npcol );
749  while( N > 0 )
750  {
751  kbsize = kb * size;
752
753  if( Ais1Row || ( myrow == Alrow ) )
754  {
755  A -= kbsize;
756  Anp -= kb;
757  Xd = Mptr( XC, Anp, 0, INCXC, size );
758  }
759  if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
760 /*
761 * Partial update of previous block
762 */
763  if( n1pprev > 0 )
764  {
765  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
766  {
767  tmp1 = ( Anpprev - n1pprev ) * size;
768  gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone,
769  Aprev+Ald*tmp1, &Ald, Xdprev, &INCXC, one,
770  Xprev+INCXR*tmp1, &INCXR );
771  }
772 /*
773 * Send partial updated result to current row
774 */
775  if( !( Ais1Row ) && ChangeRoc )
776  {
777  if( myrow == rocprev )
778  {
779  send( ctxt, 1, n1pprev, Mptr( Xprev, 0, Anpprev-n1pprev,
780  INCXR, size ), INCXR, Alrow, mycol );
781  }
782  else if( myrow == Alrow )
783  {
784  recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
785  axpy( &n1pprev, one, work, &ione, Mptr( Xprev, 0,
786  Anpprev-n1pprev, INCXR, size ), &INCXR );
787  }
788  }
789  }
790 /*
791 * Solve current diagonal block
792 */
793  if( Ais1Row || ( myrow == Alrow ) )
794  {
795  if( AisColRep || ( mycol == Alcol ) )
796  {
797  trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ),
798  &kb, Mptr( A, 0, Anq, Ald, size ), &Ald, Mptr( XR, 0,
799  Anq, INCXR, size ), &INCXR );
800  copy( &kb, Mptr( XR, 0, Anq, INCXR, size ), &INCXR, Mptr( XC,
801  0, Anp, INCXC, size ), &INCXC );
802  }
803  if( bcst )
804  {
805  if( mycol == Alcol )
806  bsend( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC,
807  size ), kb );
808  else
809  brecv( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC,
810  size ), kb, myrow, Alcol );
811  }
812  }
813  else
814  {
815  if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
816  set( &kb, zero, Mptr( XR, 0, Anq, INCXR, size ), &INCXR );
817  }
818 /*
819 * Finish previous update
820 */
821  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) &&
822  ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
823  gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone, Aprev, &Ald,
824  Xdprev, &INCXC, one, Xprev, &INCXR );
825 /*
826 * Save info of current step and update info for the next step
827 */
828  if( Ais1Row || ( myrow == Alrow ) ) { Xdprev = Xd; Aprev = A; }
829  if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
830
831  n1pprev = n1p;
832  rocprev = Alrow;
833  kbprev = kb;
834  k += kb;
835  N -= kb;
836
837  nb1 -= kb;
838  if( nb1 == 0 )
839  {
840  if( !( Ais1Col ) && ( Alcol >= 0 ) )
841  Alcol = MModSub1( Alcol, npcol );
842  nb1 = ( N > Ainb1 ? Anb : Ainb1 );
843  }
844
845  mb1 -= kb;
846  ChangeRoc = ( mb1 == 0 );
847
848  if( ChangeRoc )
849  {
850  if( !( Ais1Row ) && ( Alrow >= 0 ) )
851  Alrow = MModSub1( Alrow, nprow );
852  mb1 = ( N > Aimb1 ? Amb : Aimb1 );
853  }
854  tmp1 = N - ( kb = MIN( mb1, nb1 ) );
855  n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
856  tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
857  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
858  npcol );
859  }
860  }
861  }
862  if( work ) free( work );
863 /*
864 * End of PB_Cptrsv
865 */
866 }
GEBR2D_T
void(* GEBR2D_T)()
Definition: pblas.h:281
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
TRSV_T
F_VOID_FCT(* TRSV_T)()
Definition: pblas.h:305
GESD2D_T
void(* GESD2D_T)()
Definition: pblas.h:278
MB_
#define MB_
Definition: PBtools.h:43
GEMV_T
F_VOID_FCT(* GEMV_T)()
Definition: pblas.h:297
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
AXPY_T
F_VOID_FCT(* AXPY_T)()
Definition: pblas.h:293
PB_Cfirstnb
int PB_Cfirstnb()
PB_Clastnb
int PB_Clastnb()
LLD_
#define LLD_
Definition: PBtools.h:47
IMB_
#define IMB_
Definition: PBtools.h:41
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
GERV2D_T
void(* GERV2D_T)()
Definition: pblas.h:279
PB_Cindxg2p
int PB_Cindxg2p()
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
BCAST
#define BCAST
Definition: PBblacs.h:48
VVSET_T
F_VOID_FCT(* VVSET_T)()
Definition: pblas.h:287
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
PB_Cptrsv
void PB_Cptrsv(PBTYP_T *TYPE, int FBCAST, char *UPLO, char *TRANS, char *DIAG, int N, char *A, int IA, int JA, int *DESCA, char *XC, int INCXC, char *XR, int INCXR)
Definition: PB_Cptrsv.c:25
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
COPY_T
F_VOID_FCT(* COPY_T)()
Definition: pblas.h:294
CTXT_
#define CTXT_
Definition: PBtools.h:38
GEBS2D_T
void(* GEBS2D_T)()
Definition: pblas.h:280