SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CVMpack.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20Int PB_CVMpack( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS,
21 char * UNPA, char * TRANS, Int MN, Int K,
22 char * ALPHA, char * A, Int LDA,
23 char * BETA, char * B, Int LDB )
24#else
25Int PB_CVMpack( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26 LDA, BETA, B, LDB )
27/*
28* .. Scalar Arguments ..
29*/
30 Int K, LDA, LDB, MN;
31 char * ALPHA, * BETA;
32/*
33* .. Array Arguments ..
34*/
35 char * VROCS, * ROCS, * UNPA, * TRANS;
36 PBTYP_T * TYPE;
37 PB_VM_T * VM;
38 char * A, * B;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_CVMpack packs a one-dimensional distributed array A into B, or
46* unpacks an array B into a one-dimensional distributed array A. This
47* operation is triggered by a virtual distributed array.
48*
49* Arguments
50* =========
51*
52* TYPE (local input) pointer to a PBTYP_T structure
53* On entry, TYPE is a pointer to a structure of type PBTYP_T,
54* that contains type information (see pblas.h).
55*
56* VM (local input) pointer to a VM structure
57* On entry, VM is a pointer to a structure of type PB_VM_T,
58* that contains the virtual matrix information (see pblas.h).
59*
60* VROCS (local input) pointer to CHAR
61* On entry, VROCS specifies if the rows or columns of the vir-
62* tual distributed array grid should be used for the packing or
63* unpacking operation as follows:
64* VROCS = 'R' or 'r', the rows should be used,
65* VROCS = 'C' or 'c', the columns should be used.
66*
67* ROCS (local input) pointer to CHAR
68* On entry, ROCS specifies if rows or columns should be used
69* packed or unpacked as follows:
70* ROCS = 'R' or 'r', rows should be (un)packed,
71* ROCS = 'C' or 'c', columns should be (un)packed.
72*
73* UNPA (local input) pointer to CHAR
74* On entry, UNPA specifies if the data should be packed or un-
75* packed as follows:
76* UNPA = 'P' or 'p', packing,
77* UNPA = 'U' or 'u', unpacking.
78*
79* TRANS (local input) pointer to CHAR
80* On entry, TRANS specifies if conjugation, transposition or
81* conjugate transposition should occur during the (un)packing
82* operation as follows:
83* TRANS = 'N' or 'n', natural (un)packing,
84* TRANS = 'Z' or 'z', conjugated (un)packing,
85* TRANS = 'T' or 't', transposed (un)packing,
86* TRANS = 'C' or 'c', conjugate transposed (un)packing.
87*
88* MN (local input) INTEGER
89* On entry, MN specifies the length of the distributed dimen-
90* sion to be (un)packed. MN must be at least zero.
91*
92* K (local input) INTEGER
93* On entry, K specifies the length of the non-distributed di-
94* mension to be (un)packed. K must be at least zero.
95*
96* ALPHA (local input) pointer to CHAR
97* On entry, ALPHA specifies the scalar alpha.
98*
99* A (local input/local output) pointer to CHAR
100* On entry, A points to an array of dimension (LDA, Ka), where
101* Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c',
102* Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and
103* when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
104* array contains unpacked data.
105*
106* LDA (local input) INTEGER
107* On entry, LDA specifies the leading dimension of the array A.
108* LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and
109* MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
110* VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
111* when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
112*
113* BETA (local input) pointer to CHAR
114* On entry, BETA specifies the scalar beta.
115*
116* B (local input/local output) pointer to CHAR
117* On entry, B points to an array of dimension (LDB,*). When
118* ROCS is 'C' or 'c', and TRANS is 'N', 'n', 'Z' or 'Z', B
119* points to an K by MN array. When TRANS is 'T', 't', 'C' or
120* 'c', B points to an MN by K array. When ROCS is 'R' or 'r',
121* and TRANS is 'T', 't', 'C' or 'c', B points to an K by MN ar-
122* ray. When TRANS is 'N', 'n', 'Z' or 'z', B points to an MN by
123* K array. This array contains the packed data.
124*
125* LDB (local input) INTEGER
126* On entry, LDB specifies the leading dimension of the array B.
127*
128* -- Written on April 1, 1998 by
129* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
130*
131* ---------------------------------------------------------------------
132*/
133/*
134* .. Local Scalars ..
135*/
136 Int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
140 char * aptrd;
141 MMADD_T add;
142/* ..
143* .. Executable Statements ..
144*
145*/
146 mblks = VM->mblks; nblks = VM->nblks;
147/*
148* Quick return if I don't own any blocks.
149*/
150 if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
151/*
152* Retrieve the contents of VM structure fields
153*/
154 lcmt00 = VM->lcmt00;
155 imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
156 iupp = VM->iupp; nprow = VM->nprow;
157 inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
158 ilow = VM->ilow; npcol = VM->npcol;
159
160 if( Mupcase( UNPA[0] ) == CPACKING )
161 {
162/*
163* B is the target packed buffer, A is the distributed source
164*/
165 if( Mupcase( TRANS[0] ) == CNOTRAN )
166 {
167/*
168* Add A to B
169*/
170 notran = 1; add = TYPE->Fmmadd;
171 }
172 else if( Mupcase( TRANS[0] ) == CCONJG )
173 {
174/*
175* Add the conjugate of A to B
176*/
177 notran = 1; add = TYPE->Fmmcadd;
178 }
179 else if( Mupcase( TRANS[0] ) == CTRAN )
180 {
181/*
182* Add the tranpose of A to B
183*/
184 notran = 0; add = TYPE->Fmmtadd;
185 }
186 else
187 {
188/*
189* Add the conjugate tranpose of A to B
190*/
191 notran = 0; add = TYPE->Fmmtcadd;
192 }
193 }
194 else
195 {
196/*
197* B is the source packed buffer, A is the distributed target
198*/
199 if( Mupcase( TRANS[0] ) == CNOTRAN )
200 {
201/*
202* Add B to A
203*/
204 notran = 1; add = TYPE->Fmmdda;
205 }
206 else if( Mupcase( TRANS[0] ) == CCONJG )
207 {
208/*
209* Add the conjugate of B to A
210*/
211 notran = 1; add = TYPE->Fmmddac;
212 }
213 else if( Mupcase( TRANS[0] ) == CTRAN )
214 {
215/*
216* Add the tranpose of B to A
217*/
218 notran = 0; add = TYPE->Fmmddat;
219 }
220 else
221 {
222/*
223* Add the conjugate tranpose of B to A
224*/
225 notran = 0; add = TYPE->Fmmddact;
226 }
227 }
228
229 size = TYPE->size;
230 rows = ( Mupcase( ROCS[0] ) == CROW );
231
232 if( Mupcase( VROCS[0] ) == CROW )
233 {
234/*
235* (un)packing using rows of virtual matrix
236*/
237 if( rows )
238 {
239/*
240* (un)packing rows of mn by k array A.
241*/
242 inca = size;
243 incb = ( notran ? size : LDB * size );
244 m = &tmp2;
245 n = &K;
246 }
247 else
248 {
249/*
250* (un)packing columns of k by mn array A
251*/
252 inca = LDA * size;
253 incb = ( notran ? LDB * size : size );
254 m = &K;
255 n = &tmp2;
256 }
257 kb = MN;
258/*
259* From the (un)packing point of view the only valuable shortcut is when the
260* virtual grid and the blocks are square, and the offset is zero or the grid
261* is 1x1.
262*/
263 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
264 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
265 {
266 if( VM->prow == VM->pcol )
267 {
268 npq = ( ( mblks < 2 ) ? imbloc :
269 imbloc + ( mblks - 2 ) * mb + lmbloc );
270 npq = MIN( npq, kb );
271 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
273 }
274 return( npq );
275 }
276 pmb = nprow * mb;
277 qnb = npcol * nb;
278/*
279* Handle separately the first row and/or column of the LCM table. Update the
280* LCM value of the curent block lcmt00, as well as the number of rows and
281* columns mblks and nblks remaining in the LCM table.
282*/
283 GoSouth = ( lcmt00 > iupp );
284 GoEast = ( lcmt00 < ilow );
285
286 if( !( GoSouth ) && !( GoEast ) )
287 {
288/*
289* The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
290*/
291 if( lcmt00 >= 0 )
292 {
293 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
294 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
295 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
296 }
297 else
298 {
299 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
300 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
301 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
302 }
303 if( ( kb -= tmp2 ) == 0 ) return( npq );
304 B += tmp2 * incb;
305/*
306* Decide whether one should go south or east in the table: Go east if
307* the block below the current one only owns lower entries. If this block,
308* however, owns diagonals, then go south.
309*/
310 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
311 }
312
313 if( GoSouth )
314 {
315/*
316* Go one step south in the LCM table. Adjust the current LCM value as well as
317* the pointer to A. The pointer to B remains unchanged.
318*/
319 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
320/*
321* While there are blocks remaining that own upper entries, keep going south.
322* Adjust the current LCM value as well as the pointer to A accordingly.
323*/
324 while( mblks && ( lcmt00 > upp ) )
325 { lcmt00 -= pmb; mblks--; A += mb * inca; }
326/*
327* Return if no more row in the LCM table.
328*/
329 if( mblks <= 0 ) return( npq );
330/*
331* lcmt00 <= upp. The current block owns either diagonals or lower entries.
332* Save the current position in the LCM table. After this column has been
333* completely taken care of, re-start from this row and the next column of
334* the LCM table.
335*/
336 lcmt = lcmt00; mblkd = mblks; aptrd = A;
337
338 while( mblkd && ( lcmt >= ilow ) )
339 {
340/*
341* A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
342*/
343 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
344 if( lcmt >= 0 )
345 {
346 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
347 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
348 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
349 }
350 else
351 {
352 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
353 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
354 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
355 }
356 if( ( kb -= tmp2 ) == 0 ) return( npq );
357/*
358* Keep going south until there are no more blocks owning diagonals
359*/
360 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
361 }
362/*
363* I am done with the first column of the LCM table. Go to the next column.
364*/
365 lcmt00 += low - ilow + qnb; nblks--;
366 }
367 else if( GoEast )
368 {
369/*
370* Go one step east in the LCM table. Adjust the current LCM value as
371* well as the pointer to B. The pointer to A remains unchanged.
372*/
373 lcmt00 += low - ilow + qnb; nblks--;
374/*
375* While there are blocks remaining that own lower entries, keep going east
376* in the LCM table. Adjust the current LCM value as well as the pointer to
377* B accordingly.
378*/
379 while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
380/*
381* Return if no more column in the LCM table.
382*/
383 if( nblks <= 0 ) return( npq );
384/*
385* lcmt00 >= low. The current block owns either diagonals or upper entries. Save
386* the current position in the LCM table. After this row has been completely
387* taken care of, re-start from this column and the next row of the LCM table.
388*/
389 lcmt = lcmt00; nblkd = nblks;
390
391 while( nblkd && ( lcmt <= iupp ) )
392 {
393/*
394* A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
395*/
396 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
397 if( lcmt >= 0 )
398 {
399 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
400 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
401 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
402 }
403 else
404 {
405 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
406 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
407 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
408 }
409 if( ( kb -= tmp2 ) == 0 ) return( npq );
410/*
411* Keep going east until there are no more blocks owning diagonals.
412*/
413 lcmt += qnb; nblkd--; B += tmp2 * incb;
414 }
415/*
416* I am done with the first row of the LCM table. Go to the next row.
417*/
418 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
419 }
420/*
421* Loop over the remaining columns of the LCM table.
422*/
423 do
424 {
425/*
426* If the current block does not have diagonal elements, find the closest one in
427* the LCM table having some.
428*/
429 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
430 {
431 while( mblks && nblks )
432 {
433 while( mblks && ( lcmt00 > upp ) )
434 { lcmt00 -= pmb; mblks--; A += mb*inca; }
435 if( lcmt00 >= low ) break;
436 while( nblks && ( lcmt00 < low ) )
437 { lcmt00 += qnb; nblks--; }
438 if( lcmt00 <= upp ) break;
439 }
440 }
441 if( !( mblks ) || !( nblks ) ) return( npq );
442/*
443* The current block owns diagonals. Save the current position in the LCM table.
444* After this column has been completely taken care of, re-start from this row
445* and the next column in the LCM table.
446*/
447 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448 lcmt = lcmt00; mblkd = mblks; aptrd = A;
449
450 while( mblkd && ( lcmt >= low ) )
451 {
452/*
453* A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
454*/
455 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
456 if( lcmt >= 0 )
457 {
458 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
459 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
460 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
461 }
462 else
463 {
464 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
465 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
466 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
467 }
468 if( ( kb -= tmp2 ) == 0 ) return( npq );
469/*
470* Keep going south until there are no more blocks owning diagonals
471*/
472 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
473 }
474/*
475* I am done with this column of the LCM table. Go to the next column ...
476*/
477 lcmt00 += qnb; nblks--;
478/*
479* ... until there are no more columns.
480*/
481 } while( nblks > 0 );
482/*
483* Return the number of diagonals found.
484*/
485 return( npq );
486 }
487 else
488 {
489/*
490* (un)packing using columns of virtual matrix
491*/
492 if( rows )
493 {
494/*
495* (un)packing rows of mn by k array A
496*/
497 inca = size;
498 incb = ( notran ? size : LDB * size );
499 m = &tmp2;
500 n = &K;
501 }
502 else
503 {
504/*
505* (un)packing columns of k by mn array A
506*/
507 inca = LDA * size;
508 incb = ( notran ? LDB * size : size );
509 m = &K;
510 n = &tmp2;
511 }
512 kb = MN;
513/*
514* From the (un)packing point of view the only valuable shortcut is when the
515* virtual grid and the blocks are square, and the offset is zero or the grid
516* is 1x1.
517*/
518 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
519 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
520 {
521 if( VM->prow == VM->pcol )
522 {
523 npq = ( ( nblks < 2 ) ? inbloc :
524 inbloc + ( nblks - 2 ) * nb + lnbloc );
525 npq = MIN( npq, kb );
526 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
528 }
529 return( npq );
530 }
531 pmb = nprow * mb;
532 qnb = npcol * nb;
533/*
534* Handle separately the first row and/or column of the LCM table. Update the
535* LCM value of the curent block lcmt00, as well as the number of rows and
536* columns mblks and nblks remaining in the LCM table.
537*/
538 GoSouth = ( lcmt00 > iupp );
539 GoEast = ( lcmt00 < ilow );
540
541 if( !( GoSouth ) && !( GoEast ) )
542 {
543/*
544* The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
545*/
546 if( lcmt00 >= 0 )
547 {
548 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
549 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
550 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
551 }
552 else
553 {
554 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
555 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
556 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
557 }
558 if( ( kb -= tmp2 ) == 0 ) return( npq );
559 B += tmp2 * incb;
560/*
561* Decide whether one should go south or east in the table: Go east if
562* the block below the current one only owns lower entries. If this block,
563* however, owns diagonals, then go south.
564*/
565 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
566 }
567
568 if( GoSouth )
569 {
570/*
571* Go one step south in the LCM table. Adjust the current LCM value as well as
572* the pointer to B. The pointer to A remains unchanged.
573*/
574 lcmt00 -= iupp - upp + pmb; mblks--;
575/*
576* While there are blocks remaining that own upper entries, keep going south.
577* Adjust the current LCM value as well as the pointer to B accordingly.
578*/
579 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
580/*
581* Return if no more row in the LCM table.
582*/
583 if( mblks <= 0 ) return( npq );
584/*
585* lcmt00 <= upp. The current block owns either diagonals or lower entries.
586* Save the current position in the LCM table. After this column has been
587* completely taken care of, re-start from this row and the next column of
588* the LCM table.
589*/
590 lcmt = lcmt00; mblkd = mblks;
591
592 while( mblkd && ( lcmt >= ilow ) )
593 {
594/*
595* A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
596*/
597 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
598 if( lcmt >= 0 )
599 {
600 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
601 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
602 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
603 }
604 else
605 {
606 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
607 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
608 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
609 }
610 if( ( kb -= tmp2 ) == 0 ) return( npq );
611/*
612* Keep going south until there are no more blocks owning diagonals
613*/
614 lcmt -= pmb; mblkd--; B += tmp2 * incb;
615 }
616/*
617* I am done with the first column of the LCM table. Go to the next column.
618*/
619 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
620 }
621 else if( GoEast )
622 {
623/*
624* Go one step east in the LCM table. Adjust the current LCM value as
625* well as the pointer to A. The pointer to B remains unchanged.
626*/
627 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
628/*
629* While there are blocks remaining that own lower entries, keep going east
630* in the LCM table. Adjust the current LCM value as well as the pointer to
631* A accordingly.
632*/
633 while( nblks && ( lcmt00 < low ) )
634 { lcmt00 += qnb; nblks--; A += nb * inca; }
635/*
636* Return if no more column in the LCM table.
637*/
638 if( nblks <= 0 ) return( npq );
639/*
640* lcmt00 >= low. The current block owns either diagonals or upper entries. Save
641* the current position in the LCM table. After this row has been completely
642* taken care of, re-start from this column and the next row of the LCM table.
643*/
644 lcmt = lcmt00; nblkd = nblks; aptrd = A;
645
646 while( nblkd && ( lcmt <= iupp ) )
647 {
648/*
649* A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
650*/
651 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
652 if( lcmt >= 0 )
653 {
654 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
655 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
656 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
657 }
658 else
659 {
660 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
661 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
662 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
663 }
664 if( ( kb -= tmp2 ) == 0 ) return( npq );
665/*
666* Keep going east until there are no more blocks owning diagonals.
667*/
668 lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
669 }
670/*
671* I am done with the first row of the LCM table. Go to the next row.
672*/
673 lcmt00 -= iupp - upp + pmb; mblks--;
674 }
675/*
676* Loop over the remaining columns of the LCM table.
677*/
678 do
679 {
680/*
681* If the current block does not have diagonal elements, find the closest one in
682* the LCM table having some.
683*/
684 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
685 {
686 while( mblks && nblks )
687 {
688 while( mblks && ( lcmt00 > upp ) )
689 { lcmt00 -= pmb; mblks--; }
690 if( lcmt00 >= low ) break;
691 while( nblks && ( lcmt00 < low ) )
692 { lcmt00 += qnb; nblks--; A += nb*inca; }
693 if( lcmt00 <= upp ) break;
694 }
695 }
696 if( !( mblks ) || !( nblks ) ) return( npq );
697/*
698* The current block owns diagonals. Save the current position in the LCM table.
699* After this column has been completely taken care of, re-start from this row
700* and the next column in the LCM table.
701*/
702 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703 lcmt = lcmt00; mblkd = mblks;
704
705 while( mblkd && ( lcmt >= low ) )
706 {
707/*
708* A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
709*/
710 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
711 if( lcmt >= 0 )
712 {
713 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
714 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
715 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
716 }
717 else
718 {
719 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
720 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
721 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
722 }
723 if( ( kb -= tmp2 ) == 0 ) return( npq );
724/*
725* Keep going south until there are no more blocks owning diagonals
726*/
727 lcmt -= pmb; mblkd--; B += tmp2 * incb;
728 }
729/*
730* I am done with this column of the LCM table. Go to the next column ...
731*/
732 lcmt00 += qnb; nblks--; A += nbloc * inca;
733/*
734* ... until there are no more columns.
735*/
736 } while( nblks > 0 );
737/*
738* Return the number of diagonals found.
739*/
740 return( npq );
741 }
742/*
743* End of PB_CVMpack
744*/
745}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CROW
Definition PBblacs.h:21
#define CCONJG
Definition PBblas.h:21
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define MAX(a_, b_)
Definition PBtools.h:77
#define MIN(a_, b_)
Definition PBtools.h:76
Int PB_CVMpack()
#define CPACKING
Definition PBtools.h:50
#define Mupcase(C)
Definition PBtools.h:83
#define TYPE
Definition clamov.c:7
Int lnbloc
Definition pblas.h:456
Int low
Definition pblas.h:459
Int iupp
Definition pblas.h:447
Int npcol
Definition pblas.h:461
Int nprow
Definition pblas.h:450
Int lcmt00
Definition pblas.h:439
Int pcol
Definition pblas.h:460
Int inbloc
Definition pblas.h:454
Int ilow
Definition pblas.h:458
Int inb1
Definition pblas.h:453
Int nblks
Definition pblas.h:457
Int prow
Definition pblas.h:449
Int imbloc
Definition pblas.h:443
Int lmbloc
Definition pblas.h:445
Int nb
Definition pblas.h:455
Int imb1
Definition pblas.h:442
Int mblks
Definition pblas.h:446
Int mb
Definition pblas.h:444
Int upp
Definition pblas.h:448